1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2022 Imagination Technologies Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy
5bf215546Sopenharmony_ci * of this software and associated documentation files (the "Software"), to deal
6bf215546Sopenharmony_ci * in the Software without restriction, including without limitation the rights
7bf215546Sopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8bf215546Sopenharmony_ci * copies of the Software, and to permit persons to whom the Software is
9bf215546Sopenharmony_ci * furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18bf215546Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include <assert.h>
25bf215546Sopenharmony_ci#include <stdbool.h>
26bf215546Sopenharmony_ci#include <stdint.h>
27bf215546Sopenharmony_ci#include <stdio.h>
28bf215546Sopenharmony_ci#include <string.h>
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "pvr_device_info.h"
31bf215546Sopenharmony_ci#include "pvr_pds.h"
32bf215546Sopenharmony_ci#include "pvr_rogue_pds_defs.h"
33bf215546Sopenharmony_ci#include "pvr_rogue_pds_disasm.h"
34bf215546Sopenharmony_ci#include "pvr_rogue_pds_encode.h"
35bf215546Sopenharmony_ci#include "util/log.h"
36bf215546Sopenharmony_ci#include "util/macros.h"
37bf215546Sopenharmony_ci
/* Extract the upper (H32) or lower (L32) 32 bits of a value as a uint32_t.
 *
 * The argument is widened to uint64_t before shifting so that a 32-bit
 * argument yields 0 from H32() instead of an out-of-range shift, and the whole
 * expansion is parenthesized so the macros behave as a single operand.
 */
#define H32(X) ((uint32_t)(((uint64_t)(X) >> 32U) & UINT64_C(0xFFFFFFFF)))
#define L32(X) ((uint32_t)((uint64_t)(X) & UINT64_C(0xFFFFFFFF)))
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci/*****************************************************************************
42bf215546Sopenharmony_ci Macro definitions
43bf215546Sopenharmony_ci*****************************************************************************/
44bf215546Sopenharmony_ci
/* NOTE(review): presumably log2 of the dword size in bytes; not referenced in
 * the visible part of this file - confirm against its users.
 */
#define PVR_PDS_DWORD_SHIFT 2

/* Base index and size of the constants and temps blocks.
 * pvr_pds_get_constants() asserts that an allocation ends at or below
 * PVR_PDS_CONSTANTS_BLOCK_SIZE; pvr_pds_get_temps() asserts that an allocation
 * ends at or below PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_TEMPS_BLOCK_SIZE.
 */
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
#define PVR_PDS_TEMPS_BLOCK_BASE 128
#define PVR_PDS_TEMPS_BLOCK_SIZE 32

/* Maximum ST/LD counts, aliased to the corresponding count field masks. */
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK

/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
/* Local IDs are available in every task. */
#define PVR_PDS_CDM_LOCAL_ID_X 0
#define PVR_PDS_CDM_LOCAL_ID_YZ 1

/* DOUTW dword mask selectors. They are used as indices into
 * dword_mask_const[], and PVR_PDS_DOUTW_MAXMASK is the number of selectors
 * (the size of that table).
 */
#define PVR_PDS_DOUTW_LOWER32 0x0
#define PVR_PDS_DOUTW_UPPER32 0x1
#define PVR_PDS_DOUTW_LOWER64 0x2
#define PVR_PDS_DOUTW_LOWER128 0x3
#define PVR_PDS_DOUTW_MAXMASK 0x4

/* NOTE(review): neither value is referenced in the visible part of this file;
 * units are unconfirmed here.
 */
#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci/*****************************************************************************
75bf215546Sopenharmony_ci Static variables
76bf215546Sopenharmony_ci*****************************************************************************/
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_cistatic const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
79bf215546Sopenharmony_ci   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
80bf215546Sopenharmony_ci   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
81bf215546Sopenharmony_ci   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
82bf215546Sopenharmony_ci   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
83bf215546Sopenharmony_ci};
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci/* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
86bf215546Sopenharmony_ci * cache_control_const[1].
87bf215546Sopenharmony_ci */
88bf215546Sopenharmony_cistatic const uint32_t cache_control_const[2][2] = {
89bf215546Sopenharmony_ci   { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
90bf215546Sopenharmony_ci     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
91bf215546Sopenharmony_ci   { 0, 0 }
92bf215546Sopenharmony_ci};
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci/*****************************************************************************
95bf215546Sopenharmony_ci Function definitions
96bf215546Sopenharmony_ci*****************************************************************************/
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ciuint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99bf215546Sopenharmony_ci                                uint64_t count8,
100bf215546Sopenharmony_ci                                uint64_t src_add,
101bf215546Sopenharmony_ci                                bool cached,
102bf215546Sopenharmony_ci                                const struct pvr_device_info *dev_info)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   uint64_t encoded = 0;
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107bf215546Sopenharmony_ci      encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108bf215546Sopenharmony_ci                         : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109bf215546Sopenharmony_ci   }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113bf215546Sopenharmony_ci   encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115bf215546Sopenharmony_ci   encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116bf215546Sopenharmony_ci                      : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117bf215546Sopenharmony_ci   encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   return encoded;
121bf215546Sopenharmony_ci}
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ciuint64_t pvr_pds_encode_st_src0(uint64_t src,
124bf215546Sopenharmony_ci                                uint64_t count4,
125bf215546Sopenharmony_ci                                uint64_t dst_add,
126bf215546Sopenharmony_ci                                bool write_through,
127bf215546Sopenharmony_ci                                const struct pvr_device_info *device_info)
128bf215546Sopenharmony_ci{
129bf215546Sopenharmony_ci   uint64_t encoded = 0;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   if (device_info->features.has_slc_mcu_cache_controls) {
132bf215546Sopenharmony_ci      encoded |= (write_through
133bf215546Sopenharmony_ci                     ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134bf215546Sopenharmony_ci                     : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135bf215546Sopenharmony_ci   }
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139bf215546Sopenharmony_ci   encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141bf215546Sopenharmony_ci   encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142bf215546Sopenharmony_ci                             : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143bf215546Sopenharmony_ci   encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144bf215546Sopenharmony_ci               << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   return encoded;
147bf215546Sopenharmony_ci}
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t
150bf215546Sopenharmony_cipvr_pds_encode_doutw_src1(uint32_t dest,
151bf215546Sopenharmony_ci                          uint32_t dword_mask,
152bf215546Sopenharmony_ci                          uint32_t flags,
153bf215546Sopenharmony_ci                          bool cached,
154bf215546Sopenharmony_ci                          const struct pvr_device_info *dev_info)
155bf215546Sopenharmony_ci{
156bf215546Sopenharmony_ci   assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157bf215546Sopenharmony_ci          ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158bf215546Sopenharmony_ci          (dword_mask < PVR_PDS_DOUTW_LOWER64));
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   uint32_t encoded =
161bf215546Sopenharmony_ci      (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   encoded |= dword_mask_const[dword_mask];
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   encoded |= flags;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   encoded |=
168bf215546Sopenharmony_ci      cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169bf215546Sopenharmony_ci                                                                            : 1]
170bf215546Sopenharmony_ci                         [cached ? 1 : 0];
171bf215546Sopenharmony_ci   return encoded;
172bf215546Sopenharmony_ci}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175bf215546Sopenharmony_ci                                                     uint32_t end,
176bf215546Sopenharmony_ci                                                     uint32_t src1,
177bf215546Sopenharmony_ci                                                     uint32_t src0)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   return pvr_pds_inst_encode_dout(cc,
180bf215546Sopenharmony_ci                                   end,
181bf215546Sopenharmony_ci                                   src1,
182bf215546Sopenharmony_ci                                   src0,
183bf215546Sopenharmony_ci                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184bf215546Sopenharmony_ci}
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187bf215546Sopenharmony_ci                                                   uint32_t end,
188bf215546Sopenharmony_ci                                                   uint32_t src0)
189bf215546Sopenharmony_ci{
190bf215546Sopenharmony_ci   return pvr_pds_inst_encode_dout(cc,
191bf215546Sopenharmony_ci                                   end,
192bf215546Sopenharmony_ci                                   0,
193bf215546Sopenharmony_ci                                   src0,
194bf215546Sopenharmony_ci                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195bf215546Sopenharmony_ci}
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198bf215546Sopenharmony_ci                                                        uint32_t end)
199bf215546Sopenharmony_ci{
200bf215546Sopenharmony_ci   return pvr_pds_inst_encode_dout(cc,
201bf215546Sopenharmony_ci                                   end,
202bf215546Sopenharmony_ci                                   0,
203bf215546Sopenharmony_ci                                   0,
204bf215546Sopenharmony_ci                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205bf215546Sopenharmony_ci}
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208bf215546Sopenharmony_ci                                                   uint32_t end,
209bf215546Sopenharmony_ci                                                   uint32_t src1,
210bf215546Sopenharmony_ci                                                   uint32_t src0)
211bf215546Sopenharmony_ci{
212bf215546Sopenharmony_ci   return pvr_pds_inst_encode_dout(cc,
213bf215546Sopenharmony_ci                                   end,
214bf215546Sopenharmony_ci                                   src1,
215bf215546Sopenharmony_ci                                   src0,
216bf215546Sopenharmony_ci                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220bf215546Sopenharmony_ci                                                   uint32_t end,
221bf215546Sopenharmony_ci                                                   uint32_t src0)
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   return pvr_pds_inst_encode_dout(cc,
224bf215546Sopenharmony_ci                                   end,
225bf215546Sopenharmony_ci                                   0,
226bf215546Sopenharmony_ci                                   src0,
227bf215546Sopenharmony_ci                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228bf215546Sopenharmony_ci}
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_cistatic ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231bf215546Sopenharmony_ci                                                 uint32_t neg,
232bf215546Sopenharmony_ci                                                 uint32_t setc,
233bf215546Sopenharmony_ci                                                 int32_t relative_address)
234bf215546Sopenharmony_ci{
235bf215546Sopenharmony_ci   /* Address should be signed but API only allows unsigned value. */
236bf215546Sopenharmony_ci   return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237bf215546Sopenharmony_ci}
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci/**
240bf215546Sopenharmony_ci * Gets the next constant address and moves the next constant pointer along.
241bf215546Sopenharmony_ci *
242bf215546Sopenharmony_ci * \param next_constant Pointer to the next constant address.
243bf215546Sopenharmony_ci * \param num_constants The number of constants required.
244bf215546Sopenharmony_ci * \param count The number of constants allocated.
245bf215546Sopenharmony_ci * \return The address of the next constant.
246bf215546Sopenharmony_ci */
247bf215546Sopenharmony_cistatic uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248bf215546Sopenharmony_ci                                      uint32_t num_constants,
249bf215546Sopenharmony_ci                                      uint32_t *count)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   uint32_t constant;
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci   /* Work out starting constant number. For even number of constants, start on
254bf215546Sopenharmony_ci    * a 64-bit boundary.
255bf215546Sopenharmony_ci    */
256bf215546Sopenharmony_ci   if (num_constants & 1)
257bf215546Sopenharmony_ci      constant = *next_constant;
258bf215546Sopenharmony_ci   else
259bf215546Sopenharmony_ci      constant = (*next_constant + 1) & ~1;
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   /* Update the count with the number of constants actually allocated. */
262bf215546Sopenharmony_ci   *count += constant + num_constants - *next_constant;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci   /* Move the next constant pointer. */
265bf215546Sopenharmony_ci   *next_constant = constant + num_constants;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   return constant;
270bf215546Sopenharmony_ci}
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci/**
273bf215546Sopenharmony_ci * Gets the next temp address and moves the next temp pointer along.
274bf215546Sopenharmony_ci *
275bf215546Sopenharmony_ci * \param next_temp Pointer to the next temp address.
276bf215546Sopenharmony_ci * \param num_temps The number of temps required.
277bf215546Sopenharmony_ci * \param count The number of temps allocated.
278bf215546Sopenharmony_ci * \return The address of the next temp.
279bf215546Sopenharmony_ci */
280bf215546Sopenharmony_cistatic uint32_t
281bf215546Sopenharmony_cipvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282bf215546Sopenharmony_ci{
283bf215546Sopenharmony_ci   uint32_t temp;
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   /* Work out starting temp number. For even number of temps, start on a
286bf215546Sopenharmony_ci    * 64-bit boundary.
287bf215546Sopenharmony_ci    */
288bf215546Sopenharmony_ci   if (num_temps & 1)
289bf215546Sopenharmony_ci      temp = *next_temp;
290bf215546Sopenharmony_ci   else
291bf215546Sopenharmony_ci      temp = (*next_temp + 1) & ~1;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   /* Update the count with the number of temps actually allocated. */
294bf215546Sopenharmony_ci   *count += temp + num_temps - *next_temp;
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   /* Move the next temp pointer. */
297bf215546Sopenharmony_ci   *next_temp = temp + num_temps;
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   assert((temp + num_temps) <=
300bf215546Sopenharmony_ci          (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   return temp;
303bf215546Sopenharmony_ci}
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci/**
306bf215546Sopenharmony_ci * Write a 32-bit constant indexed by the long range.
307bf215546Sopenharmony_ci *
308bf215546Sopenharmony_ci * \param data_block Pointer to data block to write to.
309bf215546Sopenharmony_ci * \param index Index within the data to write to.
310bf215546Sopenharmony_ci * \param dword The 32-bit constant to write.
311bf215546Sopenharmony_ci */
312bf215546Sopenharmony_cistatic void
313bf215546Sopenharmony_cipvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314bf215546Sopenharmony_ci{
315bf215546Sopenharmony_ci   /* Check range. */
316bf215546Sopenharmony_ci   assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317bf215546Sopenharmony_ci                    PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   data_block[index + 0] = dword0;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci/**
325bf215546Sopenharmony_ci * Write a 64-bit constant indexed by the long range.
326bf215546Sopenharmony_ci *
327bf215546Sopenharmony_ci * \param data_block Pointer to data block to write to.
328bf215546Sopenharmony_ci * \param index Index within the data to write to.
329bf215546Sopenharmony_ci * \param dword0 Lower half of the 64 bit constant.
330bf215546Sopenharmony_ci * \param dword1 Upper half of the 64 bit constant.
331bf215546Sopenharmony_ci */
332bf215546Sopenharmony_cistatic void pvr_pds_write_constant64(uint32_t *data_block,
333bf215546Sopenharmony_ci                                     uint32_t index,
334bf215546Sopenharmony_ci                                     uint32_t dword0,
335bf215546Sopenharmony_ci                                     uint32_t dword1)
336bf215546Sopenharmony_ci{
337bf215546Sopenharmony_ci   /* Has to be on 64 bit boundary. */
338bf215546Sopenharmony_ci   assert((index & 1) == 0);
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   /* Check range. */
341bf215546Sopenharmony_ci   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342bf215546Sopenharmony_ci                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   data_block[index + 0] = dword0;
345bf215546Sopenharmony_ci   data_block[index + 1] = dword1;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   PVR_PDS_PRINT_DATA("WriteConstant64",
348bf215546Sopenharmony_ci                      ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349bf215546Sopenharmony_ci                      index);
350bf215546Sopenharmony_ci}
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci/**
353bf215546Sopenharmony_ci * Write a 64-bit constant from a single wide word indexed by the long-range
354bf215546Sopenharmony_ci * number.
355bf215546Sopenharmony_ci *
356bf215546Sopenharmony_ci * \param data_block Pointer to data block to write to.
357bf215546Sopenharmony_ci * \param index Index within the data to write to.
358bf215546Sopenharmony_ci * \param word The 64-bit constant to write.
359bf215546Sopenharmony_ci */
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_cistatic void
362bf215546Sopenharmony_cipvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363bf215546Sopenharmony_ci{
364bf215546Sopenharmony_ci   /* Has to be on 64 bit boundary. */
365bf215546Sopenharmony_ci   assert((index & 1) == 0);
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci   /* Check range. */
368bf215546Sopenharmony_ci   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369bf215546Sopenharmony_ci                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   data_block[index + 0] = L32(word);
372bf215546Sopenharmony_ci   data_block[index + 1] = H32(word);
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375bf215546Sopenharmony_ci}
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_cistatic void pvr_pds_write_dma_address(uint32_t *data_block,
378bf215546Sopenharmony_ci                                      uint32_t index,
379bf215546Sopenharmony_ci                                      uint64_t address,
380bf215546Sopenharmony_ci                                      bool coherent,
381bf215546Sopenharmony_ci                                      const struct pvr_device_info *dev_info)
382bf215546Sopenharmony_ci{
383bf215546Sopenharmony_ci   /* Has to be on 64 bit boundary. */
384bf215546Sopenharmony_ci   assert((index & 1) == 0);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387bf215546Sopenharmony_ci      address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   /* Check range. */
390bf215546Sopenharmony_ci   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391bf215546Sopenharmony_ci                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci   data_block[index + 0] = L32(address);
394bf215546Sopenharmony_ci   data_block[index + 1] = H32(address);
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397bf215546Sopenharmony_ci}
398bf215546Sopenharmony_ci
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size In/out: the number of constants allocated; advanced past
 *                  the new constant, including any alignment padding.
 * \returns The index at which the constant was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* Allocation continues from the current end of the data segment. */
   uint32_t cursor = *data_size;
   const uint32_t slot = pvr_pds_get_constants(&cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_civoid pvr_pds_pixel_shader_sa_initialize(
423bf215546Sopenharmony_ci   struct pvr_pds_pixel_shader_sa_program *program)
424bf215546Sopenharmony_ci{
425bf215546Sopenharmony_ci   memset(program, 0, sizeof(*program));
426bf215546Sopenharmony_ci}
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci/**
429bf215546Sopenharmony_ci * Encode a DMA burst.
430bf215546Sopenharmony_ci *
431bf215546Sopenharmony_ci * \param dma_control DMA control words.
432bf215546Sopenharmony_ci * \param dma_address DMA address.
433bf215546Sopenharmony_ci * \param dest_offset Destination offset in the attribute.
434bf215546Sopenharmony_ci * \param dma_size The size of the DMA in words.
435bf215546Sopenharmony_ci * \param src_address Source address for the burst.
436bf215546Sopenharmony_ci * \param dev_info PVR device info structure.
437bf215546Sopenharmony_ci * \returns The number of DMA transfers required.
438bf215546Sopenharmony_ci */
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ciuint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441bf215546Sopenharmony_ci                                  uint64_t *dma_address,
442bf215546Sopenharmony_ci                                  uint32_t dest_offset,
443bf215546Sopenharmony_ci                                  uint32_t dma_size,
444bf215546Sopenharmony_ci                                  uint64_t src_address,
445bf215546Sopenharmony_ci                                  const struct pvr_device_info *dev_info)
446bf215546Sopenharmony_ci{
447bf215546Sopenharmony_ci   /* Simplified for MS2. */
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   /* Force to 1 DMA. */
450bf215546Sopenharmony_ci   const uint32_t num_kicks = 1;
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   dma_control[0] = dma_size
453bf215546Sopenharmony_ci                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
454bf215546Sopenharmony_ci   dma_control[0] |= dest_offset
455bf215546Sopenharmony_ci                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
458bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   dma_address[0] = src_address;
461bf215546Sopenharmony_ci   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
462bf215546Sopenharmony_ci      dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
463bf215546Sopenharmony_ci   }
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   return num_kicks;
466bf215546Sopenharmony_ci}
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci/* FIXME: use the csbgen interface and pvr_csb_pack.
469bf215546Sopenharmony_ci * FIXME: use bool for phase_rate_change.
470bf215546Sopenharmony_ci */
471bf215546Sopenharmony_ci/**
472bf215546Sopenharmony_ci * Sets up the USC control words for a DOUTU.
473bf215546Sopenharmony_ci *
474bf215546Sopenharmony_ci * \param usc_task_control USC task control structure to be setup.
475bf215546Sopenharmony_ci * \param execution_address USC execution virtual address.
476bf215546Sopenharmony_ci * \param usc_temps Number of USC temps.
477bf215546Sopenharmony_ci * \param sample_rate Sample rate for the DOUTU.
478bf215546Sopenharmony_ci * \param phase_rate_change Phase rate change for the DOUTU.
479bf215546Sopenharmony_ci */
480bf215546Sopenharmony_civoid pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
481bf215546Sopenharmony_ci                         uint64_t execution_address,
482bf215546Sopenharmony_ci                         uint32_t usc_temps,
483bf215546Sopenharmony_ci                         uint32_t sample_rate,
484bf215546Sopenharmony_ci                         bool phase_rate_change)
485bf215546Sopenharmony_ci{
486bf215546Sopenharmony_ci   usc_task_control->src0 = UINT64_C(0);
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   /* Set the execution address. */
489bf215546Sopenharmony_ci   pvr_set_usc_execution_address64(&(usc_task_control->src0),
490bf215546Sopenharmony_ci                                   execution_address);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   if (usc_temps > 0) {
493bf215546Sopenharmony_ci      /* Temps are allocated in blocks of 4 dwords. */
494bf215546Sopenharmony_ci      usc_temps =
495bf215546Sopenharmony_ci         DIV_ROUND_UP(usc_temps,
496bf215546Sopenharmony_ci                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci      /* Check for losing temps due to too many requested. */
499bf215546Sopenharmony_ci      assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
500bf215546Sopenharmony_ci             usc_temps);
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci      usc_task_control->src0 |=
503bf215546Sopenharmony_ci         ((uint64_t)(usc_temps &
504bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
505bf215546Sopenharmony_ci         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   if (sample_rate > 0) {
509bf215546Sopenharmony_ci      usc_task_control->src0 |=
510bf215546Sopenharmony_ci         ((uint64_t)sample_rate)
511bf215546Sopenharmony_ci         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
512bf215546Sopenharmony_ci   }
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   if (phase_rate_change) {
515bf215546Sopenharmony_ci      usc_task_control->src0 |=
516bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
517bf215546Sopenharmony_ci   }
518bf215546Sopenharmony_ci}
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci/**
521bf215546Sopenharmony_ci * Generates the PDS pixel event program.
522bf215546Sopenharmony_ci *
523bf215546Sopenharmony_ci * \param program Pointer to the PDS pixel event program.
524bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
525bf215546Sopenharmony_ci * \param gen_mode Generate either a data segment or code segment.
526bf215546Sopenharmony_ci * \param dev_info PVR device info structure.
527bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
528bf215546Sopenharmony_ci */
529bf215546Sopenharmony_ciuint32_t *
530bf215546Sopenharmony_cipvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
531bf215546Sopenharmony_ci                             uint32_t *restrict buffer,
532bf215546Sopenharmony_ci                             enum pvr_pds_generate_mode gen_mode,
533bf215546Sopenharmony_ci                             const struct pvr_device_info *dev_info)
534bf215546Sopenharmony_ci{
535bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
536bf215546Sopenharmony_ci   uint32_t *constants = buffer;
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci   uint32_t data_size = 0;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci   /* Copy the DMA control words and USC task control words to constants, then
541bf215546Sopenharmony_ci    * arrange them so that the 64-bit words are together followed by the 32-bit
542bf215546Sopenharmony_ci    * words.
543bf215546Sopenharmony_ci    */
544bf215546Sopenharmony_ci   uint32_t control_constant =
545bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_constant, 2, &data_size);
546bf215546Sopenharmony_ci   uint32_t emit_constant =
547bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_constant,
548bf215546Sopenharmony_ci                            (2 * program->num_emit_word_pairs),
549bf215546Sopenharmony_ci                            &data_size);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   uint32_t control_word_constant =
552bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_constant,
553bf215546Sopenharmony_ci                            program->num_emit_word_pairs,
554bf215546Sopenharmony_ci                            &data_size);
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
557bf215546Sopenharmony_ci      /* Src0 for DOUTU. */
558bf215546Sopenharmony_ci      pvr_pds_write_wide_constant(buffer,
559bf215546Sopenharmony_ci                                  control_constant,
560bf215546Sopenharmony_ci                                  program->task_control.src0); /* DOUTU */
561bf215546Sopenharmony_ci      /* 64-bit Src0. */
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci      /* Emit words for end of tile program. */
564bf215546Sopenharmony_ci      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
565bf215546Sopenharmony_ci         pvr_pds_write_constant64(constants,
566bf215546Sopenharmony_ci                                  emit_constant + (2 * i),
567bf215546Sopenharmony_ci                                  program->emit_words[(2 * i) + 0],
568bf215546Sopenharmony_ci                                  program->emit_words[(2 * i) + 1]);
569bf215546Sopenharmony_ci      }
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci      /* Control words. */
572bf215546Sopenharmony_ci      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
573bf215546Sopenharmony_ci         uint32_t doutw = pvr_pds_encode_doutw_src1(
574bf215546Sopenharmony_ci            (2 * i),
575bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER64,
576bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
577bf215546Sopenharmony_ci            false,
578bf215546Sopenharmony_ci            dev_info);
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci         if (i == (program->num_emit_word_pairs - 1))
581bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci         pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
584bf215546Sopenharmony_ci      }
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
588bf215546Sopenharmony_ci      /* DOUTW the state into the shared register. */
589bf215546Sopenharmony_ci      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
590bf215546Sopenharmony_ci         *buffer++ = pvr_pds_encode_doutw64(
591bf215546Sopenharmony_ci            /* cc */ 0,
592bf215546Sopenharmony_ci            /* END */ 0,
593bf215546Sopenharmony_ci            /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
594bf215546Sopenharmony_ci            /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
595bf215546Sopenharmony_ci                                                         */
596bf215546Sopenharmony_ci      }
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci      /* Kick the USC. */
599bf215546Sopenharmony_ci      *buffer++ = pvr_pds_encode_doutu(
600bf215546Sopenharmony_ci         /* cc */ 0,
601bf215546Sopenharmony_ci         /* END */ 1,
602bf215546Sopenharmony_ci         /* SRC0 */ control_constant >> 1);
603bf215546Sopenharmony_ci   }
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci   uint32_t code_size = 1 + program->num_emit_word_pairs;
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci   /* Save the data segment Pointer and size. */
608bf215546Sopenharmony_ci   program->data_segment = constants;
609bf215546Sopenharmony_ci   program->data_size = data_size;
610bf215546Sopenharmony_ci   program->code_size = code_size;
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
613bf215546Sopenharmony_ci      return (constants + next_constant);
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
616bf215546Sopenharmony_ci      return buffer;
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci   return NULL;
619bf215546Sopenharmony_ci}
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci/**
622bf215546Sopenharmony_ci * Checks if any of the vertex streams contains instance data.
623bf215546Sopenharmony_ci *
624bf215546Sopenharmony_ci * \param streams Streams contained in the vertex shader.
625bf215546Sopenharmony_ci * \param num_streams Number of vertex streams.
626bf215546Sopenharmony_ci * \returns true if one or more of the given vertex streams contains
627bf215546Sopenharmony_ci *          instance data, otherwise false.
628bf215546Sopenharmony_ci */
629bf215546Sopenharmony_cistatic bool pvr_pds_vertex_streams_contains_instance_data(
630bf215546Sopenharmony_ci   const struct pvr_pds_vertex_stream *streams,
631bf215546Sopenharmony_ci   uint32_t num_streams)
632bf215546Sopenharmony_ci{
633bf215546Sopenharmony_ci   for (uint32_t i = 0; i < num_streams; i++) {
634bf215546Sopenharmony_ci      const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
635bf215546Sopenharmony_ci      if (vertex_stream->instance_data)
636bf215546Sopenharmony_ci         return true;
637bf215546Sopenharmony_ci   }
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   return false;
640bf215546Sopenharmony_ci}
641bf215546Sopenharmony_ci
/**
 * Allocates constants for the PDS vertex shader, where the constant space is
 * divided into banks.
 *
 * While *next_constant is below (num_backs << 3) the allocation is made from
 * the banked region handled here; once it reaches that limit the request is
 * forwarded unchanged to pvr_pds_get_constants().
 *
 * \param num_backs Bank count; (num_backs << 3) is the first constant past
 *                  the banked region.
 * \param next_constant In/out allocation cursor.
 * \param num_constants Number of constants requested, must be 1 or 2.
 * \param count In/out running constant count. Within the banked region it is
 *              only increased (by 2) when a two-constant request gets pushed
 *              to or past the end of that region.
 * \returns Index of the first allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   assert(num_constants == 1 || num_constants == 2);

   const uint32_t banked_limit = num_backs << 3;
   const uint32_t cursor = *next_constant;

   /* Past the banked region: fall back to the plain allocator. */
   if (cursor >= banked_limit)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   const bool on_group_boundary = (cursor % 8) == 0;

   if (num_constants == 1) {
      /* A single constant is taken at the cursor. From a multiple of 8 the
       * cursor moves to the following slot; otherwise it skips ahead by 7,
       * which lands on the next multiple of 8 when it was one past one.
       */
      *next_constant = cursor + (on_group_boundary ? 1 : 7);
      return cursor;
   }

   if (on_group_boundary) {
      /* A pair starting on a multiple of 8 uses up the whole group. */
      *next_constant = cursor + 8;
      return cursor;
   }

   /* A pair requested off a group boundary: move ahead by 7 first and place
    * the pair there.
    */
   const uint32_t pair_base = cursor + 7;

   if (pair_base >= banked_limit) {
      /* The pair ended up outside the banked region, so it takes exactly
       * two constants and is added to the running count.
       */
      *next_constant = pair_base + 2;
      *count += 2;
   } else {
      *next_constant = pair_base + 8;
   }

   return pair_base;
}
680bf215546Sopenharmony_ci
681bf215546Sopenharmony_ci/**
682bf215546Sopenharmony_ci * Generates a PDS program to load USC vertex inputs based from one or more
683bf215546Sopenharmony_ci * vertex buffers, each containing potentially multiple elements, and then a
684bf215546Sopenharmony_ci * DOUTU to execute the USC.
685bf215546Sopenharmony_ci *
686bf215546Sopenharmony_ci * \param program Pointer to the description of the program which should be
687bf215546Sopenharmony_ci *                generated.
688bf215546Sopenharmony_ci * \param buffer Pointer to buffer that receives the output of this function.
689bf215546Sopenharmony_ci *               Will either be the data segment or code segment depending on
690bf215546Sopenharmony_ci *               gen_mode.
691bf215546Sopenharmony_ci * \param gen_mode Which part to generate, either data segment or
692bf215546Sopenharmony_ci *                 code segment. If PDS_GENERATE_SIZES is specified, nothing is
693bf215546Sopenharmony_ci *                 written, but size information in program is updated.
694bf215546Sopenharmony_ci * \param dev_info PVR device info structure.
695bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the data - i.e the value
696bf215546Sopenharmony_ci *          of the buffer after writing its contents.
697bf215546Sopenharmony_ci */
698bf215546Sopenharmony_ciuint32_t *
699bf215546Sopenharmony_cipvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700bf215546Sopenharmony_ci                      uint32_t *restrict buffer,
701bf215546Sopenharmony_ci                      enum pvr_pds_generate_mode gen_mode,
702bf215546Sopenharmony_ci                      const struct pvr_device_info *dev_info)
703bf215546Sopenharmony_ci{
704bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705bf215546Sopenharmony_ci   uint32_t next_stream_constant;
706bf215546Sopenharmony_ci   uint32_t next_temp;
707bf215546Sopenharmony_ci   uint32_t usc_control_constant64;
708bf215546Sopenharmony_ci   uint32_t stride_constant32 = 0;
709bf215546Sopenharmony_ci   uint32_t dma_address_constant64 = 0;
710bf215546Sopenharmony_ci   uint32_t dma_control_constant64;
711bf215546Sopenharmony_ci   uint32_t multiplier_constant32 = 0;
712bf215546Sopenharmony_ci   uint32_t base_instance_const32 = 0;
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci   uint32_t temp = 0;
715bf215546Sopenharmony_ci   uint32_t index_temp64 = 0;
716bf215546Sopenharmony_ci   uint32_t num_vertices_temp64 = 0;
717bf215546Sopenharmony_ci   uint32_t pre_index_temp = (uint32_t)(-1);
718bf215546Sopenharmony_ci   bool first_ddmadt = true;
719bf215546Sopenharmony_ci   uint32_t input_register0;
720bf215546Sopenharmony_ci   uint32_t input_register1;
721bf215546Sopenharmony_ci   uint32_t input_register2;
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci   struct pvr_pds_vertex_stream *vertex_stream;
724bf215546Sopenharmony_ci   struct pvr_pds_vertex_element *vertex_element;
725bf215546Sopenharmony_ci   uint32_t shift_2s_comp;
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_ci   uint32_t data_size = 0;
728bf215546Sopenharmony_ci   uint32_t code_size = 0;
729bf215546Sopenharmony_ci   uint32_t temps_used = 0;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   bool direct_writes_needed = false;
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   uint32_t consts_size = 0;
734bf215546Sopenharmony_ci   uint32_t vertex_id_control_word_const32 = 0;
735bf215546Sopenharmony_ci   uint32_t instance_id_control_word_const32 = 0;
736bf215546Sopenharmony_ci   uint32_t instance_id_modifier_word_const32 = 0;
737bf215546Sopenharmony_ci   uint32_t geometry_id_control_word_const64 = 0;
738bf215546Sopenharmony_ci   uint32_t empty_dma_control_constant64 = 0;
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci   bool any_instanced_stream =
741bf215546Sopenharmony_ci      pvr_pds_vertex_streams_contains_instance_data(program->streams,
742bf215546Sopenharmony_ci                                                    program->num_streams);
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   uint32_t base_instance_register = 0;
745bf215546Sopenharmony_ci   uint32_t ddmadt_enables = 0;
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   bool issue_empty_ddmad = false;
748bf215546Sopenharmony_ci   uint32_t last_stream_index = program->num_streams - 1;
749bf215546Sopenharmony_ci   bool current_p0 = false;
750bf215546Sopenharmony_ci   uint32_t skip_stream_flag = 0;
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   /* Generate the PDS vertex shader data. */
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci#if defined(DEBUG)
755bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756bf215546Sopenharmony_ci      for (uint32_t i = 0; i < program->data_size; i++)
757bf215546Sopenharmony_ci         buffer[i] = 0xDEADBEEF;
758bf215546Sopenharmony_ci   }
759bf215546Sopenharmony_ci#endif
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   /* Generate the PDS vertex shader program */
762bf215546Sopenharmony_ci   next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763bf215546Sopenharmony_ci   /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764bf215546Sopenharmony_ci   input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765bf215546Sopenharmony_ci   /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766bf215546Sopenharmony_ci   input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci   if (program->iterate_remap_id)
769bf215546Sopenharmony_ci      input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770bf215546Sopenharmony_ci   else
771bf215546Sopenharmony_ci      input_register2 = 0; /* Not used, but need to silence the compiler. */
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci   /* Generate the PDS vertex shader code. The constants in the data block are
774bf215546Sopenharmony_ci    * arranged as follows:
775bf215546Sopenharmony_ci    *
776bf215546Sopenharmony_ci    * 64 bit bank 0        64 bit bank 1          64 bit bank 2    64 bit bank
777bf215546Sopenharmony_ci    * 3 Not used (tmps)    Stride | Multiplier    Address          Control
778bf215546Sopenharmony_ci    */
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   /* Find out how many constants are needed by streams. */
781bf215546Sopenharmony_ci   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_constant,
783bf215546Sopenharmony_ci                            8 * program->streams[stream].num_elements,
784bf215546Sopenharmony_ci                            &consts_size);
785bf215546Sopenharmony_ci   }
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   /* If there are no vertex streams allocate the first bank for USC Code
788bf215546Sopenharmony_ci    * Address.
789bf215546Sopenharmony_ci    */
790bf215546Sopenharmony_ci   if (consts_size == 0)
791bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_constant, 2, &consts_size);
792bf215546Sopenharmony_ci   else
793bf215546Sopenharmony_ci      next_constant = 8;
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   direct_writes_needed = program->iterate_instance_id ||
796bf215546Sopenharmony_ci                          program->iterate_vtx_id || program->iterate_remap_id;
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799bf215546Sopenharmony_ci      /* Evaluate what config of DDMAD should be used for each stream. */
800bf215546Sopenharmony_ci      for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801bf215546Sopenharmony_ci         vertex_stream = &program->streams[stream];
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci         if (vertex_stream->use_ddmadt) {
804bf215546Sopenharmony_ci            ddmadt_enables |= (1 << stream);
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci            /* The condition for index value is:
807bf215546Sopenharmony_ci             * index * stride + size <= bufferSize (all in unit of byte)
808bf215546Sopenharmony_ci             */
809bf215546Sopenharmony_ci            if (vertex_stream->stride == 0) {
810bf215546Sopenharmony_ci               if (vertex_stream->elements[0].size <=
811bf215546Sopenharmony_ci                   vertex_stream->buffer_size_in_bytes) {
812bf215546Sopenharmony_ci                  /* index can be any value -> no need to use DDMADT. */
813bf215546Sopenharmony_ci                  ddmadt_enables &= (~(1 << stream));
814bf215546Sopenharmony_ci               } else {
815bf215546Sopenharmony_ci                  /* No index works -> no need to issue DDMAD instruction.
816bf215546Sopenharmony_ci                   */
817bf215546Sopenharmony_ci                  skip_stream_flag |= (1 << stream);
818bf215546Sopenharmony_ci               }
819bf215546Sopenharmony_ci            } else {
820bf215546Sopenharmony_ci               /* index * stride + size <= bufferSize
821bf215546Sopenharmony_ci                *
822bf215546Sopenharmony_ci                * can be converted to:
823bf215546Sopenharmony_ci                * index <= (bufferSize - size) / stride
824bf215546Sopenharmony_ci                *
825bf215546Sopenharmony_ci                * where maximum index is:
826bf215546Sopenharmony_ci                * integer((bufferSize - size) / stride).
827bf215546Sopenharmony_ci                */
828bf215546Sopenharmony_ci               if (vertex_stream->buffer_size_in_bytes <
829bf215546Sopenharmony_ci                   vertex_stream->elements[0].size) {
830bf215546Sopenharmony_ci                  /* No index works -> no need to issue DDMAD instruction.
831bf215546Sopenharmony_ci                   */
832bf215546Sopenharmony_ci                  skip_stream_flag |= (1 << stream);
833bf215546Sopenharmony_ci               } else {
834bf215546Sopenharmony_ci                  uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835bf215546Sopenharmony_ci                                        vertex_stream->elements[0].size) /
836bf215546Sopenharmony_ci                                       vertex_stream->stride;
837bf215546Sopenharmony_ci                  if (max_index == 0xFFFFFFFFu) {
838bf215546Sopenharmony_ci                     /* No need to use DDMADT as all possible indices can
839bf215546Sopenharmony_ci                      * pass the test.
840bf215546Sopenharmony_ci                      */
841bf215546Sopenharmony_ci                     ddmadt_enables &= (~(1 << stream));
842bf215546Sopenharmony_ci                  } else {
843bf215546Sopenharmony_ci                     /* In this case, test condition can be changed to
844bf215546Sopenharmony_ci                      * index < max_index + 1.
845bf215546Sopenharmony_ci                      */
846bf215546Sopenharmony_ci                     program->streams[stream].num_vertices =
847bf215546Sopenharmony_ci                        pvr_pds_get_bank_based_constants(program->num_streams,
848bf215546Sopenharmony_ci                                                         &next_constant,
849bf215546Sopenharmony_ci                                                         1,
850bf215546Sopenharmony_ci                                                         &consts_size);
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci                     if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853bf215546Sopenharmony_ci                        pvr_pds_write_constant32(
854bf215546Sopenharmony_ci                           buffer,
855bf215546Sopenharmony_ci                           program->streams[stream].num_vertices,
856bf215546Sopenharmony_ci                           max_index + 1);
857bf215546Sopenharmony_ci                     }
858bf215546Sopenharmony_ci                  }
859bf215546Sopenharmony_ci               }
860bf215546Sopenharmony_ci            }
861bf215546Sopenharmony_ci         }
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci         if ((skip_stream_flag & (1 << stream)) == 0) {
864bf215546Sopenharmony_ci            issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865bf215546Sopenharmony_ci            last_stream_index = stream;
866bf215546Sopenharmony_ci         }
867bf215546Sopenharmony_ci      }
868bf215546Sopenharmony_ci   } else {
869bf215546Sopenharmony_ci      if (program->num_streams > 0 &&
870bf215546Sopenharmony_ci          program->streams[program->num_streams - 1].use_ddmadt) {
871bf215546Sopenharmony_ci         issue_empty_ddmad = true;
872bf215546Sopenharmony_ci      }
873bf215546Sopenharmony_ci   }
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci   if (direct_writes_needed)
876bf215546Sopenharmony_ci      issue_empty_ddmad = false;
877bf215546Sopenharmony_ci
878bf215546Sopenharmony_ci   if (issue_empty_ddmad) {
879bf215546Sopenharmony_ci      /* An empty DMA control const (DMA size = 0) is required in case the
880bf215546Sopenharmony_ci       * last DDMADD is predicated out and last flag does not have any usage.
881bf215546Sopenharmony_ci       */
882bf215546Sopenharmony_ci      empty_dma_control_constant64 =
883bf215546Sopenharmony_ci         pvr_pds_get_bank_based_constants(program->num_streams,
884bf215546Sopenharmony_ci                                          &next_constant,
885bf215546Sopenharmony_ci                                          2,
886bf215546Sopenharmony_ci                                          &consts_size);
887bf215546Sopenharmony_ci   }
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci   /* Assign constants for non stream or base instance if there is any
890bf215546Sopenharmony_ci    * instanced stream.
891bf215546Sopenharmony_ci    */
892bf215546Sopenharmony_ci   if (direct_writes_needed || any_instanced_stream ||
893bf215546Sopenharmony_ci       program->instance_ID_modifier) {
894bf215546Sopenharmony_ci      if (program->iterate_vtx_id) {
895bf215546Sopenharmony_ci         vertex_id_control_word_const32 =
896bf215546Sopenharmony_ci            pvr_pds_get_bank_based_constants(program->num_streams,
897bf215546Sopenharmony_ci                                             &next_constant,
898bf215546Sopenharmony_ci                                             1,
899bf215546Sopenharmony_ci                                             &consts_size);
900bf215546Sopenharmony_ci      }
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_ci      if (program->iterate_instance_id || program->instance_ID_modifier) {
903bf215546Sopenharmony_ci         if (program->instance_ID_modifier == 0) {
904bf215546Sopenharmony_ci            instance_id_control_word_const32 =
905bf215546Sopenharmony_ci               pvr_pds_get_bank_based_constants(program->num_streams,
906bf215546Sopenharmony_ci                                                &next_constant,
907bf215546Sopenharmony_ci                                                1,
908bf215546Sopenharmony_ci                                                &consts_size);
909bf215546Sopenharmony_ci         } else {
910bf215546Sopenharmony_ci            instance_id_modifier_word_const32 =
911bf215546Sopenharmony_ci               pvr_pds_get_bank_based_constants(program->num_streams,
912bf215546Sopenharmony_ci                                                &next_constant,
913bf215546Sopenharmony_ci                                                1,
914bf215546Sopenharmony_ci                                                &consts_size);
915bf215546Sopenharmony_ci            if ((instance_id_modifier_word_const32 % 2) == 0) {
916bf215546Sopenharmony_ci               instance_id_control_word_const32 =
917bf215546Sopenharmony_ci                  pvr_pds_get_bank_based_constants(program->num_streams,
918bf215546Sopenharmony_ci                                                   &next_constant,
919bf215546Sopenharmony_ci                                                   1,
920bf215546Sopenharmony_ci                                                   &consts_size);
921bf215546Sopenharmony_ci            } else {
922bf215546Sopenharmony_ci               instance_id_control_word_const32 =
923bf215546Sopenharmony_ci                  instance_id_modifier_word_const32;
924bf215546Sopenharmony_ci               instance_id_modifier_word_const32 =
925bf215546Sopenharmony_ci                  pvr_pds_get_bank_based_constants(program->num_streams,
926bf215546Sopenharmony_ci                                                   &next_constant,
927bf215546Sopenharmony_ci                                                   1,
928bf215546Sopenharmony_ci                                                   &consts_size);
929bf215546Sopenharmony_ci            }
930bf215546Sopenharmony_ci         }
931bf215546Sopenharmony_ci      }
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci      if (program->base_instance != 0) {
934bf215546Sopenharmony_ci         base_instance_const32 =
935bf215546Sopenharmony_ci            pvr_pds_get_bank_based_constants(program->num_streams,
936bf215546Sopenharmony_ci                                             &next_constant,
937bf215546Sopenharmony_ci                                             1,
938bf215546Sopenharmony_ci                                             &consts_size);
939bf215546Sopenharmony_ci      }
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci      if (program->iterate_remap_id) {
942bf215546Sopenharmony_ci         geometry_id_control_word_const64 =
943bf215546Sopenharmony_ci            pvr_pds_get_bank_based_constants(program->num_streams,
944bf215546Sopenharmony_ci                                             &next_constant,
945bf215546Sopenharmony_ci                                             2,
946bf215546Sopenharmony_ci                                             &consts_size);
947bf215546Sopenharmony_ci      }
948bf215546Sopenharmony_ci   }
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci   if (program->instance_ID_modifier != 0) {
951bf215546Sopenharmony_ci      /* This instanceID modifier is used when a draw array instanced call
952bf215546Sopenharmony_ci       * sourcing from client data cannot fit into vertex buffer and needs to
953bf215546Sopenharmony_ci       * be broken down into several draw calls.
954bf215546Sopenharmony_ci       */
955bf215546Sopenharmony_ci
956bf215546Sopenharmony_ci      code_size += 1;
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer,
960bf215546Sopenharmony_ci                                  instance_id_modifier_word_const32,
961bf215546Sopenharmony_ci                                  program->instance_ID_modifier);
962bf215546Sopenharmony_ci      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963bf215546Sopenharmony_ci         *buffer++ = pvr_pds_inst_encode_add32(
964bf215546Sopenharmony_ci            /* cc */ 0x0,
965bf215546Sopenharmony_ci            /* ALUM */ 0, /* Unsigned */
966bf215546Sopenharmony_ci            /* SNA */ 0, /* Add */
967bf215546Sopenharmony_ci            /* SRC0 32b */ instance_id_modifier_word_const32,
968bf215546Sopenharmony_ci            /* SRC1 32b */ input_register1,
969bf215546Sopenharmony_ci            /* DST 32b */ input_register1);
970bf215546Sopenharmony_ci      }
971bf215546Sopenharmony_ci   }
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci   /* Adjust instanceID if necessary. */
974bf215546Sopenharmony_ci   if (any_instanced_stream || program->iterate_instance_id) {
975bf215546Sopenharmony_ci      if (program->base_instance != 0) {
976bf215546Sopenharmony_ci         assert(!program->draw_indirect);
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer,
980bf215546Sopenharmony_ci                                     base_instance_const32,
981bf215546Sopenharmony_ci                                     program->base_instance);
982bf215546Sopenharmony_ci         }
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci         base_instance_register = base_instance_const32;
985bf215546Sopenharmony_ci      }
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci      if (program->draw_indirect) {
988bf215546Sopenharmony_ci         assert((program->instance_ID_modifier == 0) &&
989bf215546Sopenharmony_ci                (program->base_instance == 0));
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci         base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992bf215546Sopenharmony_ci      }
993bf215546Sopenharmony_ci   }
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci   next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996bf215546Sopenharmony_ci   usc_control_constant64 =
997bf215546Sopenharmony_ci      pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_ci   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000bf215546Sopenharmony_ci      bool instance_data_with_base_instance;
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci      if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003bf215546Sopenharmony_ci          ((skip_stream_flag & (1 << stream)) != 0)) {
1004bf215546Sopenharmony_ci         continue;
1005bf215546Sopenharmony_ci      }
1006bf215546Sopenharmony_ci
1007bf215546Sopenharmony_ci      vertex_stream = &program->streams[stream];
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci      instance_data_with_base_instance =
1010bf215546Sopenharmony_ci         ((vertex_stream->instance_data) &&
1011bf215546Sopenharmony_ci          ((program->base_instance > 0) || (program->draw_indirect)));
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci      /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014bf215546Sopenharmony_ci       * USC constants.
1015bf215546Sopenharmony_ci       */
1016bf215546Sopenharmony_ci      if (stream == 0) {
1017bf215546Sopenharmony_ci         stride_constant32 =
1018bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019bf215546Sopenharmony_ci      } else {
1020bf215546Sopenharmony_ci         next_constant =
1021bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci         /* Skip bank 0. */
1024bf215546Sopenharmony_ci         stride_constant32 = next_constant + 2;
1025bf215546Sopenharmony_ci      }
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci      multiplier_constant32 = stride_constant32 + 1;
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer,
1031bf215546Sopenharmony_ci                                  stride_constant32,
1032bf215546Sopenharmony_ci                                  vertex_stream->stride);
1033bf215546Sopenharmony_ci
1034bf215546Sopenharmony_ci         /* Vertex stream frequency multiplier. */
1035bf215546Sopenharmony_ci         if (vertex_stream->multiplier)
1036bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer,
1037bf215546Sopenharmony_ci                                     multiplier_constant32,
1038bf215546Sopenharmony_ci                                     vertex_stream->multiplier);
1039bf215546Sopenharmony_ci      }
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci      /* Update the code size count and temps count for the above code
1042bf215546Sopenharmony_ci       * segment.
1043bf215546Sopenharmony_ci       */
1044bf215546Sopenharmony_ci      if (vertex_stream->current_state) {
1045bf215546Sopenharmony_ci         code_size += 1;
1046bf215546Sopenharmony_ci         temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047bf215546Sopenharmony_ci      } else {
1048bf215546Sopenharmony_ci         unsigned int num_temps_required = 0;
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_ci         if (vertex_stream->multiplier) {
1051bf215546Sopenharmony_ci            num_temps_required += 2;
1052bf215546Sopenharmony_ci            code_size += 3;
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci            if (vertex_stream->shift) {
1055bf215546Sopenharmony_ci               code_size += 1;
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_ci               if ((int32_t)vertex_stream->shift > 0)
1058bf215546Sopenharmony_ci                  code_size += 1;
1059bf215546Sopenharmony_ci            }
1060bf215546Sopenharmony_ci         } else if (vertex_stream->shift) {
1061bf215546Sopenharmony_ci            code_size += 1;
1062bf215546Sopenharmony_ci            num_temps_required += 1;
1063bf215546Sopenharmony_ci         } else if (instance_data_with_base_instance) {
1064bf215546Sopenharmony_ci            num_temps_required += 1;
1065bf215546Sopenharmony_ci         }
1066bf215546Sopenharmony_ci
1067bf215546Sopenharmony_ci         if (num_temps_required != 0) {
1068bf215546Sopenharmony_ci            temp = pvr_pds_get_temps(&next_temp,
1069bf215546Sopenharmony_ci                                     num_temps_required,
1070bf215546Sopenharmony_ci                                     &temps_used); /* 64-bit */
1071bf215546Sopenharmony_ci         } else {
1072bf215546Sopenharmony_ci            temp = vertex_stream->instance_data ? input_register1
1073bf215546Sopenharmony_ci                                                : input_register0;
1074bf215546Sopenharmony_ci         }
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci         if (instance_data_with_base_instance)
1077bf215546Sopenharmony_ci            code_size += 1;
1078bf215546Sopenharmony_ci      }
1079bf215546Sopenharmony_ci
1080bf215546Sopenharmony_ci      /* The real code segment. */
1081bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082bf215546Sopenharmony_ci         /* If it's current state stream, then index = 0 always. */
1083bf215546Sopenharmony_ci         if (vertex_stream->current_state) {
1084bf215546Sopenharmony_ci            /* Put zero in temp. */
1085bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086bf215546Sopenharmony_ci         } else if (vertex_stream->multiplier) {
1087bf215546Sopenharmony_ci            /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088bf215546Sopenharmony_ci             * new: Iout = (Iin * Multiplier) >> (shift+31)
1089bf215546Sopenharmony_ci             */
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci            /* Put zero in temp. Need zero for add part of the following
1092bf215546Sopenharmony_ci             * MAD. MAD source is 64 bit, so need two LIMMs.
1093bf215546Sopenharmony_ci             */
1094bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095bf215546Sopenharmony_ci            /* Put zero in temp. Need zero for add part of the following
1096bf215546Sopenharmony_ci             * MAD.
1097bf215546Sopenharmony_ci             */
1098bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_ci            /* old: (Iin * (Multiplier+2^24))
1101bf215546Sopenharmony_ci             * new: (Iin * Multiplier)
1102bf215546Sopenharmony_ci             */
1103bf215546Sopenharmony_ci            *buffer++ = pvr_rogue_inst_encode_mad(
1104bf215546Sopenharmony_ci               0, /* Sign of add is positive. */
1105bf215546Sopenharmony_ci               0, /* Unsigned ALU mode */
1106bf215546Sopenharmony_ci               0, /* Unconditional */
1107bf215546Sopenharmony_ci               multiplier_constant32,
1108bf215546Sopenharmony_ci               vertex_stream->instance_data ? input_register1 : input_register0,
1109bf215546Sopenharmony_ci               temp / 2,
1110bf215546Sopenharmony_ci               temp / 2);
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci            if (vertex_stream->shift) {
1113bf215546Sopenharmony_ci               int32_t shift = (int32_t)vertex_stream->shift;
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_ci               /* new: >> (shift + 31) */
1116bf215546Sopenharmony_ci               shift += 31;
1117bf215546Sopenharmony_ci               shift *= -1;
1118bf215546Sopenharmony_ci
1119bf215546Sopenharmony_ci               if (shift < -31) {
1120bf215546Sopenharmony_ci                  /* >> (31) */
1121bf215546Sopenharmony_ci                  shift_2s_comp = 0xFFFE1;
1122bf215546Sopenharmony_ci                  *buffer++ = pvr_pds_inst_encode_stflp64(
1123bf215546Sopenharmony_ci                     /* cc */ 0,
1124bf215546Sopenharmony_ci                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125bf215546Sopenharmony_ci                     /* IM */ 1, /*  enable immediate */
1126bf215546Sopenharmony_ci                     /* SRC0 */ temp / 2,
1127bf215546Sopenharmony_ci                     /* SRC1 */ input_register0, /* This won't be used in
1128bf215546Sopenharmony_ci                                                  * a shift operation.
1129bf215546Sopenharmony_ci                                                  */
1130bf215546Sopenharmony_ci                     /* SRC2 (Shift) */ shift_2s_comp,
1131bf215546Sopenharmony_ci                     /* DST */ temp / 2);
1132bf215546Sopenharmony_ci                  shift += 31;
1133bf215546Sopenharmony_ci               }
1134bf215546Sopenharmony_ci
1135bf215546Sopenharmony_ci               /* old: >> (Shift+24)
1136bf215546Sopenharmony_ci                * new: >> (shift + 31)
1137bf215546Sopenharmony_ci                */
1138bf215546Sopenharmony_ci               shift_2s_comp = *((uint32_t *)&shift);
1139bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_stflp64(
1140bf215546Sopenharmony_ci                  /* cc */ 0,
1141bf215546Sopenharmony_ci                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142bf215546Sopenharmony_ci                  /* IM */ 1, /*enable immediate */
1143bf215546Sopenharmony_ci                  /* SRC0 */ temp / 2,
1144bf215546Sopenharmony_ci                  /* SRC1 */ input_register0, /* This won't be used in
1145bf215546Sopenharmony_ci                                               * a shift operation.
1146bf215546Sopenharmony_ci                                               */
1147bf215546Sopenharmony_ci                  /* SRC2 (Shift) */ shift_2s_comp,
1148bf215546Sopenharmony_ci                  /* DST */ temp / 2);
1149bf215546Sopenharmony_ci            }
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_ci            if (instance_data_with_base_instance) {
1152bf215546Sopenharmony_ci               *buffer++ =
1153bf215546Sopenharmony_ci                  pvr_pds_inst_encode_add32(0, /* cc */
1154bf215546Sopenharmony_ci                                            0, /* ALNUM */
1155bf215546Sopenharmony_ci                                            0, /* SNA */
1156bf215546Sopenharmony_ci                                            base_instance_register, /* src0
1157bf215546Sopenharmony_ci                                                                     */
1158bf215546Sopenharmony_ci                                            temp, /* src1 */
1159bf215546Sopenharmony_ci                                            temp /* dst */
1160bf215546Sopenharmony_ci                  );
1161bf215546Sopenharmony_ci            }
1162bf215546Sopenharmony_ci         } else { /* NOT vertex_stream->multiplier */
1163bf215546Sopenharmony_ci            if (vertex_stream->shift) {
1164bf215546Sopenharmony_ci               /* Shift Index/InstanceNum Right by shift bits. Put result
1165bf215546Sopenharmony_ci                * in a Temp.
1166bf215546Sopenharmony_ci                */
1167bf215546Sopenharmony_ci
1168bf215546Sopenharmony_ci               /* 2's complement of shift as this will be a right shift. */
1169bf215546Sopenharmony_ci               shift_2s_comp = ~(vertex_stream->shift) + 1;
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_stflp32(
1172bf215546Sopenharmony_ci                  /* IM */ 1, /*  enable immediate. */
1173bf215546Sopenharmony_ci                  /* cc */ 0,
1174bf215546Sopenharmony_ci                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175bf215546Sopenharmony_ci                  /* SRC0 */ vertex_stream->instance_data ? input_register1
1176bf215546Sopenharmony_ci                                                          : input_register0,
1177bf215546Sopenharmony_ci                  /* SRC1 */ input_register0, /* This won't be used in
1178bf215546Sopenharmony_ci                                               * a shift operation.
1179bf215546Sopenharmony_ci                                               */
1180bf215546Sopenharmony_ci                  /* SRC2 (Shift) */ shift_2s_comp,
1181bf215546Sopenharmony_ci                  /* DST */ temp);
1182bf215546Sopenharmony_ci
1183bf215546Sopenharmony_ci               if (instance_data_with_base_instance) {
1184bf215546Sopenharmony_ci                  *buffer++ =
1185bf215546Sopenharmony_ci                     pvr_pds_inst_encode_add32(0, /* cc */
1186bf215546Sopenharmony_ci                                               0, /* ALNUM */
1187bf215546Sopenharmony_ci                                               0, /* SNA */
1188bf215546Sopenharmony_ci                                               base_instance_register, /* src0
1189bf215546Sopenharmony_ci                                                                        */
1190bf215546Sopenharmony_ci                                               temp, /* src1 */
1191bf215546Sopenharmony_ci                                               temp /* dst */
1192bf215546Sopenharmony_ci                     );
1193bf215546Sopenharmony_ci               }
1194bf215546Sopenharmony_ci            } else {
1195bf215546Sopenharmony_ci               if (instance_data_with_base_instance) {
1196bf215546Sopenharmony_ci                  *buffer++ =
1197bf215546Sopenharmony_ci                     pvr_pds_inst_encode_add32(0, /* cc */
1198bf215546Sopenharmony_ci                                               0, /* ALNUM */
1199bf215546Sopenharmony_ci                                               0, /* SNA */
1200bf215546Sopenharmony_ci                                               base_instance_register, /* src0
1201bf215546Sopenharmony_ci                                                                        */
1202bf215546Sopenharmony_ci                                               input_register1, /* src1 */
1203bf215546Sopenharmony_ci                                               temp /* dst */
1204bf215546Sopenharmony_ci                     );
1205bf215546Sopenharmony_ci               } else {
1206bf215546Sopenharmony_ci                  /* If the shift instruction doesn't happen, use the IR
1207bf215546Sopenharmony_ci                   * directly into the following MAD.
1208bf215546Sopenharmony_ci                   */
1209bf215546Sopenharmony_ci                  temp = vertex_stream->instance_data ? input_register1
1210bf215546Sopenharmony_ci                                                      : input_register0;
1211bf215546Sopenharmony_ci               }
1212bf215546Sopenharmony_ci            }
1213bf215546Sopenharmony_ci         }
1214bf215546Sopenharmony_ci      }
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_ci      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217bf215546Sopenharmony_ci         if (vertex_stream->use_ddmadt)
1218bf215546Sopenharmony_ci            ddmadt_enables |= (1 << stream);
1219bf215546Sopenharmony_ci      } else {
1220bf215546Sopenharmony_ci         if ((ddmadt_enables & (1 << stream)) != 0) {
1221bf215546Sopenharmony_ci            /* Emulate what DDMADT does for range checking. */
1222bf215546Sopenharmony_ci            if (first_ddmadt) {
1223bf215546Sopenharmony_ci               /* Get an 64 bits temp such that cmp current index with
1224bf215546Sopenharmony_ci                * allowed vertex number can work.
1225bf215546Sopenharmony_ci                */
1226bf215546Sopenharmony_ci               index_temp64 =
1227bf215546Sopenharmony_ci                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228bf215546Sopenharmony_ci                                                                  */
1229bf215546Sopenharmony_ci               num_vertices_temp64 =
1230bf215546Sopenharmony_ci                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231bf215546Sopenharmony_ci                                                                  */
1232bf215546Sopenharmony_ci
1233bf215546Sopenharmony_ci               index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234bf215546Sopenharmony_ci               num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_ci               code_size += 3;
1237bf215546Sopenharmony_ci               current_p0 = true;
1238bf215546Sopenharmony_ci            }
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ci            code_size += (temp == pre_index_temp ? 1 : 2);
1241bf215546Sopenharmony_ci
1242bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243bf215546Sopenharmony_ci               if (first_ddmadt) {
1244bf215546Sopenharmony_ci                  /* Set predicate to be P0. */
1245bf215546Sopenharmony_ci                  *buffer++ = pvr_pds_encode_bra(
1246bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247bf215546Sopenharmony_ci                                                        */
1248bf215546Sopenharmony_ci                     0, /* Neg */
1249bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250bf215546Sopenharmony_ci                                                      */
1251bf215546Sopenharmony_ci                     1); /* Addr */
1252bf215546Sopenharmony_ci
1253bf215546Sopenharmony_ci                  *buffer++ =
1254bf215546Sopenharmony_ci                     pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255bf215546Sopenharmony_ci                  *buffer++ =
1256bf215546Sopenharmony_ci                     pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257bf215546Sopenharmony_ci               }
1258bf215546Sopenharmony_ci
1259bf215546Sopenharmony_ci               if (temp != pre_index_temp) {
1260bf215546Sopenharmony_ci                  *buffer++ = pvr_pds_inst_encode_stflp32(
1261bf215546Sopenharmony_ci                     /* IM */ 1, /*  enable immediate. */
1262bf215546Sopenharmony_ci                     /* cc */ 0,
1263bf215546Sopenharmony_ci                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264bf215546Sopenharmony_ci                     /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265bf215546Sopenharmony_ci                     /* SRC1 */ 0,
1266bf215546Sopenharmony_ci                     /* SRC2 (Shift) */ 0,
1267bf215546Sopenharmony_ci                     /* DST */ index_temp64);
1268bf215546Sopenharmony_ci               }
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_stflp32(
1271bf215546Sopenharmony_ci                  /* IM */ 1, /*  enable immediate. */
1272bf215546Sopenharmony_ci                  /* cc */ 0,
1273bf215546Sopenharmony_ci                  /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274bf215546Sopenharmony_ci                  /* SRC0 */ num_vertices_temp64 + 1,
1275bf215546Sopenharmony_ci                  /* SRC1 */ vertex_stream->num_vertices,
1276bf215546Sopenharmony_ci                  /* SRC2 (Shift) */ 0,
1277bf215546Sopenharmony_ci                  /* DST */ num_vertices_temp64);
1278bf215546Sopenharmony_ci            }
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci            first_ddmadt = false;
1281bf215546Sopenharmony_ci
1282bf215546Sopenharmony_ci            pre_index_temp = temp;
1283bf215546Sopenharmony_ci         }
1284bf215546Sopenharmony_ci      }
1285bf215546Sopenharmony_ci
1286bf215546Sopenharmony_ci      /* Process the elements in the stream. */
1287bf215546Sopenharmony_ci      for (uint32_t element = 0; element < vertex_stream->num_elements;
1288bf215546Sopenharmony_ci           element++) {
1289bf215546Sopenharmony_ci         bool terminate = false;
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci         vertex_element = &vertex_stream->elements[element];
1292bf215546Sopenharmony_ci         /* Check if last DDMAD needs terminate or not. */
1293bf215546Sopenharmony_ci         if ((element == (vertex_stream->num_elements - 1)) &&
1294bf215546Sopenharmony_ci             (stream == last_stream_index)) {
1295bf215546Sopenharmony_ci            terminate = !issue_empty_ddmad && !direct_writes_needed;
1296bf215546Sopenharmony_ci         }
1297bf215546Sopenharmony_ci
1298bf215546Sopenharmony_ci         /* Get a new set of constants for this element. */
1299bf215546Sopenharmony_ci         if (element) {
1300bf215546Sopenharmony_ci            /* Get all 8 32 bit constants at once. */
1301bf215546Sopenharmony_ci            next_constant =
1302bf215546Sopenharmony_ci               pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303bf215546Sopenharmony_ci         }
1304bf215546Sopenharmony_ci
1305bf215546Sopenharmony_ci         dma_address_constant64 = next_constant + 4;
1306bf215546Sopenharmony_ci         dma_control_constant64 = dma_address_constant64 + 2;
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci         if (vertex_element->component_size == 0) {
1309bf215546Sopenharmony_ci            /* Standard DMA.
1310bf215546Sopenharmony_ci             *
1311bf215546Sopenharmony_ci             * Write the DMA transfer control words into the PDS data
1312bf215546Sopenharmony_ci             * section.
1313bf215546Sopenharmony_ci             *
1314bf215546Sopenharmony_ci             * DMA Address is 40-bit.
1315bf215546Sopenharmony_ci             */
1316bf215546Sopenharmony_ci
1317bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318bf215546Sopenharmony_ci               uint32_t dma_control_word;
1319bf215546Sopenharmony_ci               uint64_t dma_control_word64 = 0;
1320bf215546Sopenharmony_ci               uint32_t dma_size;
1321bf215546Sopenharmony_ci
1322bf215546Sopenharmony_ci               /* Write the address to the constant. */
1323bf215546Sopenharmony_ci               pvr_pds_write_dma_address(buffer,
1324bf215546Sopenharmony_ci                                         dma_address_constant64,
1325bf215546Sopenharmony_ci                                         vertex_stream->address +
1326bf215546Sopenharmony_ci                                            (uint64_t)vertex_element->offset,
1327bf215546Sopenharmony_ci                                         false,
1328bf215546Sopenharmony_ci                                         dev_info);
1329bf215546Sopenharmony_ci               {
1330bf215546Sopenharmony_ci                  if (program->stream_patch_offsets) {
1331bf215546Sopenharmony_ci                     program
1332bf215546Sopenharmony_ci                        ->stream_patch_offsets[program->num_stream_patches++] =
1333bf215546Sopenharmony_ci                        (stream << 16) | (dma_address_constant64 >> 1);
1334bf215546Sopenharmony_ci                  }
1335bf215546Sopenharmony_ci               }
1336bf215546Sopenharmony_ci
1337bf215546Sopenharmony_ci               /* Size is in bytes - round up to nearest 32 bit word. */
1338bf215546Sopenharmony_ci               dma_size =
1339bf215546Sopenharmony_ci                  (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340bf215546Sopenharmony_ci                  PVR_PDS_DWORD_SHIFT;
1341bf215546Sopenharmony_ci
1342bf215546Sopenharmony_ci               assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci               /* Set up the dma transfer control word. */
1345bf215546Sopenharmony_ci               dma_control_word =
1346bf215546Sopenharmony_ci                  dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347bf215546Sopenharmony_ci
1348bf215546Sopenharmony_ci               dma_control_word |=
1349bf215546Sopenharmony_ci                  vertex_element->reg
1350bf215546Sopenharmony_ci                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351bf215546Sopenharmony_ci
1352bf215546Sopenharmony_ci               dma_control_word |=
1353bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357bf215546Sopenharmony_ci                  if ((ddmadt_enables & (1 << stream)) != 0) {
1358bf215546Sopenharmony_ci                     assert(
1359bf215546Sopenharmony_ci                        ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360bf215546Sopenharmony_ci                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361bf215546Sopenharmony_ci                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362bf215546Sopenharmony_ci                         PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363bf215546Sopenharmony_ci                        (uint64_t)vertex_stream->buffer_size_in_bytes);
1364bf215546Sopenharmony_ci                     dma_control_word64 =
1365bf215546Sopenharmony_ci                        (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366bf215546Sopenharmony_ci                         (((uint64_t)vertex_stream->buffer_size_in_bytes
1367bf215546Sopenharmony_ci                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368bf215546Sopenharmony_ci                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369bf215546Sopenharmony_ci                  }
1370bf215546Sopenharmony_ci               }
1371bf215546Sopenharmony_ci               /* If this is the last dma then also set the last flag. */
1372bf215546Sopenharmony_ci               if (terminate) {
1373bf215546Sopenharmony_ci                  dma_control_word |=
1374bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375bf215546Sopenharmony_ci               }
1376bf215546Sopenharmony_ci
1377bf215546Sopenharmony_ci               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378bf215546Sopenharmony_ci                * spec.
1379bf215546Sopenharmony_ci                */
1380bf215546Sopenharmony_ci               pvr_pds_write_wide_constant(buffer,
1381bf215546Sopenharmony_ci                                           dma_control_constant64,
1382bf215546Sopenharmony_ci                                           dma_control_word64 |
1383bf215546Sopenharmony_ci                                              (uint64_t)dma_control_word);
1384bf215546Sopenharmony_ci            }
1385bf215546Sopenharmony_ci
1386bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387bf215546Sopenharmony_ci               if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388bf215546Sopenharmony_ci                  if ((ddmadt_enables & (1 << stream)) != 0) {
1389bf215546Sopenharmony_ci                     *buffer++ = pvr_pds_inst_encode_cmp(
1390bf215546Sopenharmony_ci                        0, /* cc enable */
1391bf215546Sopenharmony_ci                        PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392bf215546Sopenharmony_ci                        index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393bf215546Sopenharmony_ci                        (num_vertices_temp64 >> 1) +
1394bf215546Sopenharmony_ci                           PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395bf215546Sopenharmony_ci                                                                      (REGS64)
1396bf215546Sopenharmony_ci                                                                    */
1397bf215546Sopenharmony_ci                  }
1398bf215546Sopenharmony_ci               }
1399bf215546Sopenharmony_ci               /* Multiply by the vertex stream stride and add the base
1400bf215546Sopenharmony_ci                * followed by a DOUTD.
1401bf215546Sopenharmony_ci                *
1402bf215546Sopenharmony_ci                * dmad32 (C0 * T0) + C1, C2
1403bf215546Sopenharmony_ci                * src0 = stride  src1 = index  src2 = baseaddr src3 =
1404bf215546Sopenharmony_ci                * doutd part
1405bf215546Sopenharmony_ci                */
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci               uint32_t cc;
1408bf215546Sopenharmony_ci               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409bf215546Sopenharmony_ci                  cc = 0;
1410bf215546Sopenharmony_ci               else
1411bf215546Sopenharmony_ci                  cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412bf215546Sopenharmony_ci
1413bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_ddmad(
1414bf215546Sopenharmony_ci                  /* cc */ cc,
1415bf215546Sopenharmony_ci                  /* END */ 0,
1416bf215546Sopenharmony_ci                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417bf215546Sopenharmony_ci                  /* SRC1 */ temp, /* Index 32-bit*/
1418bf215546Sopenharmony_ci                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419bf215546Sopenharmony_ci                                                                  * Address
1420bf215546Sopenharmony_ci                                                                  * +
1421bf215546Sopenharmony_ci                                                                  * Offset
1422bf215546Sopenharmony_ci                                                                  */
1423bf215546Sopenharmony_ci                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424bf215546Sopenharmony_ci                                                                 * Transfer
1425bf215546Sopenharmony_ci                                                                 * Control
1426bf215546Sopenharmony_ci                                                                 * Word.
1427bf215546Sopenharmony_ci                                                                 */
1428bf215546Sopenharmony_ci               );
1429bf215546Sopenharmony_ci            }
1430bf215546Sopenharmony_ci
1431bf215546Sopenharmony_ci            if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432bf215546Sopenharmony_ci                ((ddmadt_enables & (1 << stream)) != 0)) {
1433bf215546Sopenharmony_ci               code_size += 1;
1434bf215546Sopenharmony_ci            }
1435bf215546Sopenharmony_ci            code_size += 1;
1436bf215546Sopenharmony_ci         } else {
1437bf215546Sopenharmony_ci            /* Repeat DMA.
1438bf215546Sopenharmony_ci             *
1439bf215546Sopenharmony_ci             * Write the DMA transfer control words into the PDS data
1440bf215546Sopenharmony_ci             * section.
1441bf215546Sopenharmony_ci             *
1442bf215546Sopenharmony_ci             * DMA address is 40-bit.
1443bf215546Sopenharmony_ci             */
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446bf215546Sopenharmony_ci               uint32_t dma_control_word;
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci               /* Write the address to the constant. */
1449bf215546Sopenharmony_ci               pvr_pds_write_dma_address(buffer,
1450bf215546Sopenharmony_ci                                         dma_address_constant64,
1451bf215546Sopenharmony_ci                                         vertex_stream->address +
1452bf215546Sopenharmony_ci                                            (uint64_t)vertex_element->offset,
1453bf215546Sopenharmony_ci                                         false,
1454bf215546Sopenharmony_ci                                         dev_info);
1455bf215546Sopenharmony_ci
1456bf215546Sopenharmony_ci               /* Set up the DMA transfer control word. */
1457bf215546Sopenharmony_ci               dma_control_word =
1458bf215546Sopenharmony_ci                  vertex_element->size
1459bf215546Sopenharmony_ci                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_ci               dma_control_word |=
1462bf215546Sopenharmony_ci                  vertex_element->reg
1463bf215546Sopenharmony_ci                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464bf215546Sopenharmony_ci
1465bf215546Sopenharmony_ci               switch (vertex_element->component_size) {
1466bf215546Sopenharmony_ci               case 4: {
1467bf215546Sopenharmony_ci                  dma_control_word |=
1468bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469bf215546Sopenharmony_ci                  break;
1470bf215546Sopenharmony_ci               }
1471bf215546Sopenharmony_ci               case 3: {
1472bf215546Sopenharmony_ci                  dma_control_word |=
1473bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474bf215546Sopenharmony_ci                  break;
1475bf215546Sopenharmony_ci               }
1476bf215546Sopenharmony_ci               case 2: {
1477bf215546Sopenharmony_ci                  dma_control_word |=
1478bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479bf215546Sopenharmony_ci                  break;
1480bf215546Sopenharmony_ci               }
1481bf215546Sopenharmony_ci               default: {
1482bf215546Sopenharmony_ci                  dma_control_word |=
1483bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484bf215546Sopenharmony_ci                  break;
1485bf215546Sopenharmony_ci               }
1486bf215546Sopenharmony_ci               }
1487bf215546Sopenharmony_ci
1488bf215546Sopenharmony_ci               dma_control_word |=
1489bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490bf215546Sopenharmony_ci
1491bf215546Sopenharmony_ci               dma_control_word |=
1492bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494bf215546Sopenharmony_ci
1495bf215546Sopenharmony_ci               /* If this is the last dma then also set the last flag. */
1496bf215546Sopenharmony_ci               if (terminate) {
1497bf215546Sopenharmony_ci                  dma_control_word |=
1498bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499bf215546Sopenharmony_ci               }
1500bf215546Sopenharmony_ci
1501bf215546Sopenharmony_ci               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502bf215546Sopenharmony_ci                * spec.
1503bf215546Sopenharmony_ci                */
1504bf215546Sopenharmony_ci               pvr_pds_write_wide_constant(buffer,
1505bf215546Sopenharmony_ci                                           dma_control_constant64,
1506bf215546Sopenharmony_ci                                           (uint64_t)dma_control_word);
1507bf215546Sopenharmony_ci            }
1508bf215546Sopenharmony_ci
1509bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510bf215546Sopenharmony_ci               /* Multiply by the vertex stream stride and add the base
1511bf215546Sopenharmony_ci                * followed by a DOUTD.
1512bf215546Sopenharmony_ci                *
1513bf215546Sopenharmony_ci                * dmad32 (C0 * T0) + C1, C2
1514bf215546Sopenharmony_ci                * src0 = stride  src1 = index  src2 = baseaddr src3 =
1515bf215546Sopenharmony_ci                * doutd part
1516bf215546Sopenharmony_ci                */
1517bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_ddmad(
1518bf215546Sopenharmony_ci                  /* cc */ 0,
1519bf215546Sopenharmony_ci                  /* END */ 0,
1520bf215546Sopenharmony_ci                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521bf215546Sopenharmony_ci                  /* SRC1 */ temp, /* Index 32-bit*/
1522bf215546Sopenharmony_ci                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523bf215546Sopenharmony_ci                                                                  * Address
1524bf215546Sopenharmony_ci                                                                  * +
1525bf215546Sopenharmony_ci                                                                  * Offset.
1526bf215546Sopenharmony_ci                                                                  */
1527bf215546Sopenharmony_ci                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528bf215546Sopenharmony_ci                                                                 * Transfer
1529bf215546Sopenharmony_ci                                                                 * Control
1530bf215546Sopenharmony_ci                                                                 * Word.
1531bf215546Sopenharmony_ci                                                                 */
1532bf215546Sopenharmony_ci               );
1533bf215546Sopenharmony_ci            }
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci            code_size += 1;
1536bf215546Sopenharmony_ci         } /* End of repeat DMA. */
1537bf215546Sopenharmony_ci      } /* Element loop */
1538bf215546Sopenharmony_ci   } /* Stream loop */
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci   if (issue_empty_ddmad) {
1541bf215546Sopenharmony_ci      /* Issue an empty last DDMAD, always executed. */
1542bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(
1544bf215546Sopenharmony_ci            buffer,
1545bf215546Sopenharmony_ci            empty_dma_control_constant64,
1546bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547bf215546Sopenharmony_ci      }
1548bf215546Sopenharmony_ci
1549bf215546Sopenharmony_ci      code_size += 1;
1550bf215546Sopenharmony_ci
1551bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552bf215546Sopenharmony_ci         *buffer++ = pvr_pds_inst_encode_ddmad(
1553bf215546Sopenharmony_ci            /* cc */ 0,
1554bf215546Sopenharmony_ci            /* END */ 0,
1555bf215546Sopenharmony_ci            /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556bf215546Sopenharmony_ci            /* SRC1 */ temp, /* Index 32-bit*/
1557bf215546Sopenharmony_ci            /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558bf215546Sopenharmony_ci                                                            *Address +
1559bf215546Sopenharmony_ci                                                            *Offset.
1560bf215546Sopenharmony_ci                                                            */
1561bf215546Sopenharmony_ci            /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562bf215546Sopenharmony_ci                                                                 * Transfer
1563bf215546Sopenharmony_ci                                                                 * Control
1564bf215546Sopenharmony_ci                                                                 * Word.
1565bf215546Sopenharmony_ci                                                                 */
1566bf215546Sopenharmony_ci         );
1567bf215546Sopenharmony_ci      }
1568bf215546Sopenharmony_ci   }
1569bf215546Sopenharmony_ci
1570bf215546Sopenharmony_ci   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571bf215546Sopenharmony_ci      if (current_p0) {
1572bf215546Sopenharmony_ci         code_size += 1;
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575bf215546Sopenharmony_ci            /* Revert predicate back to IF0 which is required by DOUTU. */
1576bf215546Sopenharmony_ci            *buffer++ =
1577bf215546Sopenharmony_ci               pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578bf215546Sopenharmony_ci                                                                     */
1579bf215546Sopenharmony_ci                                  0, /* Neg */
1580bf215546Sopenharmony_ci                                  PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581bf215546Sopenharmony_ci                                                                    */
1582bf215546Sopenharmony_ci                                  1); /* Addr */
1583bf215546Sopenharmony_ci         }
1584bf215546Sopenharmony_ci      }
1585bf215546Sopenharmony_ci   }
1586bf215546Sopenharmony_ci   /* Send VertexID if requested. */
1587bf215546Sopenharmony_ci   if (program->iterate_vtx_id) {
1588bf215546Sopenharmony_ci      if (program->draw_indirect) {
1589bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_add32(
1591bf215546Sopenharmony_ci               /* cc */ 0x0,
1592bf215546Sopenharmony_ci               /* ALUM */ 0, /* Unsigned */
1593bf215546Sopenharmony_ci               /* SNA */ 1, /* Minus */
1594bf215546Sopenharmony_ci               /* SRC0 32b */ input_register0, /* vertexID */
1595bf215546Sopenharmony_ci               /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596bf215546Sopenharmony_ci                                                                       * vertexID.
1597bf215546Sopenharmony_ci                                                                       */
1598bf215546Sopenharmony_ci               /* DST 32b */ input_register0);
1599bf215546Sopenharmony_ci         }
1600bf215546Sopenharmony_ci
1601bf215546Sopenharmony_ci         code_size += 1;
1602bf215546Sopenharmony_ci      }
1603bf215546Sopenharmony_ci
1604bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605bf215546Sopenharmony_ci         uint32_t doutw = pvr_pds_encode_doutw_src1(
1606bf215546Sopenharmony_ci            program->vtx_id_register,
1607bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER32,
1608bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609bf215546Sopenharmony_ci            false,
1610bf215546Sopenharmony_ci            dev_info);
1611bf215546Sopenharmony_ci
1612bf215546Sopenharmony_ci         if (!program->iterate_instance_id && !program->iterate_remap_id)
1613bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614bf215546Sopenharmony_ci
1615bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer,
1616bf215546Sopenharmony_ci                                  vertex_id_control_word_const32,
1617bf215546Sopenharmony_ci                                  doutw);
1618bf215546Sopenharmony_ci      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619bf215546Sopenharmony_ci         *buffer++ = pvr_pds_encode_doutw64(
1620bf215546Sopenharmony_ci            /* cc */ 0,
1621bf215546Sopenharmony_ci            /* END */ 0,
1622bf215546Sopenharmony_ci            /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623bf215546Sopenharmony_ci                                                        */
1624bf215546Sopenharmony_ci            /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625bf215546Sopenharmony_ci      }
1626bf215546Sopenharmony_ci
1627bf215546Sopenharmony_ci      code_size += 1;
1628bf215546Sopenharmony_ci   }
1629bf215546Sopenharmony_ci
1630bf215546Sopenharmony_ci   /* Send InstanceID if requested. */
1631bf215546Sopenharmony_ci   if (program->iterate_instance_id) {
1632bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633bf215546Sopenharmony_ci         uint32_t doutw = pvr_pds_encode_doutw_src1(
1634bf215546Sopenharmony_ci            program->instance_id_register,
1635bf215546Sopenharmony_ci            PVR_PDS_DOUTW_UPPER32,
1636bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637bf215546Sopenharmony_ci            true,
1638bf215546Sopenharmony_ci            dev_info);
1639bf215546Sopenharmony_ci
1640bf215546Sopenharmony_ci         if (!program->iterate_remap_id)
1641bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642bf215546Sopenharmony_ci
1643bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer,
1644bf215546Sopenharmony_ci                                  instance_id_control_word_const32,
1645bf215546Sopenharmony_ci                                  doutw);
1646bf215546Sopenharmony_ci      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647bf215546Sopenharmony_ci         *buffer++ = pvr_pds_encode_doutw64(
1648bf215546Sopenharmony_ci            /* cc */ 0,
1649bf215546Sopenharmony_ci            /* END */ 0,
1650bf215546Sopenharmony_ci            /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651bf215546Sopenharmony_ci            /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652bf215546Sopenharmony_ci      }
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci      code_size += 1;
1655bf215546Sopenharmony_ci   }
1656bf215546Sopenharmony_ci
1657bf215546Sopenharmony_ci   /* Send remapped index number to vi0. */
1658bf215546Sopenharmony_ci   if (program->iterate_remap_id) {
1659bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660bf215546Sopenharmony_ci         uint32_t doutw = pvr_pds_encode_doutw_src1(
1661bf215546Sopenharmony_ci            0 /* vi0 */,
1662bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER32,
1663bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665bf215546Sopenharmony_ci            false,
1666bf215546Sopenharmony_ci            dev_info);
1667bf215546Sopenharmony_ci
1668bf215546Sopenharmony_ci         pvr_pds_write_constant64(buffer,
1669bf215546Sopenharmony_ci                                  geometry_id_control_word_const64,
1670bf215546Sopenharmony_ci                                  doutw,
1671bf215546Sopenharmony_ci                                  0);
1672bf215546Sopenharmony_ci      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673bf215546Sopenharmony_ci         *buffer++ = pvr_pds_encode_doutw64(
1674bf215546Sopenharmony_ci            /* cc */ 0,
1675bf215546Sopenharmony_ci            /* END */ 0,
1676bf215546Sopenharmony_ci            /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677bf215546Sopenharmony_ci                                                          * Src1
1678bf215546Sopenharmony_ci                                                          */
1679bf215546Sopenharmony_ci            /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680bf215546Sopenharmony_ci      }
1681bf215546Sopenharmony_ci
1682bf215546Sopenharmony_ci      code_size += 1;
1683bf215546Sopenharmony_ci   }
1684bf215546Sopenharmony_ci
1685bf215546Sopenharmony_ci   /* Copy the USC task control words to constants. */
1686bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687bf215546Sopenharmony_ci      pvr_pds_write_wide_constant(buffer,
1688bf215546Sopenharmony_ci                                  usc_control_constant64,
1689bf215546Sopenharmony_ci                                  program->usc_task_control.src0); /* 64-bit
1690bf215546Sopenharmony_ci                                                                    * Src0
1691bf215546Sopenharmony_ci                                                                    */
1692bf215546Sopenharmony_ci      if (program->stream_patch_offsets) {
1693bf215546Sopenharmony_ci         /* USC TaskControl is always the first patch. */
1694bf215546Sopenharmony_ci         program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695bf215546Sopenharmony_ci      }
1696bf215546Sopenharmony_ci   }
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699bf215546Sopenharmony_ci      /* Conditionally (if last in task) issue the task to the USC
1700bf215546Sopenharmony_ci       * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701bf215546Sopenharmony_ci       */
1702bf215546Sopenharmony_ci
1703bf215546Sopenharmony_ci      *buffer++ = pvr_pds_encode_doutu(
1704bf215546Sopenharmony_ci         /* cc */ 1,
1705bf215546Sopenharmony_ci         /* END */ 1,
1706bf215546Sopenharmony_ci         /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_ci      /* End the program if the Dout did not already end it. */
1709bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_halt(0);
1710bf215546Sopenharmony_ci   }
1711bf215546Sopenharmony_ci
1712bf215546Sopenharmony_ci   code_size += 2;
1713bf215546Sopenharmony_ci
1714bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715bf215546Sopenharmony_ci      /* Set the data segment pointer and ensure we return 1 past the buffer
1716bf215546Sopenharmony_ci       * ptr.
1717bf215546Sopenharmony_ci       */
1718bf215546Sopenharmony_ci      program->data_segment = buffer;
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci      buffer += consts_size;
1721bf215546Sopenharmony_ci   }
1722bf215546Sopenharmony_ci
1723bf215546Sopenharmony_ci   program->temps_used = temps_used;
1724bf215546Sopenharmony_ci   program->data_size = consts_size;
1725bf215546Sopenharmony_ci   program->code_size = code_size;
1726bf215546Sopenharmony_ci   program->ddmadt_enables = ddmadt_enables;
1727bf215546Sopenharmony_ci   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728bf215546Sopenharmony_ci      program->skip_stream_flag = skip_stream_flag;
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci   return buffer;
1731bf215546Sopenharmony_ci}
1732bf215546Sopenharmony_ci
1733bf215546Sopenharmony_ci/**
1734bf215546Sopenharmony_ci * Generates a PDS program to load USC compute shader global/local/workgroup
1735bf215546Sopenharmony_ci * sizes/ids and then a DOUTU to execute the USC.
1736bf215546Sopenharmony_ci *
1737bf215546Sopenharmony_ci * \param program Pointer to description of the program that should be
1738bf215546Sopenharmony_ci *                generated.
1739bf215546Sopenharmony_ci * \param buffer Pointer to buffer that receives the output of this function.
1740bf215546Sopenharmony_ci *               This will be either the data segment, or the code depending on
1741bf215546Sopenharmony_ci *               gen_mode.
1742bf215546Sopenharmony_ci * \param gen_mode Which part to generate, either data segment or code segment.
1743bf215546Sopenharmony_ci *                 If PDS_GENERATE_SIZES is specified, nothing is written, but
1744bf215546Sopenharmony_ci *                 size information in program is updated.
1745bf215546Sopenharmony_ci * \param dev_info PVR device info struct.
1746bf215546Sopenharmony_ci * \returns Pointer to just past the data written to the buffer, i.e. the
1747bf215546Sopenharmony_ci *          value of the buffer pointer after its contents have been written.
1748bf215546Sopenharmony_ci */
1749bf215546Sopenharmony_ciuint32_t *
1750bf215546Sopenharmony_cipvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751bf215546Sopenharmony_ci                       uint32_t *restrict buffer,
1752bf215546Sopenharmony_ci                       enum pvr_pds_generate_mode gen_mode,
1753bf215546Sopenharmony_ci                       const struct pvr_device_info *dev_info)
1754bf215546Sopenharmony_ci{
1755bf215546Sopenharmony_ci   uint32_t usc_control_constant64;
1756bf215546Sopenharmony_ci   uint32_t usc_control_constant64_coeff_update = 0;
1757bf215546Sopenharmony_ci   uint32_t zero_constant64 = 0;
1758bf215546Sopenharmony_ci
1759bf215546Sopenharmony_ci   uint32_t data_size = 0;
1760bf215546Sopenharmony_ci   uint32_t code_size = 0;
1761bf215546Sopenharmony_ci   uint32_t temps_used = 0;
1762bf215546Sopenharmony_ci   uint32_t doutw = 0;
1763bf215546Sopenharmony_ci
1764bf215546Sopenharmony_ci   uint32_t barrier_ctrl_word = 0;
1765bf215546Sopenharmony_ci   uint32_t barrier_ctrl_word2 = 0;
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci   /* Even though there are 3 IDs for local and global we only need max one
1768bf215546Sopenharmony_ci    * DOUTW for local, and two for global.
1769bf215546Sopenharmony_ci    */
1770bf215546Sopenharmony_ci   uint32_t work_group_id_ctrl_words[2] = { 0 };
1771bf215546Sopenharmony_ci   uint32_t local_id_ctrl_word = 0;
1772bf215546Sopenharmony_ci   uint32_t local_input_register;
1773bf215546Sopenharmony_ci
1774bf215546Sopenharmony_ci   /* For the constant value to load into ptemp (SW fence). */
1775bf215546Sopenharmony_ci   uint64_t predicate_ld_src0_constant = 0;
1776bf215546Sopenharmony_ci   uint32_t cond_render_negate_constant = 0;
1777bf215546Sopenharmony_ci
1778bf215546Sopenharmony_ci   uint32_t cond_render_pred_temp;
1779bf215546Sopenharmony_ci   uint32_t cond_render_negate_temp;
1780bf215546Sopenharmony_ci
1781bf215546Sopenharmony_ci   /* 2x 64 bit registers that will mask out the Predicate load. */
1782bf215546Sopenharmony_ci   uint32_t cond_render_pred_mask_constant = 0;
1783bf215546Sopenharmony_ci
1784bf215546Sopenharmony_ci#if defined(DEBUG)
1785bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786bf215546Sopenharmony_ci      for (uint32_t j = 0; j < program->data_size; j++)
1787bf215546Sopenharmony_ci         buffer[j] = 0xDEADBEEF;
1788bf215546Sopenharmony_ci   }
1789bf215546Sopenharmony_ci#endif
1790bf215546Sopenharmony_ci
1791bf215546Sopenharmony_ci   /* All the compute input registers are in temps. */
1792bf215546Sopenharmony_ci   temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793bf215546Sopenharmony_ci
1794bf215546Sopenharmony_ci   uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797bf215546Sopenharmony_ci
1798bf215546Sopenharmony_ci   if (program->kick_usc) {
1799bf215546Sopenharmony_ci      /* Copy the USC task control words to constants. */
1800bf215546Sopenharmony_ci      usc_control_constant64 =
1801bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
1802bf215546Sopenharmony_ci   }
1803bf215546Sopenharmony_ci
1804bf215546Sopenharmony_ci   if (program->has_coefficient_update_task) {
1805bf215546Sopenharmony_ci      usc_control_constant64_coeff_update =
1806bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
1807bf215546Sopenharmony_ci   }
1808bf215546Sopenharmony_ci
1809bf215546Sopenharmony_ci   if (program->conditional_render) {
1810bf215546Sopenharmony_ci      predicate_ld_src0_constant =
1811bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
1812bf215546Sopenharmony_ci      cond_render_negate_constant =
1813bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
1814bf215546Sopenharmony_ci      cond_render_pred_mask_constant =
1815bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 4, &data_size);
1816bf215546Sopenharmony_ci
1817bf215546Sopenharmony_ci      /* LD will load a 64 bit value. */
1818bf215546Sopenharmony_ci      cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819bf215546Sopenharmony_ci      cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820bf215546Sopenharmony_ci
1821bf215546Sopenharmony_ci      program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822bf215546Sopenharmony_ci      program->cond_render_pred_temp = cond_render_pred_temp;
1823bf215546Sopenharmony_ci   }
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci   if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1826bf215546Sopenharmony_ci       (program->clear_pds_barrier) ||
1827bf215546Sopenharmony_ci       (program->kick_usc && program->conditional_render)) {
1828bf215546Sopenharmony_ci      zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829bf215546Sopenharmony_ci   }
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci   if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1832bf215546Sopenharmony_ci      barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833bf215546Sopenharmony_ci      if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834bf215546Sopenharmony_ci         barrier_ctrl_word2 =
1835bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 1, &data_size);
1836bf215546Sopenharmony_ci      }
1837bf215546Sopenharmony_ci   }
1838bf215546Sopenharmony_ci
1839bf215546Sopenharmony_ci   if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
1840bf215546Sopenharmony_ci       program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1841bf215546Sopenharmony_ci      work_group_id_ctrl_words[0] =
1842bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 1, &data_size);
1843bf215546Sopenharmony_ci   }
1844bf215546Sopenharmony_ci
1845bf215546Sopenharmony_ci   if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1846bf215546Sopenharmony_ci      work_group_id_ctrl_words[1] =
1847bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 1, &data_size);
1848bf215546Sopenharmony_ci   }
1849bf215546Sopenharmony_ci
1850bf215546Sopenharmony_ci   if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1851bf215546Sopenharmony_ci       (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1852bf215546Sopenharmony_ci       (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1853bf215546Sopenharmony_ci      local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854bf215546Sopenharmony_ci   }
1855bf215546Sopenharmony_ci
1856bf215546Sopenharmony_ci   if (program->add_base_workgroup) {
1857bf215546Sopenharmony_ci      for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858bf215546Sopenharmony_ci           workgroup_component++) {
1859bf215546Sopenharmony_ci         if (program->work_group_input_regs[workgroup_component] !=
1860bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1861bf215546Sopenharmony_ci            program
1862bf215546Sopenharmony_ci               ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863bf215546Sopenharmony_ci               pvr_pds_get_constants(&next_constant, 1, &data_size);
1864bf215546Sopenharmony_ci         }
1865bf215546Sopenharmony_ci      }
1866bf215546Sopenharmony_ci   }
1867bf215546Sopenharmony_ci
1868bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869bf215546Sopenharmony_ci      if (program->kick_usc) {
1870bf215546Sopenharmony_ci         /* Src0 for DOUTU */
1871bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(buffer,
1872bf215546Sopenharmony_ci                                     usc_control_constant64,
1873bf215546Sopenharmony_ci                                     program->usc_task_control.src0); /* 64-bit
1874bf215546Sopenharmony_ci                                                                       * Src0.
1875bf215546Sopenharmony_ci                                                                       */
1876bf215546Sopenharmony_ci      }
1877bf215546Sopenharmony_ci
1878bf215546Sopenharmony_ci      if (program->has_coefficient_update_task) {
1879bf215546Sopenharmony_ci         /* Src0 for DOUTU. */
1880bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(
1881bf215546Sopenharmony_ci            buffer,
1882bf215546Sopenharmony_ci            usc_control_constant64_coeff_update,
1883bf215546Sopenharmony_ci            program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884bf215546Sopenharmony_ci      }
1885bf215546Sopenharmony_ci
1886bf215546Sopenharmony_ci      if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1887bf215546Sopenharmony_ci          (program->clear_pds_barrier) ||
1888bf215546Sopenharmony_ci          (program->kick_usc && program->conditional_render)) {
1889bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890bf215546Sopenharmony_ci                                                                   * Src0
1891bf215546Sopenharmony_ci                                                                   */
1892bf215546Sopenharmony_ci      }
1893bf215546Sopenharmony_ci
1894bf215546Sopenharmony_ci      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1895bf215546Sopenharmony_ci         if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896bf215546Sopenharmony_ci            /* Write the constant for the coefficient register write. */
1897bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
1898bf215546Sopenharmony_ci               program->barrier_coefficient + 4,
1899bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
1900bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901bf215546Sopenharmony_ci               true,
1902bf215546Sopenharmony_ci               dev_info);
1903bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904bf215546Sopenharmony_ci         }
1905bf215546Sopenharmony_ci         /* Write the constant for the coefficient register write. */
1906bf215546Sopenharmony_ci         doutw = pvr_pds_encode_doutw_src1(
1907bf215546Sopenharmony_ci            program->barrier_coefficient,
1908bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER64,
1909bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910bf215546Sopenharmony_ci            true,
1911bf215546Sopenharmony_ci            dev_info);
1912bf215546Sopenharmony_ci
1913bf215546Sopenharmony_ci         /* Check whether the barrier is going to be the last DOUTW done by
1914bf215546Sopenharmony_ci          * the coefficient sync task.
1915bf215546Sopenharmony_ci          */
1916bf215546Sopenharmony_ci         if ((program->work_group_input_regs[0] ==
1917bf215546Sopenharmony_ci              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1918bf215546Sopenharmony_ci             (program->work_group_input_regs[1] ==
1919bf215546Sopenharmony_ci              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1920bf215546Sopenharmony_ci             (program->work_group_input_regs[2] ==
1921bf215546Sopenharmony_ci              PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1922bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1923bf215546Sopenharmony_ci         }
1924bf215546Sopenharmony_ci
1925bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1926bf215546Sopenharmony_ci      }
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci      /* If we want work-group id X, see if we also want work-group id Y. */
1929bf215546Sopenharmony_ci      if (program->work_group_input_regs[0] !=
1930bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
1931bf215546Sopenharmony_ci          program->work_group_input_regs[1] !=
1932bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1933bf215546Sopenharmony_ci         /* Make sure we are going to DOUTW them into adjacent registers
1934bf215546Sopenharmony_ci          * otherwise we can't do it in one.
1935bf215546Sopenharmony_ci          */
1936bf215546Sopenharmony_ci         assert(program->work_group_input_regs[1] ==
1937bf215546Sopenharmony_ci                (program->work_group_input_regs[0] + 1));
1938bf215546Sopenharmony_ci
1939bf215546Sopenharmony_ci         doutw = pvr_pds_encode_doutw_src1(
1940bf215546Sopenharmony_ci            program->work_group_input_regs[0],
1941bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER64,
1942bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1943bf215546Sopenharmony_ci            true,
1944bf215546Sopenharmony_ci            dev_info);
1945bf215546Sopenharmony_ci
1946bf215546Sopenharmony_ci         /* If we don't want the Z work-group id then this is the last one.
1947bf215546Sopenharmony_ci          */
1948bf215546Sopenharmony_ci         if (program->work_group_input_regs[2] ==
1949bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1950bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1951bf215546Sopenharmony_ci         }
1952bf215546Sopenharmony_ci
1953bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1954bf215546Sopenharmony_ci      }
1955bf215546Sopenharmony_ci      /* If we only want one of X or Y then handle them separately. */
1956bf215546Sopenharmony_ci      else {
1957bf215546Sopenharmony_ci         if (program->work_group_input_regs[0] !=
1958bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1959bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
1960bf215546Sopenharmony_ci               program->work_group_input_regs[0],
1961bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER32,
1962bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1963bf215546Sopenharmony_ci               true,
1964bf215546Sopenharmony_ci               dev_info);
1965bf215546Sopenharmony_ci
1966bf215546Sopenharmony_ci            /* If we don't want the Z work-group id then this is the last
1967bf215546Sopenharmony_ci             * one.
1968bf215546Sopenharmony_ci             */
1969bf215546Sopenharmony_ci            if (program->work_group_input_regs[2] ==
1970bf215546Sopenharmony_ci                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1971bf215546Sopenharmony_ci               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1972bf215546Sopenharmony_ci            }
1973bf215546Sopenharmony_ci
1974bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer,
1975bf215546Sopenharmony_ci                                     work_group_id_ctrl_words[0],
1976bf215546Sopenharmony_ci                                     doutw);
1977bf215546Sopenharmony_ci         } else if (program->work_group_input_regs[1] !=
1978bf215546Sopenharmony_ci                    PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1979bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
1980bf215546Sopenharmony_ci               program->work_group_input_regs[1],
1981bf215546Sopenharmony_ci               PVR_PDS_DOUTW_UPPER32,
1982bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1983bf215546Sopenharmony_ci               true,
1984bf215546Sopenharmony_ci               dev_info);
1985bf215546Sopenharmony_ci
1986bf215546Sopenharmony_ci            /* If we don't want the Z work-group id then this is the last
1987bf215546Sopenharmony_ci             * one.
1988bf215546Sopenharmony_ci             */
1989bf215546Sopenharmony_ci            if (program->work_group_input_regs[2] ==
1990bf215546Sopenharmony_ci                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1991bf215546Sopenharmony_ci               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1992bf215546Sopenharmony_ci            }
1993bf215546Sopenharmony_ci
1994bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer,
1995bf215546Sopenharmony_ci                                     work_group_id_ctrl_words[0],
1996bf215546Sopenharmony_ci                                     doutw);
1997bf215546Sopenharmony_ci         }
1998bf215546Sopenharmony_ci      }
1999bf215546Sopenharmony_ci
2000bf215546Sopenharmony_ci      /* Handle work-group id Z. */
2001bf215546Sopenharmony_ci      if (program->work_group_input_regs[2] !=
2002bf215546Sopenharmony_ci          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2003bf215546Sopenharmony_ci         doutw = pvr_pds_encode_doutw_src1(
2004bf215546Sopenharmony_ci            program->work_group_input_regs[2],
2005bf215546Sopenharmony_ci            PVR_PDS_DOUTW_UPPER32,
2006bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
2007bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2008bf215546Sopenharmony_ci            true,
2009bf215546Sopenharmony_ci            dev_info);
2010bf215546Sopenharmony_ci
2011bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
2012bf215546Sopenharmony_ci      }
2013bf215546Sopenharmony_ci
2014bf215546Sopenharmony_ci      /* Handle the local IDs. */
2015bf215546Sopenharmony_ci      if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2016bf215546Sopenharmony_ci          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2017bf215546Sopenharmony_ci         uint32_t dest_reg;
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_ci         /* If we want local id Y and Z make sure the compiler wants them in
2020bf215546Sopenharmony_ci          * the same register.
2021bf215546Sopenharmony_ci          */
2022bf215546Sopenharmony_ci         if (!program->flattened_work_groups) {
2023bf215546Sopenharmony_ci            if ((program->local_input_regs[1] !=
2024bf215546Sopenharmony_ci                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2025bf215546Sopenharmony_ci                (program->local_input_regs[2] !=
2026bf215546Sopenharmony_ci                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2027bf215546Sopenharmony_ci               assert(program->local_input_regs[1] ==
2028bf215546Sopenharmony_ci                      program->local_input_regs[2]);
2029bf215546Sopenharmony_ci            }
2030bf215546Sopenharmony_ci         }
2031bf215546Sopenharmony_ci
2032bf215546Sopenharmony_ci         if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2033bf215546Sopenharmony_ci            dest_reg = program->local_input_regs[1];
2034bf215546Sopenharmony_ci         else
2035bf215546Sopenharmony_ci            dest_reg = program->local_input_regs[2];
2036bf215546Sopenharmony_ci
2037bf215546Sopenharmony_ci         /* If we want local id X and (Y or Z) then we can do that in a
2038bf215546Sopenharmony_ci          * single 64-bit DOUTW.
2039bf215546Sopenharmony_ci          */
2040bf215546Sopenharmony_ci         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2041bf215546Sopenharmony_ci            assert(dest_reg == (program->local_input_regs[0] + 1));
2042bf215546Sopenharmony_ci
2043bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
2044bf215546Sopenharmony_ci               program->local_input_regs[0],
2045bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
2046bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2047bf215546Sopenharmony_ci               true,
2048bf215546Sopenharmony_ci               dev_info);
2049bf215546Sopenharmony_ci
2050bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2051bf215546Sopenharmony_ci
2052bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2053bf215546Sopenharmony_ci         }
2054bf215546Sopenharmony_ci         /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2055bf215546Sopenharmony_ci          */
2056bf215546Sopenharmony_ci         else {
2057bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
2058bf215546Sopenharmony_ci               dest_reg,
2059bf215546Sopenharmony_ci               PVR_PDS_DOUTW_UPPER32,
2060bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2061bf215546Sopenharmony_ci               true,
2062bf215546Sopenharmony_ci               dev_info);
2063bf215546Sopenharmony_ci
2064bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2067bf215546Sopenharmony_ci         }
2068bf215546Sopenharmony_ci      }
2069bf215546Sopenharmony_ci      /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2070bf215546Sopenharmony_ci       */
2071bf215546Sopenharmony_ci      else if (program->local_input_regs[0] !=
2072bf215546Sopenharmony_ci               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2073bf215546Sopenharmony_ci         doutw = pvr_pds_encode_doutw_src1(
2074bf215546Sopenharmony_ci            program->local_input_regs[0],
2075bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER32,
2076bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2077bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2078bf215546Sopenharmony_ci            true,
2079bf215546Sopenharmony_ci            dev_info);
2080bf215546Sopenharmony_ci
2081bf215546Sopenharmony_ci         pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2082bf215546Sopenharmony_ci      }
2083bf215546Sopenharmony_ci   }
2084bf215546Sopenharmony_ci
2085bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2086bf215546Sopenharmony_ci       gen_mode == PDS_GENERATE_SIZES) {
2087bf215546Sopenharmony_ci      const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2088bf215546Sopenharmony_ci#define APPEND(X)                    \
2089bf215546Sopenharmony_ci   if (encode) {                     \
2090bf215546Sopenharmony_ci      *buffer = X;                   \
2091bf215546Sopenharmony_ci      buffer++;                      \
2092bf215546Sopenharmony_ci   } else {                          \
2093bf215546Sopenharmony_ci      code_size += sizeof(uint32_t); \
2094bf215546Sopenharmony_ci   }
2095bf215546Sopenharmony_ci
2096bf215546Sopenharmony_ci      /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2097bf215546Sopenharmony_ci       * then we will be doing an infinite loop.
2098bf215546Sopenharmony_ci       */
2099bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2100bf215546Sopenharmony_ci         assert(program->coeff_update_task_branch_size > 0);
2101bf215546Sopenharmony_ci
2102bf215546Sopenharmony_ci      /* Test whether this is the coefficient update task or not. */
2103bf215546Sopenharmony_ci      APPEND(
2104bf215546Sopenharmony_ci         pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2105bf215546Sopenharmony_ci                            PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2106bf215546Sopenharmony_ci                            PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2107bf215546Sopenharmony_ci                            program->coeff_update_task_branch_size /* ADDR */));
2108bf215546Sopenharmony_ci
2109bf215546Sopenharmony_ci      /* Do we need to initialize the barrier coefficient? */
2110bf215546Sopenharmony_ci      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2111bf215546Sopenharmony_ci         if (PVR_HAS_QUIRK(dev_info, 51210)) {
2112bf215546Sopenharmony_ci            /* Initialize the second barrier coefficient registers to zero.
2113bf215546Sopenharmony_ci             */
2114bf215546Sopenharmony_ci            APPEND(pvr_pds_encode_doutw64(0, /* cc */
2115bf215546Sopenharmony_ci                                          0, /* END */
2116bf215546Sopenharmony_ci                                          barrier_ctrl_word2, /* SRC1 */
2117bf215546Sopenharmony_ci                                          zero_constant64 >> 1)); /* SRC0 */
2118bf215546Sopenharmony_ci         }
2119bf215546Sopenharmony_ci         /* Initialize the coefficient register to zero. */
2120bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2121bf215546Sopenharmony_ci                                       0, /* END */
2122bf215546Sopenharmony_ci                                       barrier_ctrl_word, /* SRC1 */
2123bf215546Sopenharmony_ci                                       zero_constant64 >> 1)); /* SRC0 */
2124bf215546Sopenharmony_ci      }
2125bf215546Sopenharmony_ci
2126bf215546Sopenharmony_ci      if (program->add_base_workgroup) {
2127bf215546Sopenharmony_ci         const uint32_t temp_values[3] = { 0, 1, 3 };
2128bf215546Sopenharmony_ci         for (uint32_t workgroup_component = 0; workgroup_component < 3;
2129bf215546Sopenharmony_ci              workgroup_component++) {
2130bf215546Sopenharmony_ci            if (program->work_group_input_regs[workgroup_component] ==
2131bf215546Sopenharmony_ci                PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2132bf215546Sopenharmony_ci               continue;
2133bf215546Sopenharmony_ci
2134bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_add32(
2135bf215546Sopenharmony_ci               /* cc */ 0x0,
2136bf215546Sopenharmony_ci               /* ALUM */ 0,
2137bf215546Sopenharmony_ci               /* SNA */ 0,
2138bf215546Sopenharmony_ci               /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2139bf215546Sopenharmony_ci                  program->base_workgroup_constant_offset_in_dwords
2140bf215546Sopenharmony_ci                     [workgroup_component],
2141bf215546Sopenharmony_ci               /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2142bf215546Sopenharmony_ci                  PVR_PDS_CDM_WORK_GROUP_ID_X +
2143bf215546Sopenharmony_ci                  temp_values[workgroup_component],
2144bf215546Sopenharmony_ci               /* DST  (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2145bf215546Sopenharmony_ci                  PVR_PDS_CDM_WORK_GROUP_ID_X +
2146bf215546Sopenharmony_ci                  temp_values[workgroup_component]));
2147bf215546Sopenharmony_ci         }
2148bf215546Sopenharmony_ci      }
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci      /* If we are going to put the work-group IDs in coefficients then we
2151bf215546Sopenharmony_ci       * just need to do the DOUTWs.
2152bf215546Sopenharmony_ci       */
2153bf215546Sopenharmony_ci      if ((program->work_group_input_regs[0] !=
2154bf215546Sopenharmony_ci           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2155bf215546Sopenharmony_ci          (program->work_group_input_regs[1] !=
2156bf215546Sopenharmony_ci           PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2157bf215546Sopenharmony_ci         uint32_t dest_reg;
2158bf215546Sopenharmony_ci
2159bf215546Sopenharmony_ci         if (program->work_group_input_regs[0] !=
2160bf215546Sopenharmony_ci             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2161bf215546Sopenharmony_ci            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2162bf215546Sopenharmony_ci         } else {
2163bf215546Sopenharmony_ci            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2164bf215546Sopenharmony_ci         }
2165bf215546Sopenharmony_ci
2166bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2167bf215546Sopenharmony_ci                                       0, /* END */
2168bf215546Sopenharmony_ci                                       work_group_id_ctrl_words[0], /* SRC1
2169bf215546Sopenharmony_ci                                                                     */
2170bf215546Sopenharmony_ci                                       dest_reg >> 1)); /* SRC0 */
2171bf215546Sopenharmony_ci      }
2172bf215546Sopenharmony_ci
2173bf215546Sopenharmony_ci      if (program->work_group_input_regs[2] !=
2174bf215546Sopenharmony_ci          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2175bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutw64(
2176bf215546Sopenharmony_ci            0, /* cc */
2177bf215546Sopenharmony_ci            0, /* END */
2178bf215546Sopenharmony_ci            work_group_id_ctrl_words[1], /* SRC1 */
2179bf215546Sopenharmony_ci            (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2180bf215546Sopenharmony_ci               1)); /* SRC0 */
2181bf215546Sopenharmony_ci      }
2182bf215546Sopenharmony_ci
2183bf215546Sopenharmony_ci      /* Issue the task to the USC. */
2184bf215546Sopenharmony_ci      if (program->kick_usc && program->has_coefficient_update_task) {
2185bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutu(0, /* cc */
2186bf215546Sopenharmony_ci                                     1, /* END */
2187bf215546Sopenharmony_ci                                     usc_control_constant64_coeff_update >>
2188bf215546Sopenharmony_ci                                        1)); /* SRC0; DOUTU 64-bit Src0 */
2189bf215546Sopenharmony_ci      }
2190bf215546Sopenharmony_ci
2191bf215546Sopenharmony_ci      /* Encode a HALT */
2192bf215546Sopenharmony_ci      APPEND(pvr_pds_inst_encode_halt(0));
2193bf215546Sopenharmony_ci
2194bf215546Sopenharmony_ci      /* Set the branch size used to skip the coefficient sync task. */
2195bf215546Sopenharmony_ci      program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2196bf215546Sopenharmony_ci
2197bf215546Sopenharmony_ci      /* DOUTW in the local IDs. */
2198bf215546Sopenharmony_ci
2199bf215546Sopenharmony_ci      /* If we want X and Y or Z, we only need one DOUTW. */
2200bf215546Sopenharmony_ci      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2201bf215546Sopenharmony_ci          ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2202bf215546Sopenharmony_ci           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
2203bf215546Sopenharmony_ci         local_input_register =
2204bf215546Sopenharmony_ci            PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2205bf215546Sopenharmony_ci      } else {
2206bf215546Sopenharmony_ci         /* If we just want X. */
2207bf215546Sopenharmony_ci         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2208bf215546Sopenharmony_ci            local_input_register =
2209bf215546Sopenharmony_ci               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2210bf215546Sopenharmony_ci         }
2211bf215546Sopenharmony_ci         /* If we just want Y or Z. */
2212bf215546Sopenharmony_ci         else if (program->local_input_regs[1] !=
2213bf215546Sopenharmony_ci                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
2214bf215546Sopenharmony_ci                  program->local_input_regs[2] !=
2215bf215546Sopenharmony_ci                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2216bf215546Sopenharmony_ci            local_input_register =
2217bf215546Sopenharmony_ci               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2218bf215546Sopenharmony_ci         }
2219bf215546Sopenharmony_ci      }
2220bf215546Sopenharmony_ci
2221bf215546Sopenharmony_ci      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2222bf215546Sopenharmony_ci          (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2223bf215546Sopenharmony_ci          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2224bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutw64(0, /* cc */
2225bf215546Sopenharmony_ci                                       0, /* END */
2226bf215546Sopenharmony_ci                                       local_id_ctrl_word, /* SRC1 */
2227bf215546Sopenharmony_ci                                       local_input_register >> 1)); /* SRC0
2228bf215546Sopenharmony_ci                                                                     */
2229bf215546Sopenharmony_ci      }
2230bf215546Sopenharmony_ci
2231bf215546Sopenharmony_ci      if (program->clear_pds_barrier) {
2232bf215546Sopenharmony_ci         /* Zero the persistent temp (SW fence for context switch). */
2233bf215546Sopenharmony_ci         APPEND(pvr_pds_inst_encode_add64(
2234bf215546Sopenharmony_ci            0, /* cc */
2235bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2236bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2237bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2238bf215546Sopenharmony_ci               (zero_constant64 >> 1), /* src0 = 0 */
2239bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2240bf215546Sopenharmony_ci               (zero_constant64 >> 1), /* src1 = 0 */
2241bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2242bf215546Sopenharmony_ci                                                             * ptemp64[0]
2243bf215546Sopenharmony_ci                                                             */
2244bf215546Sopenharmony_ci      }
2245bf215546Sopenharmony_ci
2246bf215546Sopenharmony_ci      /* If this is a fence, issue the DOUTC. */
2247bf215546Sopenharmony_ci      if (program->fence) {
2248bf215546Sopenharmony_ci         APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2249bf215546Sopenharmony_ci                                          0 /* END */));
2250bf215546Sopenharmony_ci      }
2251bf215546Sopenharmony_ci
2252bf215546Sopenharmony_ci      if (program->kick_usc) {
2253bf215546Sopenharmony_ci         if (program->conditional_render) {
2254bf215546Sopenharmony_ci            /* Skip if coefficient update task. */
2255bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2256bf215546Sopenharmony_ci                                           0,
2257bf215546Sopenharmony_ci                                           PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2258bf215546Sopenharmony_ci                                           16));
2259bf215546Sopenharmony_ci
2260bf215546Sopenharmony_ci            /* Load the predicate. */
2261bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2262bf215546Sopenharmony_ci
2263bf215546Sopenharmony_ci            /* Load negate constant into temp for CMP. */
2264bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_add64(
2265bf215546Sopenharmony_ci               0, /* cc */
2266bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2267bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2268bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2269bf215546Sopenharmony_ci                  (cond_render_negate_constant >> 1), /* src0 = 0 */
2270bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2271bf215546Sopenharmony_ci                  (zero_constant64 >> 1), /* src1 = 0 */
2272bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2273bf215546Sopenharmony_ci                  (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2274bf215546Sopenharmony_ci                                                     */
2275bf215546Sopenharmony_ci
2276bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_wdf(0));
2277bf215546Sopenharmony_ci
2278bf215546Sopenharmony_ci            for (uint32_t i = 0; i < 4; i++) {
2279bf215546Sopenharmony_ci               APPEND(pvr_pds_inst_encode_stflp32(
2280bf215546Sopenharmony_ci                  1, /* enable immediate */
2281bf215546Sopenharmony_ci                  0, /* cc */
2282bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2283bf215546Sopenharmony_ci                  cond_render_pred_temp + i, /* SRC0 */
2284bf215546Sopenharmony_ci                  cond_render_pred_mask_constant + i, /* SRC1 */
2285bf215546Sopenharmony_ci                  0, /* SRC2 (Shift) */
2286bf215546Sopenharmony_ci                  cond_render_pred_temp + i)); /* DST */
2287bf215546Sopenharmony_ci
2288bf215546Sopenharmony_ci               APPEND(
2289bf215546Sopenharmony_ci                  pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2290bf215546Sopenharmony_ci                                              0, /* cc */
2291bf215546Sopenharmony_ci                                              PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2292bf215546Sopenharmony_ci                                                                         */
2293bf215546Sopenharmony_ci                                              cond_render_pred_temp + i, /* SRC0
2294bf215546Sopenharmony_ci                                                                          */
2295bf215546Sopenharmony_ci                                              cond_render_pred_temp, /* SRC1 */
2296bf215546Sopenharmony_ci                                              0, /* SRC2 (Shift) */
2297bf215546Sopenharmony_ci                                              cond_render_pred_temp)); /* DST */
2298bf215546Sopenharmony_ci            }
2299bf215546Sopenharmony_ci
2300bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2301bf215546Sopenharmony_ci                                            cond_render_pred_temp + 1, /* SRC1
2302bf215546Sopenharmony_ci                                                                        */
2303bf215546Sopenharmony_ci                                            0, /* SRC0 */
2304bf215546Sopenharmony_ci                                            0)); /* GLOBALREG */
2305bf215546Sopenharmony_ci
2306bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2307bf215546Sopenharmony_ci                                               0, /* cc */
2308bf215546Sopenharmony_ci                                               PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2309bf215546Sopenharmony_ci                                                                           */
2310bf215546Sopenharmony_ci                                               cond_render_pred_temp, /* SRC0 */
2311bf215546Sopenharmony_ci                                               cond_render_negate_temp, /* SRC1
2312bf215546Sopenharmony_ci                                                                         */
2313bf215546Sopenharmony_ci                                               0, /* SRC2 (Shift) */
2314bf215546Sopenharmony_ci                                               cond_render_pred_temp)); /* DST
2315bf215546Sopenharmony_ci                                                                         */
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci            /* Check that the predicate is 0. */
2318bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_cmpi(
2319bf215546Sopenharmony_ci               0, /* cc */
2320bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2321bf215546Sopenharmony_ci               (cond_render_pred_temp >> 1) +
2322bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2323bf215546Sopenharmony_ci               0)); /* SRC1 */
2324bf215546Sopenharmony_ci
2325bf215546Sopenharmony_ci            /* If predicate is 0, skip DOUTU. */
2326bf215546Sopenharmony_ci            APPEND(pvr_pds_inst_encode_bra(
2327bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2328bf215546Sopenharmony_ci                                                  P0 */
2329bf215546Sopenharmony_ci               0, /* NEG */
2330bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2331bf215546Sopenharmony_ci                                                    keep
2332bf215546Sopenharmony_ci                                                  */
2333bf215546Sopenharmony_ci               2));
2334bf215546Sopenharmony_ci         }
2335bf215546Sopenharmony_ci
2336bf215546Sopenharmony_ci         /* Issue the task to the USC.
2337bf215546Sopenharmony_ci          * DoutU src1=USC Code Base address, src2=doutu word 2.
2338bf215546Sopenharmony_ci          */
2339bf215546Sopenharmony_ci         APPEND(pvr_pds_encode_doutu(1, /* cc */
2340bf215546Sopenharmony_ci                                     1, /* END */
2341bf215546Sopenharmony_ci                                     usc_control_constant64 >> 1)); /* SRC0;
2342bf215546Sopenharmony_ci                                                                     * DOUTU
2343bf215546Sopenharmony_ci                                                                     * 64-bit
2344bf215546Sopenharmony_ci                                                                     * Src0.
2345bf215546Sopenharmony_ci                                                                     */
2346bf215546Sopenharmony_ci      }
2347bf215546Sopenharmony_ci
2348bf215546Sopenharmony_ci      /* End the program if the Dout did not already end it. */
2349bf215546Sopenharmony_ci      APPEND(pvr_pds_inst_encode_halt(0));
2350bf215546Sopenharmony_ci#undef APPEND
2351bf215546Sopenharmony_ci   }
2352bf215546Sopenharmony_ci
2353bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2354bf215546Sopenharmony_ci      /* Set the data segment pointer and ensure we return 1 past the buffer
2355bf215546Sopenharmony_ci       * ptr.
2356bf215546Sopenharmony_ci       */
2357bf215546Sopenharmony_ci      program->data_segment = buffer;
2358bf215546Sopenharmony_ci
2359bf215546Sopenharmony_ci      buffer += next_constant;
2360bf215546Sopenharmony_ci   }
2361bf215546Sopenharmony_ci
2362bf215546Sopenharmony_ci   /* Require at least one DWORD of PDS data so the program runs. */
2363bf215546Sopenharmony_ci   data_size = MAX2(1, data_size);
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci   program->temps_used = temps_used;
2366bf215546Sopenharmony_ci   program->highest_temp = temps_used;
2367bf215546Sopenharmony_ci   program->data_size = data_size;
2368bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_SIZES)
2369bf215546Sopenharmony_ci      program->code_size = code_size;
2370bf215546Sopenharmony_ci
2371bf215546Sopenharmony_ci   return buffer;
2372bf215546Sopenharmony_ci}
2373bf215546Sopenharmony_ci
2374bf215546Sopenharmony_ci/**
2375bf215546Sopenharmony_ci * Generates the PDS vertex shader data or code block. This program will do a
2376bf215546Sopenharmony_ci * DMA into USC Constants followed by a DOUTU.
2377bf215546Sopenharmony_ci *
2378bf215546Sopenharmony_ci * \param program Pointer to the PDS vertex shader program.
2379bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
2380bf215546Sopenharmony_ci * \param gen_mode Generate code or data.
2381bf215546Sopenharmony_ci * \param dev_info PVR device information struct.
2382bf215546Sopenharmony_ci * \returns Pointer to just beyond the code/data.
2383bf215546Sopenharmony_ci */
2384bf215546Sopenharmony_ciuint32_t *pvr_pds_vertex_shader_sa(
2385bf215546Sopenharmony_ci   struct pvr_pds_vertex_shader_sa_program *restrict program,
2386bf215546Sopenharmony_ci   uint32_t *restrict buffer,
2387bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
2388bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
2389bf215546Sopenharmony_ci{
2390bf215546Sopenharmony_ci   uint32_t next_constant;
2391bf215546Sopenharmony_ci   uint32_t data_size = 0;
2392bf215546Sopenharmony_ci   uint32_t code_size = 0;
2393bf215546Sopenharmony_ci
2394bf215546Sopenharmony_ci   uint32_t usc_control_constant64 = 0;
2395bf215546Sopenharmony_ci   uint32_t dma_address_constant64 = 0;
2396bf215546Sopenharmony_ci   uint32_t dma_control_constant32 = 0;
2397bf215546Sopenharmony_ci   uint32_t doutw_value_constant64 = 0;
2398bf215546Sopenharmony_ci   uint32_t doutw_control_constant32 = 0;
2399bf215546Sopenharmony_ci   uint32_t fence_constant_word = 0;
2400bf215546Sopenharmony_ci   uint32_t *buffer_base;
2401bf215546Sopenharmony_ci   uint32_t kick_index;
2402bf215546Sopenharmony_ci
2403bf215546Sopenharmony_ci   uint32_t total_num_doutw =
2404bf215546Sopenharmony_ci      program->num_dword_doutw + program->num_q_word_doutw;
2405bf215546Sopenharmony_ci   uint32_t total_size_dma =
2406bf215546Sopenharmony_ci      program->num_dword_doutw + 2 * program->num_q_word_doutw;
2407bf215546Sopenharmony_ci
2408bf215546Sopenharmony_ci   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2409bf215546Sopenharmony_ci
2410bf215546Sopenharmony_ci   /* Copy the DMA control words and USC task control words to constants.
2411bf215546Sopenharmony_ci    *
2412bf215546Sopenharmony_ci    * Arrange them so that the 64-bit words are together followed by the 32-bit
2413bf215546Sopenharmony_ci    * words.
2414bf215546Sopenharmony_ci    */
2415bf215546Sopenharmony_ci   if (program->kick_usc) {
2416bf215546Sopenharmony_ci      usc_control_constant64 =
2417bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
2418bf215546Sopenharmony_ci   }
2419bf215546Sopenharmony_ci
2420bf215546Sopenharmony_ci   if (program->clear_pds_barrier) {
2421bf215546Sopenharmony_ci      fence_constant_word =
2422bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
2423bf215546Sopenharmony_ci   }
2424bf215546Sopenharmony_ci   dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2425bf215546Sopenharmony_ci                                                  2 * program->num_dma_kicks,
2426bf215546Sopenharmony_ci                                                  &data_size);
2427bf215546Sopenharmony_ci
2428bf215546Sopenharmony_ci   /* Assign all unaligned constants together to avoid alignment issues caused
2429bf215546Sopenharmony_ci    * by pvr_pds_get_constants with even allocation sizes.
2430bf215546Sopenharmony_ci    */
2431bf215546Sopenharmony_ci   doutw_value_constant64 = pvr_pds_get_constants(
2432bf215546Sopenharmony_ci      &next_constant,
2433bf215546Sopenharmony_ci      total_size_dma + total_num_doutw + program->num_dma_kicks,
2434bf215546Sopenharmony_ci      &data_size);
2435bf215546Sopenharmony_ci   doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2436bf215546Sopenharmony_ci   dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2437bf215546Sopenharmony_ci
2438bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2439bf215546Sopenharmony_ci      buffer_base = buffer;
2440bf215546Sopenharmony_ci
2441bf215546Sopenharmony_ci      if (program->kick_usc) {
2442bf215546Sopenharmony_ci         /* Src0 for DOUTU. */
2443bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(buffer_base,
2444bf215546Sopenharmony_ci                                     usc_control_constant64,
2445bf215546Sopenharmony_ci                                     program->usc_task_control.src0); /* DOUTU
2446bf215546Sopenharmony_ci                                                                       * 64-bit
2447bf215546Sopenharmony_ci                                                                       * Src0.
2448bf215546Sopenharmony_ci                                                                       */
2449bf215546Sopenharmony_ci         buffer += 2;
2450bf215546Sopenharmony_ci      }
2451bf215546Sopenharmony_ci
2452bf215546Sopenharmony_ci      if (program->clear_pds_barrier) {
2453bf215546Sopenharmony_ci         /* Encode the fence constant src0. Fence barrier is initialized to
2454bf215546Sopenharmony_ci          * zero.
2455bf215546Sopenharmony_ci          */
2456bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2457bf215546Sopenharmony_ci         buffer += 2;
2458bf215546Sopenharmony_ci      }
2459bf215546Sopenharmony_ci
2460bf215546Sopenharmony_ci      if (total_num_doutw > 0) {
2461bf215546Sopenharmony_ci         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2462bf215546Sopenharmony_ci            /* Write the constant for the coefficient register write. */
2463bf215546Sopenharmony_ci            pvr_pds_write_constant64(buffer_base,
2464bf215546Sopenharmony_ci                                     doutw_value_constant64,
2465bf215546Sopenharmony_ci                                     program->q_word_doutw_value[2 * i],
2466bf215546Sopenharmony_ci                                     program->q_word_doutw_value[2 * i + 1]);
2467bf215546Sopenharmony_ci            pvr_pds_write_constant32(
2468bf215546Sopenharmony_ci               buffer_base,
2469bf215546Sopenharmony_ci               doutw_control_constant32,
2470bf215546Sopenharmony_ci               program->q_word_doutw_control[i] |
2471bf215546Sopenharmony_ci                  ((!program->num_dma_kicks && i == total_num_doutw - 1)
2472bf215546Sopenharmony_ci                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2473bf215546Sopenharmony_ci                      : 0));
2474bf215546Sopenharmony_ci
2475bf215546Sopenharmony_ci            doutw_value_constant64 += 2;
2476bf215546Sopenharmony_ci            doutw_control_constant32 += 1;
2477bf215546Sopenharmony_ci         }
2478bf215546Sopenharmony_ci
2479bf215546Sopenharmony_ci         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2480bf215546Sopenharmony_ci            /* Write the constant for the coefficient register write. */
2481bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer_base,
2482bf215546Sopenharmony_ci                                     doutw_value_constant64,
2483bf215546Sopenharmony_ci                                     program->dword_doutw_value[i]);
2484bf215546Sopenharmony_ci            pvr_pds_write_constant32(
2485bf215546Sopenharmony_ci               buffer_base,
2486bf215546Sopenharmony_ci               doutw_control_constant32,
2487bf215546Sopenharmony_ci               program->dword_doutw_control[i] |
2488bf215546Sopenharmony_ci                  ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2489bf215546Sopenharmony_ci                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2490bf215546Sopenharmony_ci                      : 0));
2491bf215546Sopenharmony_ci
2492bf215546Sopenharmony_ci            doutw_value_constant64 += 1;
2493bf215546Sopenharmony_ci            doutw_control_constant32 += 1;
2494bf215546Sopenharmony_ci         }
2495bf215546Sopenharmony_ci
2496bf215546Sopenharmony_ci         buffer += total_size_dma + total_num_doutw;
2497bf215546Sopenharmony_ci      }
2498bf215546Sopenharmony_ci
2499bf215546Sopenharmony_ci      if (program->num_dma_kicks == 1) /* Most-common case. */
2500bf215546Sopenharmony_ci      {
2501bf215546Sopenharmony_ci         /* Src0 for DOUTD - Address. */
2502bf215546Sopenharmony_ci         pvr_pds_write_dma_address(buffer_base,
2503bf215546Sopenharmony_ci                                   dma_address_constant64,
2504bf215546Sopenharmony_ci                                   program->dma_address[0],
2505bf215546Sopenharmony_ci                                   false,
2506bf215546Sopenharmony_ci                                   dev_info);
2507bf215546Sopenharmony_ci
2508bf215546Sopenharmony_ci         /* Src1 for DOUTD - Control Word. */
2509bf215546Sopenharmony_ci         pvr_pds_write_constant32(
2510bf215546Sopenharmony_ci            buffer_base,
2511bf215546Sopenharmony_ci            dma_control_constant32,
2512bf215546Sopenharmony_ci            program->dma_control[0] |
2513bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2514bf215546Sopenharmony_ci
2515bf215546Sopenharmony_ci         /* Move the buffer ptr along as we will return 1 past the buffer. */
2516bf215546Sopenharmony_ci         buffer += 3;
2517bf215546Sopenharmony_ci      } else if (program->num_dma_kicks > 1) {
2518bf215546Sopenharmony_ci         for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2519bf215546Sopenharmony_ci              kick_index++) {
2520bf215546Sopenharmony_ci            /* Src0 for DOUTD - Address. */
2521bf215546Sopenharmony_ci            pvr_pds_write_dma_address(buffer_base,
2522bf215546Sopenharmony_ci                                      dma_address_constant64,
2523bf215546Sopenharmony_ci                                      program->dma_address[kick_index],
2524bf215546Sopenharmony_ci                                      false,
2525bf215546Sopenharmony_ci                                      dev_info);
2526bf215546Sopenharmony_ci
2527bf215546Sopenharmony_ci            /* Src1 for DOUTD - Control Word. */
2528bf215546Sopenharmony_ci            pvr_pds_write_constant32(buffer_base,
2529bf215546Sopenharmony_ci                                     dma_control_constant32,
2530bf215546Sopenharmony_ci                                     program->dma_control[kick_index]);
2531bf215546Sopenharmony_ci            dma_address_constant64 += 2;
2532bf215546Sopenharmony_ci            dma_control_constant32 += 1;
2533bf215546Sopenharmony_ci         }
2534bf215546Sopenharmony_ci
2535bf215546Sopenharmony_ci         /* Src0 for DOUTD - Address. */
2536bf215546Sopenharmony_ci         pvr_pds_write_dma_address(buffer_base,
2537bf215546Sopenharmony_ci                                   dma_address_constant64,
2538bf215546Sopenharmony_ci                                   program->dma_address[kick_index],
2539bf215546Sopenharmony_ci                                   false,
2540bf215546Sopenharmony_ci                                   dev_info);
2541bf215546Sopenharmony_ci
2542bf215546Sopenharmony_ci         /* Src1 for DOUTD - Control Word. */
2543bf215546Sopenharmony_ci         pvr_pds_write_constant32(
2544bf215546Sopenharmony_ci            buffer_base,
2545bf215546Sopenharmony_ci            dma_control_constant32,
2546bf215546Sopenharmony_ci            program->dma_control[kick_index] |
2547bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2548bf215546Sopenharmony_ci
2549bf215546Sopenharmony_ci         buffer += 3 * program->num_dma_kicks;
2550bf215546Sopenharmony_ci      }
2551bf215546Sopenharmony_ci   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2552bf215546Sopenharmony_ci      if (program->clear_pds_barrier) {
2553bf215546Sopenharmony_ci         /* Zero the persistent temp (SW fence for context switch). */
2554bf215546Sopenharmony_ci         *buffer++ = pvr_pds_inst_encode_add64(
2555bf215546Sopenharmony_ci            0, /* cc */
2556bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2557bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2558bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2559bf215546Sopenharmony_ci               (fence_constant_word >> 1), /* src0 = 0 */
2560bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2561bf215546Sopenharmony_ci               (fence_constant_word >> 1), /* src1 = 0 */
2562bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2563bf215546Sopenharmony_ci                                                            * ptemp[0]
2564bf215546Sopenharmony_ci                                                            */
2565bf215546Sopenharmony_ci      }
2566bf215546Sopenharmony_ci
2567bf215546Sopenharmony_ci      if (total_num_doutw > 0) {
2568bf215546Sopenharmony_ci         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2569bf215546Sopenharmony_ci            /* Set the coefficient register to data value. */
2570bf215546Sopenharmony_ci            *buffer++ = pvr_pds_encode_doutw64(
2571bf215546Sopenharmony_ci               /* cc */ 0,
2572bf215546Sopenharmony_ci               /* END */ !program->num_dma_kicks && !program->kick_usc &&
2573bf215546Sopenharmony_ci                  (i == total_num_doutw - 1),
2574bf215546Sopenharmony_ci               /* SRC1 */ doutw_control_constant32,
2575bf215546Sopenharmony_ci               /* SRC0 */ doutw_value_constant64 >> 1);
2576bf215546Sopenharmony_ci
2577bf215546Sopenharmony_ci            doutw_value_constant64 += 2;
2578bf215546Sopenharmony_ci            doutw_control_constant32 += 1;
2579bf215546Sopenharmony_ci         }
2580bf215546Sopenharmony_ci
2581bf215546Sopenharmony_ci         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2582bf215546Sopenharmony_ci            /* Set the coefficient register to data value. */
2583bf215546Sopenharmony_ci            *buffer++ = pvr_pds_encode_doutw64(
2584bf215546Sopenharmony_ci               /* cc */ 0,
2585bf215546Sopenharmony_ci               /* END */ !program->num_dma_kicks && !program->kick_usc &&
2586bf215546Sopenharmony_ci                  (i == program->num_dword_doutw - 1),
2587bf215546Sopenharmony_ci               /* SRC1 */ doutw_control_constant32,
2588bf215546Sopenharmony_ci               /* SRC0 */ doutw_value_constant64 >> 1);
2589bf215546Sopenharmony_ci
2590bf215546Sopenharmony_ci            doutw_value_constant64 += 1;
2591bf215546Sopenharmony_ci            doutw_control_constant32 += 1;
2592bf215546Sopenharmony_ci         }
2593bf215546Sopenharmony_ci      }
2594bf215546Sopenharmony_ci
2595bf215546Sopenharmony_ci      if (program->num_dma_kicks != 0) {
2596bf215546Sopenharmony_ci         /* DMA the state into the secondary attributes. */
2597bf215546Sopenharmony_ci
2598bf215546Sopenharmony_ci         if (program->num_dma_kicks == 1) /* Most-common case. */
2599bf215546Sopenharmony_ci         {
2600bf215546Sopenharmony_ci            *buffer++ = pvr_pds_encode_doutd(
2601bf215546Sopenharmony_ci               /* cc */ 0,
2602bf215546Sopenharmony_ci               /* END */ !program->kick_usc,
2603bf215546Sopenharmony_ci               /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2604bf215546Sopenharmony_ci               /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2605bf215546Sopenharmony_ci                                                         * Src0.
2606bf215546Sopenharmony_ci                                                         */
2607bf215546Sopenharmony_ci         } else {
2608bf215546Sopenharmony_ci            for (kick_index = 0; kick_index < program->num_dma_kicks;
2609bf215546Sopenharmony_ci                 kick_index++) {
2610bf215546Sopenharmony_ci               *buffer++ = pvr_pds_encode_doutd(
2611bf215546Sopenharmony_ci                  /* cc */ 0,
2612bf215546Sopenharmony_ci                  /* END */ (!program->kick_usc) &&
2613bf215546Sopenharmony_ci                     (kick_index + 1 == program->num_dma_kicks),
2614bf215546Sopenharmony_ci                  /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2615bf215546Sopenharmony_ci                                                      * Src1.
2616bf215546Sopenharmony_ci                                                      */
2617bf215546Sopenharmony_ci                  /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2618bf215546Sopenharmony_ci                                                            * 64-bit
2619bf215546Sopenharmony_ci                                                            * Src0.
2620bf215546Sopenharmony_ci                                                            */
2621bf215546Sopenharmony_ci               dma_address_constant64 += 2;
2622bf215546Sopenharmony_ci               dma_control_constant32 += 1;
2623bf215546Sopenharmony_ci            }
2624bf215546Sopenharmony_ci         }
2625bf215546Sopenharmony_ci      }
2626bf215546Sopenharmony_ci
2627bf215546Sopenharmony_ci      if (program->kick_usc) {
2628bf215546Sopenharmony_ci         /* Kick the USC. */
2629bf215546Sopenharmony_ci         *buffer++ = pvr_pds_encode_doutu(
2630bf215546Sopenharmony_ci            /* cc */ 0,
2631bf215546Sopenharmony_ci            /* END */ 1,
2632bf215546Sopenharmony_ci            /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2633bf215546Sopenharmony_ci                                                      */
2634bf215546Sopenharmony_ci      }
2635bf215546Sopenharmony_ci
2636bf215546Sopenharmony_ci      if (!program->kick_usc && program->num_dma_kicks == 0 &&
2637bf215546Sopenharmony_ci          total_num_doutw == 0) {
2638bf215546Sopenharmony_ci         *buffer++ = pvr_pds_inst_encode_halt(0);
2639bf215546Sopenharmony_ci      }
2640bf215546Sopenharmony_ci   }
2641bf215546Sopenharmony_ci
2642bf215546Sopenharmony_ci   code_size = program->num_dma_kicks + total_num_doutw;
2643bf215546Sopenharmony_ci   if (program->clear_pds_barrier)
2644bf215546Sopenharmony_ci      code_size++; /* ADD64 instruction. */
2645bf215546Sopenharmony_ci
2646bf215546Sopenharmony_ci   if (program->kick_usc)
2647bf215546Sopenharmony_ci      code_size++;
2648bf215546Sopenharmony_ci
2649bf215546Sopenharmony_ci   /* If there are no DMAs and no USC kick then code is HALT only. */
2650bf215546Sopenharmony_ci   if (code_size == 0)
2651bf215546Sopenharmony_ci      code_size = 1;
2652bf215546Sopenharmony_ci
2653bf215546Sopenharmony_ci   program->data_size = data_size;
2654bf215546Sopenharmony_ci   program->code_size = code_size;
2655bf215546Sopenharmony_ci
2656bf215546Sopenharmony_ci   return buffer;
2657bf215546Sopenharmony_ci}
2658bf215546Sopenharmony_ci
2659bf215546Sopenharmony_ci/**
2660bf215546Sopenharmony_ci * Writes the code segment for the PDS pixel shader secondary attributes
2661bf215546Sopenharmony_ci * (uniform/texture) program.
2662bf215546Sopenharmony_ci *
2663bf215546Sopenharmony_ci * \param program Pointer to the PDS pixel shader secondary attributes program.
2664bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the code/data.
2665bf215546Sopenharmony_ci * \param gen_mode Code is generated or sizes only updated (not data).
2666bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program/data.
2667bf215546Sopenharmony_ci */
2668bf215546Sopenharmony_ciuint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2669bf215546Sopenharmony_ci   struct pvr_pds_pixel_shader_sa_program *restrict program,
2670bf215546Sopenharmony_ci   uint32_t *restrict buffer,
2671bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode)
2672bf215546Sopenharmony_ci{
2673bf215546Sopenharmony_ci   uint32_t *instruction;
2674bf215546Sopenharmony_ci   uint32_t code_size = 0;
2675bf215546Sopenharmony_ci   uint32_t data_size = 0;
2676bf215546Sopenharmony_ci   uint32_t temps_used = 0;
2677bf215546Sopenharmony_ci   uint32_t next_constant;
2678bf215546Sopenharmony_ci
2679bf215546Sopenharmony_ci   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2680bf215546Sopenharmony_ci          0);
2681bf215546Sopenharmony_ci
2682bf215546Sopenharmony_ci   assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
2683bf215546Sopenharmony_ci
2684bf215546Sopenharmony_ci   /* clang-format off */
2685bf215546Sopenharmony_ci   /* Shape of code segment (note: clear is different)
2686bf215546Sopenharmony_ci    *
2687bf215546Sopenharmony_ci    *      Code
2688bf215546Sopenharmony_ci    *    +------------+
2689bf215546Sopenharmony_ci    *    | BRA if0    |
2690bf215546Sopenharmony_ci    *    | DOUTD      |
2691bf215546Sopenharmony_ci    *    |  ...       |
2692bf215546Sopenharmony_ci    *    | DOUTD.halt |
2693bf215546Sopenharmony_ci    *    | uniform    |
2694bf215546Sopenharmony_ci    *    | DOUTD      |
2695bf215546Sopenharmony_ci    *    |  ...       |
2696bf215546Sopenharmony_ci    *    |  ...       |
2697bf215546Sopenharmony_ci    *    | DOUTW      |
2698bf215546Sopenharmony_ci    *    |  ...       |
2699bf215546Sopenharmony_ci    *    |  ...       |
2700bf215546Sopenharmony_ci    *    | DOUTU.halt |
2701bf215546Sopenharmony_ci    *    | HALT       |
2702bf215546Sopenharmony_ci    *    +------------+
2703bf215546Sopenharmony_ci    */
2704bf215546Sopenharmony_ci   /* clang-format on */
2705bf215546Sopenharmony_ci   instruction = buffer;
2706bf215546Sopenharmony_ci
2707bf215546Sopenharmony_ci   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2708bf215546Sopenharmony_ci
2709bf215546Sopenharmony_ci   /* The clear color can arrive packed in the right form in the first (or
2710bf215546Sopenharmony_ci    * first 2) dwords of the shared registers and the program will issue a
2711bf215546Sopenharmony_ci    * single doutw for this.
2712bf215546Sopenharmony_ci    */
2713bf215546Sopenharmony_ci   if (program->clear && program->packed_clear) {
2714bf215546Sopenharmony_ci      uint32_t color_constant1 =
2715bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
2716bf215546Sopenharmony_ci
2717bf215546Sopenharmony_ci      uint32_t control_word_constant1 =
2718bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
2719bf215546Sopenharmony_ci
2720bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2721bf215546Sopenharmony_ci         /* DOUTW the clear color to the USC constants. Predicate with
2722bf215546Sopenharmony_ci          * uniform loading flag (IF0).
2723bf215546Sopenharmony_ci          */
2724bf215546Sopenharmony_ci         *instruction++ = pvr_pds_encode_doutw64(
2725bf215546Sopenharmony_ci            /* cc */ 1, /* Only for uniform loading program. */
2726bf215546Sopenharmony_ci            /* END */ program->kick_usc ? 0 : 1, /* Last
2727bf215546Sopenharmony_ci                                                  * instruction
2728bf215546Sopenharmony_ci                                                  * for a clear.
2729bf215546Sopenharmony_ci                                                  */
2730bf215546Sopenharmony_ci            /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2731bf215546Sopenharmony_ci            /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2732bf215546Sopenharmony_ci
2733bf215546Sopenharmony_ci         code_size += 1;
2734bf215546Sopenharmony_ci      }
2735bf215546Sopenharmony_ci   } else if (program->clear) {
2736bf215546Sopenharmony_ci      uint32_t color_constant1, color_constant2;
2737bf215546Sopenharmony_ci
2738bf215546Sopenharmony_ci      if (program->clear_color_dest_reg & 0x1) {
2739bf215546Sopenharmony_ci         uint32_t color_constant3, control_word_constant1,
2740bf215546Sopenharmony_ci            control_word_constant2, color_constant4;
2741bf215546Sopenharmony_ci
2742bf215546Sopenharmony_ci         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2743bf215546Sopenharmony_ci         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2744bf215546Sopenharmony_ci         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2745bf215546Sopenharmony_ci
2746bf215546Sopenharmony_ci         control_word_constant1 =
2747bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2748bf215546Sopenharmony_ci         control_word_constant2 =
2749bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2750bf215546Sopenharmony_ci         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2751bf215546Sopenharmony_ci
2752bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2753bf215546Sopenharmony_ci            /* DOUTW the clear color to the USSE constants. Predicate with
2754bf215546Sopenharmony_ci             * uniform loading flag (IF0).
2755bf215546Sopenharmony_ci             */
2756bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutw64(
2757bf215546Sopenharmony_ci               /* cc */ 1, /* Only for Uniform Loading program */
2758bf215546Sopenharmony_ci               /* END */ 0,
2759bf215546Sopenharmony_ci               /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2760bf215546Sopenharmony_ci               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2761bf215546Sopenharmony_ci
2762bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutw64(
2763bf215546Sopenharmony_ci               /* cc */ 1, /* Only for Uniform Loading program */
2764bf215546Sopenharmony_ci               /* END */ 0,
2765bf215546Sopenharmony_ci               /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2766bf215546Sopenharmony_ci               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2767bf215546Sopenharmony_ci
2768bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutw64(
2769bf215546Sopenharmony_ci               /* cc */ 1, /* Only for uniform loading program */
2770bf215546Sopenharmony_ci               /* END */ program->kick_usc ? 0 : 1, /* Last
2771bf215546Sopenharmony_ci                                                     * instruction
2772bf215546Sopenharmony_ci                                                     * for a clear.
2773bf215546Sopenharmony_ci                                                     */
2774bf215546Sopenharmony_ci               /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2775bf215546Sopenharmony_ci               /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2776bf215546Sopenharmony_ci         }
2777bf215546Sopenharmony_ci
2778bf215546Sopenharmony_ci         code_size += 3;
2779bf215546Sopenharmony_ci      } else {
2780bf215546Sopenharmony_ci         uint32_t control_word_constant, control_word_last_constant;
2781bf215546Sopenharmony_ci
2782bf215546Sopenharmony_ci         /* Put the clear color and control words into the first 8
2783bf215546Sopenharmony_ci          * constants.
2784bf215546Sopenharmony_ci          */
2785bf215546Sopenharmony_ci         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2786bf215546Sopenharmony_ci         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2787bf215546Sopenharmony_ci         control_word_constant =
2788bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2789bf215546Sopenharmony_ci         control_word_last_constant =
2790bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2791bf215546Sopenharmony_ci
2792bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2793bf215546Sopenharmony_ci            /* DOUTW the clear color to the USSE constants. Predicate with
2794bf215546Sopenharmony_ci             * uniform loading flag (IF0).
2795bf215546Sopenharmony_ci             */
2796bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutw64(
2797bf215546Sopenharmony_ci               /* cc */ 1, /* Only for Uniform Loading program */
2798bf215546Sopenharmony_ci               /* END */ 0,
2799bf215546Sopenharmony_ci               /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2800bf215546Sopenharmony_ci               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2801bf215546Sopenharmony_ci
2802bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutw64(
2803bf215546Sopenharmony_ci               /* cc */ 1, /* Only for uniform loading program */
2804bf215546Sopenharmony_ci               /* END */ program->kick_usc ? 0 : 1, /* Last
2805bf215546Sopenharmony_ci                                                     * instruction
2806bf215546Sopenharmony_ci                                                     * for a clear.
2807bf215546Sopenharmony_ci                                                     */
2808bf215546Sopenharmony_ci               /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2809bf215546Sopenharmony_ci                                                       */
2810bf215546Sopenharmony_ci               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2811bf215546Sopenharmony_ci         }
2812bf215546Sopenharmony_ci
2813bf215546Sopenharmony_ci         code_size += 2;
2814bf215546Sopenharmony_ci      }
2815bf215546Sopenharmony_ci
2816bf215546Sopenharmony_ci      if (program->kick_usc) {
2817bf215546Sopenharmony_ci         uint32_t doutu_constant64;
2818bf215546Sopenharmony_ci
2819bf215546Sopenharmony_ci         doutu_constant64 =
2820bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2821bf215546Sopenharmony_ci
2822bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2823bf215546Sopenharmony_ci            /* Issue the task to the USC.
2824bf215546Sopenharmony_ci             *
2825bf215546Sopenharmony_ci             * dout ds1[constant_use], ds0[constant_use],
2826bf215546Sopenharmony_ci             * ds1[constant_use], emit
2827bf215546Sopenharmony_ci             */
2828bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutu(
2829bf215546Sopenharmony_ci               /* cc */ 0,
2830bf215546Sopenharmony_ci               /* END */ 1,
2831bf215546Sopenharmony_ci               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2832bf215546Sopenharmony_ci                                                   */
2833bf215546Sopenharmony_ci         }
2834bf215546Sopenharmony_ci
2835bf215546Sopenharmony_ci         code_size += 1;
2836bf215546Sopenharmony_ci      }
2837bf215546Sopenharmony_ci
2838bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2839bf215546Sopenharmony_ci         /* End the program. */
2840bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_halt(0);
2841bf215546Sopenharmony_ci      }
2842bf215546Sopenharmony_ci      code_size += 1;
2843bf215546Sopenharmony_ci   } else {
2844bf215546Sopenharmony_ci      uint32_t total_num_doutw =
2845bf215546Sopenharmony_ci         program->num_dword_doutw + program->num_q_word_doutw;
2846bf215546Sopenharmony_ci      bool both_textures_and_uniforms =
2847bf215546Sopenharmony_ci         ((program->num_texture_dma_kicks > 0) &&
2848bf215546Sopenharmony_ci          ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2849bf215546Sopenharmony_ci           program->kick_usc));
2850bf215546Sopenharmony_ci      uint32_t doutu_constant64 = 0;
2851bf215546Sopenharmony_ci
2852bf215546Sopenharmony_ci      if (both_textures_and_uniforms) {
2853bf215546Sopenharmony_ci         /* If the size of a PDS data section is 0, the hardware won't run
2854bf215546Sopenharmony_ci          * it. We therefore don't need to branch when there is only a
2855bf215546Sopenharmony_ci          * texture OR a uniform update program.
2856bf215546Sopenharmony_ci          */
2857bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2858bf215546Sopenharmony_ci            uint32_t branch_address =
2859bf215546Sopenharmony_ci               MAX2(1 + program->num_texture_dma_kicks, 2);
2860bf215546Sopenharmony_ci
2861bf215546Sopenharmony_ci            /* Use If0 to BRAnch to uniform code. */
2862bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_bra(
2863bf215546Sopenharmony_ci               /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2864bf215546Sopenharmony_ci               /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2865bf215546Sopenharmony_ci               /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2866bf215546Sopenharmony_ci               /* ADDR */ branch_address);
2867bf215546Sopenharmony_ci         }
2868bf215546Sopenharmony_ci
2869bf215546Sopenharmony_ci         code_size += 1;
2870bf215546Sopenharmony_ci      }
2871bf215546Sopenharmony_ci
2872bf215546Sopenharmony_ci      if (program->num_texture_dma_kicks > 0) {
2873bf215546Sopenharmony_ci         uint32_t dma_address_constant64;
2874bf215546Sopenharmony_ci         uint32_t dma_control_constant32;
2875bf215546Sopenharmony_ci         /* Allocate 3 constant spaces for each kick. The 64-bit constants
2876bf215546Sopenharmony_ci          * come first followed by the 32-bit constants.
2877bf215546Sopenharmony_ci          */
2878bf215546Sopenharmony_ci         dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2879bf215546Sopenharmony_ci         dma_control_constant32 =
2880bf215546Sopenharmony_ci            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2881bf215546Sopenharmony_ci
2882bf215546Sopenharmony_ci         for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2883bf215546Sopenharmony_ci            code_size += 1;
2884bf215546Sopenharmony_ci            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2885bf215546Sopenharmony_ci               continue;
2886bf215546Sopenharmony_ci
2887bf215546Sopenharmony_ci            /* DMA the state into the secondary attributes. */
2888bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutd(
2889bf215546Sopenharmony_ci               /* cc */ 0,
2890bf215546Sopenharmony_ci               /* END */ dma == (program->num_texture_dma_kicks - 1),
2891bf215546Sopenharmony_ci               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2892bf215546Sopenharmony_ci               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2893bf215546Sopenharmony_ci                                                         * 64-bit
2894bf215546Sopenharmony_ci                                                         * Src0
2895bf215546Sopenharmony_ci                                                         */
2896bf215546Sopenharmony_ci            dma_address_constant64 += 2;
2897bf215546Sopenharmony_ci            dma_control_constant32 += 1;
2898bf215546Sopenharmony_ci         }
2899bf215546Sopenharmony_ci      } else if (both_textures_and_uniforms) {
2900bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2901bf215546Sopenharmony_ci            /* End the program. */
2902bf215546Sopenharmony_ci            *instruction++ = pvr_pds_inst_encode_halt(0);
2903bf215546Sopenharmony_ci         }
2904bf215546Sopenharmony_ci
2905bf215546Sopenharmony_ci         code_size += 1;
2906bf215546Sopenharmony_ci      }
2907bf215546Sopenharmony_ci
2908bf215546Sopenharmony_ci      /* Reserve space at the beginning of the data segment for the DOUTU Task
2909bf215546Sopenharmony_ci       * Control if one is needed.
2910bf215546Sopenharmony_ci       */
2911bf215546Sopenharmony_ci      if (program->kick_usc) {
2912bf215546Sopenharmony_ci         doutu_constant64 =
2913bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
2914bf215546Sopenharmony_ci      }
2915bf215546Sopenharmony_ci
2916bf215546Sopenharmony_ci      /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2917bf215546Sopenharmony_ci       * 64-bit constants come first followed by the 32-bit constants.
2918bf215546Sopenharmony_ci       */
2919bf215546Sopenharmony_ci      uint32_t total_size_dma =
2920bf215546Sopenharmony_ci         program->num_dword_doutw + 2 * program->num_q_word_doutw;
2921bf215546Sopenharmony_ci
2922bf215546Sopenharmony_ci      uint32_t dma_address_constant64 = pvr_pds_get_constants(
2923bf215546Sopenharmony_ci         &next_constant,
2924bf215546Sopenharmony_ci         program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2925bf215546Sopenharmony_ci         &data_size);
2926bf215546Sopenharmony_ci      uint32_t doutw_value_constant64 =
2927bf215546Sopenharmony_ci         dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2928bf215546Sopenharmony_ci      uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2929bf215546Sopenharmony_ci      uint32_t doutw_control_constant32 =
2930bf215546Sopenharmony_ci         dma_control_constant32 + program->num_uniform_dma_kicks;
2931bf215546Sopenharmony_ci
2932bf215546Sopenharmony_ci      if (total_num_doutw > 0) {
2933bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 0, &data_size);
2934bf215546Sopenharmony_ci
2935bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2936bf215546Sopenharmony_ci            for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2937bf215546Sopenharmony_ci               /* Set the coefficient register to data value. */
2938bf215546Sopenharmony_ci               *instruction++ = pvr_pds_encode_doutw64(
2939bf215546Sopenharmony_ci                  /* cc */ 0,
2940bf215546Sopenharmony_ci                  /* END */ !program->num_uniform_dma_kicks &&
2941bf215546Sopenharmony_ci                     !program->kick_usc && (i == total_num_doutw - 1),
2942bf215546Sopenharmony_ci                  /* SRC1 */ doutw_control_constant32,
2943bf215546Sopenharmony_ci                  /* SRC0 */ doutw_value_constant64 >> 1);
2944bf215546Sopenharmony_ci
2945bf215546Sopenharmony_ci               doutw_value_constant64 += 2;
2946bf215546Sopenharmony_ci               doutw_control_constant32 += 1;
2947bf215546Sopenharmony_ci            }
2948bf215546Sopenharmony_ci
2949bf215546Sopenharmony_ci            for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2950bf215546Sopenharmony_ci               /* Set the coefficient register to data value. */
2951bf215546Sopenharmony_ci               *instruction++ = pvr_pds_encode_doutw64(
2952bf215546Sopenharmony_ci                  /* cc */ 0,
2953bf215546Sopenharmony_ci                  /* END */ !program->num_uniform_dma_kicks &&
2954bf215546Sopenharmony_ci                     !program->kick_usc && (i == program->num_dword_doutw - 1),
2955bf215546Sopenharmony_ci                  /* SRC1 */ doutw_control_constant32,
2956bf215546Sopenharmony_ci                  /* SRC0 */ doutw_value_constant64 >> 1);
2957bf215546Sopenharmony_ci
2958bf215546Sopenharmony_ci               doutw_value_constant64 += 1;
2959bf215546Sopenharmony_ci               doutw_control_constant32 += 1;
2960bf215546Sopenharmony_ci            }
2961bf215546Sopenharmony_ci         }
2962bf215546Sopenharmony_ci         code_size += total_num_doutw;
2963bf215546Sopenharmony_ci      }
2964bf215546Sopenharmony_ci
2965bf215546Sopenharmony_ci      if (program->num_uniform_dma_kicks > 0) {
2966bf215546Sopenharmony_ci         for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2967bf215546Sopenharmony_ci            code_size += 1;
2968bf215546Sopenharmony_ci
2969bf215546Sopenharmony_ci            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2970bf215546Sopenharmony_ci               continue;
2971bf215546Sopenharmony_ci
2972bf215546Sopenharmony_ci            bool last_instruction = false;
2973bf215546Sopenharmony_ci            if (!program->kick_usc &&
2974bf215546Sopenharmony_ci                (dma == program->num_uniform_dma_kicks - 1)) {
2975bf215546Sopenharmony_ci               last_instruction = true;
2976bf215546Sopenharmony_ci            }
2977bf215546Sopenharmony_ci            /* DMA the state into the secondary attributes. */
2978bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutd(
2979bf215546Sopenharmony_ci               /* cc */ 0,
2980bf215546Sopenharmony_ci               /* END */ last_instruction,
2981bf215546Sopenharmony_ci               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2982bf215546Sopenharmony_ci                                                   */
2983bf215546Sopenharmony_ci               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2984bf215546Sopenharmony_ci                                                         * 64-bit
2985bf215546Sopenharmony_ci                                                         * Src0
2986bf215546Sopenharmony_ci                                                         */
2987bf215546Sopenharmony_ci            dma_address_constant64 += 2;
2988bf215546Sopenharmony_ci            dma_control_constant32 += 1;
2989bf215546Sopenharmony_ci         }
2990bf215546Sopenharmony_ci      }
2991bf215546Sopenharmony_ci
2992bf215546Sopenharmony_ci      if (program->kick_usc) {
2993bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2994bf215546Sopenharmony_ci            /* Issue the task to the USC.
2995bf215546Sopenharmony_ci             *
2996bf215546Sopenharmony_ci             * dout ds1[constant_use], ds0[constant_use],
2997bf215546Sopenharmony_ci             * ds1[constant_use], emit
2998bf215546Sopenharmony_ci             */
2999bf215546Sopenharmony_ci
3000bf215546Sopenharmony_ci            *instruction++ = pvr_pds_encode_doutu(
3001bf215546Sopenharmony_ci               /* cc */ 0,
3002bf215546Sopenharmony_ci               /* END */ 1,
3003bf215546Sopenharmony_ci               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
3004bf215546Sopenharmony_ci         }
3005bf215546Sopenharmony_ci
3006bf215546Sopenharmony_ci         code_size += 1;
3007bf215546Sopenharmony_ci      } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
3008bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3009bf215546Sopenharmony_ci            /* End the program. */
3010bf215546Sopenharmony_ci            *instruction++ = pvr_pds_inst_encode_halt(0);
3011bf215546Sopenharmony_ci         }
3012bf215546Sopenharmony_ci
3013bf215546Sopenharmony_ci         code_size += 1;
3014bf215546Sopenharmony_ci      }
3015bf215546Sopenharmony_ci   }
3016bf215546Sopenharmony_ci
3017bf215546Sopenharmony_ci   /* Minimum temp count is 1. */
3018bf215546Sopenharmony_ci   program->temps_used = MAX2(temps_used, 1);
3019bf215546Sopenharmony_ci   program->code_size = code_size;
3020bf215546Sopenharmony_ci
3021bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3022bf215546Sopenharmony_ci      return instruction;
3023bf215546Sopenharmony_ci   else
3024bf215546Sopenharmony_ci      return NULL;
3025bf215546Sopenharmony_ci}
3026bf215546Sopenharmony_ci
3027bf215546Sopenharmony_ci/**
3028bf215546Sopenharmony_ci * Writes the Uniform Data block for the PDS pixel shader secondary attributes
3029bf215546Sopenharmony_ci * program.
3030bf215546Sopenharmony_ci *
3031bf215546Sopenharmony_ci * \param program Pointer to the PDS pixel shader secondary attributes program.
3032bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the code/data.
3033bf215546Sopenharmony_ci * \param gen_mode Either code or data can be generated or sizes only updated.
3034bf215546Sopenharmony_ci * \param dev_info PVR device information struct.
3035bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program/data.
3036bf215546Sopenharmony_ci */
3037bf215546Sopenharmony_ciuint32_t *pvr_pds_pixel_shader_uniform_texture_data(
3038bf215546Sopenharmony_ci   struct pvr_pds_pixel_shader_sa_program *restrict program,
3039bf215546Sopenharmony_ci   uint32_t *restrict buffer,
3040bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
3041bf215546Sopenharmony_ci   bool uniform,
3042bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
3043bf215546Sopenharmony_ci{
3044bf215546Sopenharmony_ci   uint32_t *constants = buffer;
3045bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3046bf215546Sopenharmony_ci   uint32_t temps_used = 0;
3047bf215546Sopenharmony_ci   uint32_t data_size = 0;
3048bf215546Sopenharmony_ci
3049bf215546Sopenharmony_ci   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
3050bf215546Sopenharmony_ci          0);
3051bf215546Sopenharmony_ci
3052bf215546Sopenharmony_ci   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
3053bf215546Sopenharmony_ci
3054bf215546Sopenharmony_ci   /* Shape of data segment (note: clear is different).
3055bf215546Sopenharmony_ci    *
3056bf215546Sopenharmony_ci    *        Uniform            Texture
3057bf215546Sopenharmony_ci    *    +--------------+   +-------------+
3058bf215546Sopenharmony_ci    *    | USC Task   L |   | USC Task  L |
3059bf215546Sopenharmony_ci    *    |            H |   |           H |
3060bf215546Sopenharmony_ci    *    | DMA1 Src0  L |   | DMA1 Src0 L |
3061bf215546Sopenharmony_ci    *    |            H |   |           H |
3062bf215546Sopenharmony_ci    *    | DMA2 Src0  L |   |             |
3063bf215546Sopenharmony_ci    *    |            H |   |             |
3064bf215546Sopenharmony_ci    *    | DMA1 Src1    |   | DMA1 Src1   |
3065bf215546Sopenharmony_ci    *    | DMA2 Src1    |   |             |
3066bf215546Sopenharmony_ci    *    | DOUTW0 Src1  |   |             |
3067bf215546Sopenharmony_ci    *    | DOUTW1 Src1  |   |             |
3068bf215546Sopenharmony_ci    *    |   ...        |   |             |
3069bf215546Sopenharmony_ci    *    | DOUTWn Srcn  |   |             |
3070bf215546Sopenharmony_ci    *    | other data   |   |             |
3071bf215546Sopenharmony_ci    *    +--------------+   +-------------+
3072bf215546Sopenharmony_ci    */
3073bf215546Sopenharmony_ci
3074bf215546Sopenharmony_ci   /* Generate the PDS pixel shader secondary attributes data.
3075bf215546Sopenharmony_ci    *
3076bf215546Sopenharmony_ci    * Packed Clear
3077bf215546Sopenharmony_ci    * The clear color can arrive packed in the right form in the first (or
3078bf215546Sopenharmony_ci    * first 2) dwords of the shared registers and the program will issue a
3079bf215546Sopenharmony_ci    * single DOUTW for this.
3080bf215546Sopenharmony_ci    */
3081bf215546Sopenharmony_ci   if (program->clear && uniform && program->packed_clear) {
3082bf215546Sopenharmony_ci      uint32_t color_constant1 =
3083bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3084bf215546Sopenharmony_ci
3085bf215546Sopenharmony_ci      uint32_t control_word_constant1 =
3086bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3087bf215546Sopenharmony_ci
3088bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3089bf215546Sopenharmony_ci         uint32_t doutw;
3090bf215546Sopenharmony_ci
3091bf215546Sopenharmony_ci         pvr_pds_write_constant64(constants,
3092bf215546Sopenharmony_ci                                  color_constant1,
3093bf215546Sopenharmony_ci                                  program->clear_color[0],
3094bf215546Sopenharmony_ci                                  program->clear_color[1]);
3095bf215546Sopenharmony_ci
3096bf215546Sopenharmony_ci         /* Load into first constant in common store. */
3097bf215546Sopenharmony_ci         doutw = pvr_pds_encode_doutw_src1(
3098bf215546Sopenharmony_ci            program->clear_color_dest_reg,
3099bf215546Sopenharmony_ci            PVR_PDS_DOUTW_LOWER64,
3100bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3101bf215546Sopenharmony_ci            false,
3102bf215546Sopenharmony_ci            dev_info);
3103bf215546Sopenharmony_ci
3104bf215546Sopenharmony_ci         /* Set the last flag. */
3105bf215546Sopenharmony_ci         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3106bf215546Sopenharmony_ci         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
3107bf215546Sopenharmony_ci      }
3108bf215546Sopenharmony_ci   } else if (program->clear && uniform) {
3109bf215546Sopenharmony_ci      uint32_t color_constant1, color_constant2;
3110bf215546Sopenharmony_ci
3111bf215546Sopenharmony_ci      if (program->clear_color_dest_reg & 0x1) {
3112bf215546Sopenharmony_ci         uint32_t color_constant3, control_word_constant1,
3113bf215546Sopenharmony_ci            control_word_constant2, color_constant4;
3114bf215546Sopenharmony_ci
3115bf215546Sopenharmony_ci         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3116bf215546Sopenharmony_ci         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3117bf215546Sopenharmony_ci         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3118bf215546Sopenharmony_ci
3119bf215546Sopenharmony_ci         control_word_constant1 =
3120bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
3121bf215546Sopenharmony_ci         control_word_constant2 =
3122bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
3123bf215546Sopenharmony_ci         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3124bf215546Sopenharmony_ci
3125bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3126bf215546Sopenharmony_ci            uint32_t doutw;
3127bf215546Sopenharmony_ci
3128bf215546Sopenharmony_ci            pvr_pds_write_constant32(constants,
3129bf215546Sopenharmony_ci                                     color_constant1,
3130bf215546Sopenharmony_ci                                     program->clear_color[0]);
3131bf215546Sopenharmony_ci
3132bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3133bf215546Sopenharmony_ci                                     color_constant2,
3134bf215546Sopenharmony_ci                                     program->clear_color[1],
3135bf215546Sopenharmony_ci                                     program->clear_color[2]);
3136bf215546Sopenharmony_ci
3137bf215546Sopenharmony_ci            pvr_pds_write_constant32(constants,
3138bf215546Sopenharmony_ci                                     color_constant3,
3139bf215546Sopenharmony_ci                                     program->clear_color[3]);
3140bf215546Sopenharmony_ci
3141bf215546Sopenharmony_ci            /* Load into first constant in common store. */
3142bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3143bf215546Sopenharmony_ci               program->clear_color_dest_reg,
3144bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER32,
3145bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3146bf215546Sopenharmony_ci               false,
3147bf215546Sopenharmony_ci               dev_info);
3148bf215546Sopenharmony_ci
3149bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3150bf215546Sopenharmony_ci                                     control_word_constant1,
3151bf215546Sopenharmony_ci                                     doutw,
3152bf215546Sopenharmony_ci                                     0);
3153bf215546Sopenharmony_ci
3154bf215546Sopenharmony_ci            /* Move the destination register along. */
3155bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3156bf215546Sopenharmony_ci               program->clear_color_dest_reg + 1,
3157bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
3158bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3159bf215546Sopenharmony_ci               false,
3160bf215546Sopenharmony_ci               dev_info);
3161bf215546Sopenharmony_ci
3162bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3163bf215546Sopenharmony_ci                                     control_word_constant2,
3164bf215546Sopenharmony_ci                                     doutw,
3165bf215546Sopenharmony_ci                                     0);
3166bf215546Sopenharmony_ci
3167bf215546Sopenharmony_ci            /* Move the destination register along. */
3168bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3169bf215546Sopenharmony_ci               program->clear_color_dest_reg + 3,
3170bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER32,
3171bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3172bf215546Sopenharmony_ci               false,
3173bf215546Sopenharmony_ci               dev_info);
3174bf215546Sopenharmony_ci
3175bf215546Sopenharmony_ci            /* Set the last flag. */
3176bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3177bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
3178bf215546Sopenharmony_ci         }
3179bf215546Sopenharmony_ci      } else {
3180bf215546Sopenharmony_ci         uint32_t control_word_constant, control_word_last_constant;
3181bf215546Sopenharmony_ci
3182bf215546Sopenharmony_ci         /* Put the clear color and control words into the first 8
3183bf215546Sopenharmony_ci          * constants.
3184bf215546Sopenharmony_ci          */
3185bf215546Sopenharmony_ci         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3186bf215546Sopenharmony_ci         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3187bf215546Sopenharmony_ci         control_word_constant =
3188bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
3189bf215546Sopenharmony_ci         control_word_last_constant =
3190bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
3191bf215546Sopenharmony_ci
3192bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3193bf215546Sopenharmony_ci            uint32_t doutw;
3194bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3195bf215546Sopenharmony_ci                                     color_constant1,
3196bf215546Sopenharmony_ci                                     program->clear_color[0],
3197bf215546Sopenharmony_ci                                     program->clear_color[1]);
3198bf215546Sopenharmony_ci
3199bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3200bf215546Sopenharmony_ci                                     color_constant2,
3201bf215546Sopenharmony_ci                                     program->clear_color[2],
3202bf215546Sopenharmony_ci                                     program->clear_color[3]);
3203bf215546Sopenharmony_ci
3204bf215546Sopenharmony_ci            /* Load into first constant in common store. */
3205bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3206bf215546Sopenharmony_ci               program->clear_color_dest_reg,
3207bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
3208bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3209bf215546Sopenharmony_ci               false,
3210bf215546Sopenharmony_ci               dev_info);
3211bf215546Sopenharmony_ci
3212bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
3213bf215546Sopenharmony_ci
3214bf215546Sopenharmony_ci            /* Move the destination register along. */
3215bf215546Sopenharmony_ci            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
3216bf215546Sopenharmony_ci            doutw |= (program->clear_color_dest_reg + 2)
3217bf215546Sopenharmony_ci                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
3218bf215546Sopenharmony_ci
3219bf215546Sopenharmony_ci            /* Set the last flag. */
3220bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3221bf215546Sopenharmony_ci            pvr_pds_write_constant64(constants,
3222bf215546Sopenharmony_ci                                     control_word_last_constant,
3223bf215546Sopenharmony_ci                                     doutw,
3224bf215546Sopenharmony_ci                                     0);
3225bf215546Sopenharmony_ci         }
3226bf215546Sopenharmony_ci      }
3227bf215546Sopenharmony_ci
3228bf215546Sopenharmony_ci      /* Constants for the DOUTU Task Control, if needed. */
3229bf215546Sopenharmony_ci      if (program->kick_usc) {
3230bf215546Sopenharmony_ci         uint32_t doutu_constant64 =
3231bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant, 2, &data_size);
3232bf215546Sopenharmony_ci
3233bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3234bf215546Sopenharmony_ci            pvr_pds_write_wide_constant(
3235bf215546Sopenharmony_ci               constants,
3236bf215546Sopenharmony_ci               doutu_constant64,
3237bf215546Sopenharmony_ci               program->usc_task_control.src0); /* 64-bit
3238bf215546Sopenharmony_ci                                                 */
3239bf215546Sopenharmony_ci            /* Src0 */
3240bf215546Sopenharmony_ci         }
3241bf215546Sopenharmony_ci      }
3242bf215546Sopenharmony_ci   } else {
3243bf215546Sopenharmony_ci      if (uniform) {
3244bf215546Sopenharmony_ci         /* Reserve space at the beginning of the data segment for the DOUTU
3245bf215546Sopenharmony_ci          * Task Control if one is needed.
3246bf215546Sopenharmony_ci          */
3247bf215546Sopenharmony_ci         if (program->kick_usc) {
3248bf215546Sopenharmony_ci            uint32_t doutu_constant64 =
3249bf215546Sopenharmony_ci               pvr_pds_get_constants(&next_constant, 2, &data_size);
3250bf215546Sopenharmony_ci
3251bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3252bf215546Sopenharmony_ci               pvr_pds_write_wide_constant(
3253bf215546Sopenharmony_ci                  constants,
3254bf215546Sopenharmony_ci                  doutu_constant64,
3255bf215546Sopenharmony_ci                  program->usc_task_control.src0); /* 64-bit Src0 */
3256bf215546Sopenharmony_ci            }
3257bf215546Sopenharmony_ci         }
3258bf215546Sopenharmony_ci
3259bf215546Sopenharmony_ci         uint32_t total_num_doutw =
3260bf215546Sopenharmony_ci            program->num_dword_doutw + program->num_q_word_doutw;
3261bf215546Sopenharmony_ci         uint32_t total_size_dma =
3262bf215546Sopenharmony_ci            program->num_dword_doutw + 2 * program->num_q_word_doutw;
3263bf215546Sopenharmony_ci
3264bf215546Sopenharmony_ci         /* Allocate 3 constant spaces for each kick. The 64-bit constants
3265bf215546Sopenharmony_ci          * come first followed by the 32-bit constants.
3266bf215546Sopenharmony_ci          */
3267bf215546Sopenharmony_ci         uint32_t dma_address_constant64 =
3268bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant,
3269bf215546Sopenharmony_ci                                  program->num_uniform_dma_kicks * 3 +
3270bf215546Sopenharmony_ci                                     total_size_dma + total_num_doutw,
3271bf215546Sopenharmony_ci                                  &data_size);
3272bf215546Sopenharmony_ci         uint32_t doutw_value_constant64 =
3273bf215546Sopenharmony_ci            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
3274bf215546Sopenharmony_ci         uint32_t dma_control_constant32 =
3275bf215546Sopenharmony_ci            doutw_value_constant64 + total_size_dma;
3276bf215546Sopenharmony_ci         uint32_t doutw_control_constant32 =
3277bf215546Sopenharmony_ci            dma_control_constant32 + program->num_uniform_dma_kicks;
3278bf215546Sopenharmony_ci
3279bf215546Sopenharmony_ci         if (total_num_doutw > 0) {
3280bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3281bf215546Sopenharmony_ci               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
3282bf215546Sopenharmony_ci                  pvr_pds_write_constant64(
3283bf215546Sopenharmony_ci                     constants,
3284bf215546Sopenharmony_ci                     doutw_value_constant64,
3285bf215546Sopenharmony_ci                     program->q_word_doutw_value[2 * i],
3286bf215546Sopenharmony_ci                     program->q_word_doutw_value[2 * i + 1]);
3287bf215546Sopenharmony_ci                  pvr_pds_write_constant32(
3288bf215546Sopenharmony_ci                     constants,
3289bf215546Sopenharmony_ci                     doutw_control_constant32,
3290bf215546Sopenharmony_ci                     program->q_word_doutw_control[i] |
3291bf215546Sopenharmony_ci                        ((!program->num_uniform_dma_kicks &&
3292bf215546Sopenharmony_ci                          i == total_num_doutw - 1)
3293bf215546Sopenharmony_ci                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3294bf215546Sopenharmony_ci                            : 0));
3295bf215546Sopenharmony_ci
3296bf215546Sopenharmony_ci                  doutw_value_constant64 += 2;
3297bf215546Sopenharmony_ci                  doutw_control_constant32 += 1;
3298bf215546Sopenharmony_ci               }
3299bf215546Sopenharmony_ci
3300bf215546Sopenharmony_ci               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
3301bf215546Sopenharmony_ci                  pvr_pds_write_constant32(constants,
3302bf215546Sopenharmony_ci                                           doutw_value_constant64,
3303bf215546Sopenharmony_ci                                           program->dword_doutw_value[i]);
3304bf215546Sopenharmony_ci                  pvr_pds_write_constant32(
3305bf215546Sopenharmony_ci                     constants,
3306bf215546Sopenharmony_ci                     doutw_control_constant32,
3307bf215546Sopenharmony_ci                     program->dword_doutw_control[i] |
3308bf215546Sopenharmony_ci                        ((!program->num_uniform_dma_kicks &&
3309bf215546Sopenharmony_ci                          i == program->num_dword_doutw - 1)
3310bf215546Sopenharmony_ci                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3311bf215546Sopenharmony_ci                            : 0));
3312bf215546Sopenharmony_ci
3313bf215546Sopenharmony_ci                  doutw_value_constant64 += 1;
3314bf215546Sopenharmony_ci                  doutw_control_constant32 += 1;
3315bf215546Sopenharmony_ci               }
3316bf215546Sopenharmony_ci            }
3317bf215546Sopenharmony_ci         }
3318bf215546Sopenharmony_ci
3319bf215546Sopenharmony_ci         if (program->num_uniform_dma_kicks > 0) {
3320bf215546Sopenharmony_ci            uint32_t kick;
3321bf215546Sopenharmony_ci
3322bf215546Sopenharmony_ci            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3323bf215546Sopenharmony_ci               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
3324bf215546Sopenharmony_ci                    kick++) {
3325bf215546Sopenharmony_ci                  /* Copy the dma control words to constants. */
3326bf215546Sopenharmony_ci                  pvr_pds_write_dma_address(constants,
3327bf215546Sopenharmony_ci                                            dma_address_constant64,
3328bf215546Sopenharmony_ci                                            program->uniform_dma_address[kick],
3329bf215546Sopenharmony_ci                                            false,
3330bf215546Sopenharmony_ci                                            dev_info);
3331bf215546Sopenharmony_ci                  pvr_pds_write_constant32(constants,
3332bf215546Sopenharmony_ci                                           dma_control_constant32,
3333bf215546Sopenharmony_ci                                           program->uniform_dma_control[kick]);
3334bf215546Sopenharmony_ci
3335bf215546Sopenharmony_ci                  dma_address_constant64 += 2;
3336bf215546Sopenharmony_ci                  dma_control_constant32 += 1;
3337bf215546Sopenharmony_ci               }
3338bf215546Sopenharmony_ci
3339bf215546Sopenharmony_ci               pvr_pds_write_dma_address(constants,
3340bf215546Sopenharmony_ci                                         dma_address_constant64,
3341bf215546Sopenharmony_ci                                         program->uniform_dma_address[kick],
3342bf215546Sopenharmony_ci                                         false,
3343bf215546Sopenharmony_ci                                         dev_info);
3344bf215546Sopenharmony_ci               pvr_pds_write_constant32(
3345bf215546Sopenharmony_ci                  constants,
3346bf215546Sopenharmony_ci                  dma_control_constant32,
3347bf215546Sopenharmony_ci                  program->uniform_dma_control[kick] |
3348bf215546Sopenharmony_ci                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3349bf215546Sopenharmony_ci            }
3350bf215546Sopenharmony_ci         }
3351bf215546Sopenharmony_ci
3352bf215546Sopenharmony_ci      } else if (program->num_texture_dma_kicks > 0) {
3353bf215546Sopenharmony_ci         /* Allocate 3 constant spaces for each kick. The 64-bit constants
3354bf215546Sopenharmony_ci          * come first followed by the 32-bit constants.
3355bf215546Sopenharmony_ci          */
3356bf215546Sopenharmony_ci         uint32_t dma_address_constant64 =
3357bf215546Sopenharmony_ci            pvr_pds_get_constants(&next_constant,
3358bf215546Sopenharmony_ci                                  program->num_texture_dma_kicks * 3,
3359bf215546Sopenharmony_ci                                  &data_size);
3360bf215546Sopenharmony_ci         uint32_t dma_control_constant32 =
3361bf215546Sopenharmony_ci            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
3362bf215546Sopenharmony_ci
3363bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3364bf215546Sopenharmony_ci            uint32_t kick;
3365bf215546Sopenharmony_ci            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
3366bf215546Sopenharmony_ci               /* Copy the DMA control words to constants. */
3367bf215546Sopenharmony_ci               pvr_pds_write_dma_address(constants,
3368bf215546Sopenharmony_ci                                         dma_address_constant64,
3369bf215546Sopenharmony_ci                                         program->texture_dma_address[kick],
3370bf215546Sopenharmony_ci                                         false,
3371bf215546Sopenharmony_ci                                         dev_info);
3372bf215546Sopenharmony_ci
3373bf215546Sopenharmony_ci               pvr_pds_write_constant32(constants,
3374bf215546Sopenharmony_ci                                        dma_control_constant32,
3375bf215546Sopenharmony_ci                                        program->texture_dma_control[kick]);
3376bf215546Sopenharmony_ci
3377bf215546Sopenharmony_ci               dma_address_constant64 += 2;
3378bf215546Sopenharmony_ci               dma_control_constant32 += 1;
3379bf215546Sopenharmony_ci            }
3380bf215546Sopenharmony_ci
3381bf215546Sopenharmony_ci            pvr_pds_write_dma_address(constants,
3382bf215546Sopenharmony_ci                                      dma_address_constant64,
3383bf215546Sopenharmony_ci                                      program->texture_dma_address[kick],
3384bf215546Sopenharmony_ci                                      false,
3385bf215546Sopenharmony_ci                                      dev_info);
3386bf215546Sopenharmony_ci
3387bf215546Sopenharmony_ci            pvr_pds_write_constant32(
3388bf215546Sopenharmony_ci               constants,
3389bf215546Sopenharmony_ci               dma_control_constant32,
3390bf215546Sopenharmony_ci               program->texture_dma_control[kick] |
3391bf215546Sopenharmony_ci                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3392bf215546Sopenharmony_ci         }
3393bf215546Sopenharmony_ci      }
3394bf215546Sopenharmony_ci   }
3395bf215546Sopenharmony_ci
3396bf215546Sopenharmony_ci   /* Save the data segment pointer and size. */
3397bf215546Sopenharmony_ci   program->data_segment = constants;
3398bf215546Sopenharmony_ci
3399bf215546Sopenharmony_ci   /* Minimum temp count is 1. */
3400bf215546Sopenharmony_ci   program->temps_used = MAX2(temps_used, 1);
3401bf215546Sopenharmony_ci   program->data_size = data_size;
3402bf215546Sopenharmony_ci
3403bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3404bf215546Sopenharmony_ci      return (constants + next_constant);
3405bf215546Sopenharmony_ci   else
3406bf215546Sopenharmony_ci      return NULL;
3407bf215546Sopenharmony_ci}
3408bf215546Sopenharmony_ci
3409bf215546Sopenharmony_ci/**
3410bf215546Sopenharmony_ci * Generates generic DOUTC PDS program.
3411bf215546Sopenharmony_ci *
3412bf215546Sopenharmony_ci * \param program Pointer to the PDS kick USC.
3413bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3414bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated, or sizes only updated.
3415bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the code or program segment.
3416bf215546Sopenharmony_ci */
3417bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3418bf215546Sopenharmony_ci                                 uint32_t *restrict buffer,
3419bf215546Sopenharmony_ci                                 enum pvr_pds_generate_mode gen_mode)
3420bf215546Sopenharmony_ci{
3421bf215546Sopenharmony_ci   uint32_t constant = 0;
3422bf215546Sopenharmony_ci
3423bf215546Sopenharmony_ci   /* Automatically get a data size of 1x 128bit chunks. */
3424bf215546Sopenharmony_ci   uint32_t data_size = 0, code_size = 0;
3425bf215546Sopenharmony_ci
3426bf215546Sopenharmony_ci   /* Setup the data part. */
3427bf215546Sopenharmony_ci   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3428bf215546Sopenharmony_ci   uint32_t *instruction = buffer;
3429bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3430bf215546Sopenharmony_ci                                                           * dwords.
3431bf215546Sopenharmony_ci                                                           */
3432bf215546Sopenharmony_ci
3433bf215546Sopenharmony_ci   /* Update the program sizes. */
3434bf215546Sopenharmony_ci   program->data_size = data_size;
3435bf215546Sopenharmony_ci   program->code_size = code_size;
3436bf215546Sopenharmony_ci   program->data_segment = constants;
3437bf215546Sopenharmony_ci
3438bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_SIZES)
3439bf215546Sopenharmony_ci      return NULL;
3440bf215546Sopenharmony_ci
3441bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3442bf215546Sopenharmony_ci      /* Copy the USC task control words to constants. */
3443bf215546Sopenharmony_ci
3444bf215546Sopenharmony_ci      constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3445bf215546Sopenharmony_ci      pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3446bf215546Sopenharmony_ci                                                                * Src0
3447bf215546Sopenharmony_ci                                                                */
3448bf215546Sopenharmony_ci
3449bf215546Sopenharmony_ci      uint32_t control_word_constant =
3450bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3451bf215546Sopenharmony_ci      pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3452bf215546Sopenharmony_ci                                                                         * Src1
3453bf215546Sopenharmony_ci                                                                         */
3454bf215546Sopenharmony_ci
3455bf215546Sopenharmony_ci      program->data_size = data_size;
3456bf215546Sopenharmony_ci      buffer += data_size;
3457bf215546Sopenharmony_ci
3458bf215546Sopenharmony_ci      return buffer;
3459bf215546Sopenharmony_ci   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3460bf215546Sopenharmony_ci      *instruction++ = pvr_pds_inst_encode_doutc(
3461bf215546Sopenharmony_ci         /* cc */ 0,
3462bf215546Sopenharmony_ci         /* END */ 0);
3463bf215546Sopenharmony_ci
3464bf215546Sopenharmony_ci      code_size++;
3465bf215546Sopenharmony_ci
3466bf215546Sopenharmony_ci      /* End the program. */
3467bf215546Sopenharmony_ci      *instruction++ = pvr_pds_inst_encode_halt(0);
3468bf215546Sopenharmony_ci      code_size++;
3469bf215546Sopenharmony_ci
3470bf215546Sopenharmony_ci      program->code_size = code_size;
3471bf215546Sopenharmony_ci   }
3472bf215546Sopenharmony_ci
3473bf215546Sopenharmony_ci   return instruction;
3474bf215546Sopenharmony_ci}
3475bf215546Sopenharmony_ci
3476bf215546Sopenharmony_ci/**
3477bf215546Sopenharmony_ci * Generates generic kick DOUTU PDS program in a single data+code block.
3478bf215546Sopenharmony_ci *
3479bf215546Sopenharmony_ci * \param control Pointer to the PDS kick USC.
3480bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3481bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
3482bf215546Sopenharmony_ci * \param dev_info PVR device information structure.
3483bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the code or program segment.
3484bf215546Sopenharmony_ci */
3485bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3486bf215546Sopenharmony_ci                                 uint32_t *restrict buffer,
3487bf215546Sopenharmony_ci                                 enum pvr_pds_generate_mode gen_mode,
3488bf215546Sopenharmony_ci                                 const struct pvr_device_info *dev_info)
3489bf215546Sopenharmony_ci{
3490bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3491bf215546Sopenharmony_ci   uint32_t doutw;
3492bf215546Sopenharmony_ci   uint32_t data_size = 0, code_size = 0;
3493bf215546Sopenharmony_ci   uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3494bf215546Sopenharmony_ci   uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3495bf215546Sopenharmony_ci
3496bf215546Sopenharmony_ci   /* Assert if buffer is exceeded. */
3497bf215546Sopenharmony_ci   assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3498bf215546Sopenharmony_ci
3499bf215546Sopenharmony_ci   uint32_t *constants = buffer;
3500bf215546Sopenharmony_ci   uint32_t *instruction = buffer;
3501bf215546Sopenharmony_ci
3502bf215546Sopenharmony_ci   /* Put the constants and control words interleaved in the data region. */
3503bf215546Sopenharmony_ci   for (uint32_t const_pair = 0; const_pair < control->num_const64;
3504bf215546Sopenharmony_ci        const_pair++) {
3505bf215546Sopenharmony_ci      constant[const_pair] =
3506bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3507bf215546Sopenharmony_ci      control_word_constant[const_pair] =
3508bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3509bf215546Sopenharmony_ci   }
3510bf215546Sopenharmony_ci
3511bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3512bf215546Sopenharmony_ci      /* Data segment points to start of constants. */
3513bf215546Sopenharmony_ci      control->data_segment = constants;
3514bf215546Sopenharmony_ci
3515bf215546Sopenharmony_ci      for (uint32_t const_pair = 0; const_pair < control->num_const64;
3516bf215546Sopenharmony_ci           const_pair++) {
3517bf215546Sopenharmony_ci         pvr_pds_write_constant64(constants,
3518bf215546Sopenharmony_ci                                  constant[const_pair],
3519bf215546Sopenharmony_ci                                  H32(control->doutw_data[const_pair]),
3520bf215546Sopenharmony_ci                                  L32(control->doutw_data[const_pair]));
3521bf215546Sopenharmony_ci
3522bf215546Sopenharmony_ci         /* Start loading at offset 0. */
3523bf215546Sopenharmony_ci         if (control->dest_store == PDS_COMMON_STORE) {
3524bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3525bf215546Sopenharmony_ci               (2 * const_pair),
3526bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
3527bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3528bf215546Sopenharmony_ci               false,
3529bf215546Sopenharmony_ci               dev_info);
3530bf215546Sopenharmony_ci         } else {
3531bf215546Sopenharmony_ci            doutw = pvr_pds_encode_doutw_src1(
3532bf215546Sopenharmony_ci               (2 * const_pair),
3533bf215546Sopenharmony_ci               PVR_PDS_DOUTW_LOWER64,
3534bf215546Sopenharmony_ci               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3535bf215546Sopenharmony_ci               false,
3536bf215546Sopenharmony_ci               dev_info);
3537bf215546Sopenharmony_ci         }
3538bf215546Sopenharmony_ci
3539bf215546Sopenharmony_ci         if (const_pair + 1 == control->num_const64) {
3540bf215546Sopenharmony_ci            /* Set the last flag for the MCU (assume there are no following
3541bf215546Sopenharmony_ci             * DOUTD's).
3542bf215546Sopenharmony_ci             */
3543bf215546Sopenharmony_ci            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3544bf215546Sopenharmony_ci         }
3545bf215546Sopenharmony_ci         pvr_pds_write_constant64(constants,
3546bf215546Sopenharmony_ci                                  control_word_constant[const_pair],
3547bf215546Sopenharmony_ci                                  doutw,
3548bf215546Sopenharmony_ci                                  0);
3549bf215546Sopenharmony_ci      }
3550bf215546Sopenharmony_ci
3551bf215546Sopenharmony_ci      control->data_size = data_size;
3552bf215546Sopenharmony_ci   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3553bf215546Sopenharmony_ci      /* Code section. */
3554bf215546Sopenharmony_ci
3555bf215546Sopenharmony_ci      for (uint32_t const_pair = 0; const_pair < control->num_const64;
3556bf215546Sopenharmony_ci           const_pair++) {
3557bf215546Sopenharmony_ci         /* DOUTW the PDS data to the USC constants. */
3558bf215546Sopenharmony_ci         *instruction++ = pvr_pds_encode_doutw64(
3559bf215546Sopenharmony_ci            /* cc */ 0,
3560bf215546Sopenharmony_ci            /* END */ control->last_instruction &&
3561bf215546Sopenharmony_ci               (const_pair + 1 == control->num_const64),
3562bf215546Sopenharmony_ci            /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3563bf215546Sopenharmony_ci                                                           * Src1.
3564bf215546Sopenharmony_ci                                                           */
3565bf215546Sopenharmony_ci            /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3566bf215546Sopenharmony_ci
3567bf215546Sopenharmony_ci         code_size++;
3568bf215546Sopenharmony_ci      }
3569bf215546Sopenharmony_ci
3570bf215546Sopenharmony_ci      if (control->last_instruction) {
3571bf215546Sopenharmony_ci         /* End the program. */
3572bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_halt(0);
3573bf215546Sopenharmony_ci         code_size++;
3574bf215546Sopenharmony_ci      }
3575bf215546Sopenharmony_ci
3576bf215546Sopenharmony_ci      control->code_size = code_size;
3577bf215546Sopenharmony_ci   }
3578bf215546Sopenharmony_ci
3579bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3580bf215546Sopenharmony_ci      return (constants + next_constant);
3581bf215546Sopenharmony_ci   else
3582bf215546Sopenharmony_ci      return instruction;
3583bf215546Sopenharmony_ci}
3584bf215546Sopenharmony_ci
3585bf215546Sopenharmony_ci/**
3586bf215546Sopenharmony_ci * Generates generic kick DOUTU PDS program in a single data+code block.
3587bf215546Sopenharmony_ci *
3588bf215546Sopenharmony_ci * \param program Pointer to the PDS kick USC.
3589bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3590bf215546Sopenharmony_ci * \param start_next_constant Next constant in data segment. Non-zero if another
3591bf215546Sopenharmony_ci *                            instruction precedes the DOUTU.
3592bf215546Sopenharmony_ci * \param cc_enabled If true then the DOUTU is predicated (cc set).
3593bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
3594bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the code or program segment.
3595bf215546Sopenharmony_ci */
3596bf215546Sopenharmony_ciuint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3597bf215546Sopenharmony_ci                           uint32_t *restrict buffer,
3598bf215546Sopenharmony_ci                           uint32_t start_next_constant,
3599bf215546Sopenharmony_ci                           bool cc_enabled,
3600bf215546Sopenharmony_ci                           enum pvr_pds_generate_mode gen_mode)
3601bf215546Sopenharmony_ci{
3602bf215546Sopenharmony_ci   uint32_t constant = 0;
3603bf215546Sopenharmony_ci
3604bf215546Sopenharmony_ci   /* Automatically get a data size of 2 128bit chunks. */
3605bf215546Sopenharmony_ci   uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3606bf215546Sopenharmony_ci   uint32_t code_size = 1; /* Single doutu */
3607bf215546Sopenharmony_ci   uint32_t dummy_count = 0;
3608bf215546Sopenharmony_ci
3609bf215546Sopenharmony_ci   /* Setup the data part. */
3610bf215546Sopenharmony_ci   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3611bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3612bf215546Sopenharmony_ci                                                           * dwords.
3613bf215546Sopenharmony_ci                                                           */
3614bf215546Sopenharmony_ci
3615bf215546Sopenharmony_ci   /* Update the program sizes. */
3616bf215546Sopenharmony_ci   program->data_size = data_size;
3617bf215546Sopenharmony_ci   program->code_size = code_size;
3618bf215546Sopenharmony_ci   program->data_segment = constants;
3619bf215546Sopenharmony_ci
3620bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_SIZES)
3621bf215546Sopenharmony_ci      return NULL;
3622bf215546Sopenharmony_ci
3623bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3624bf215546Sopenharmony_ci       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3625bf215546Sopenharmony_ci      /* Copy the USC task control words to constants. */
3626bf215546Sopenharmony_ci
3627bf215546Sopenharmony_ci      constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3628bf215546Sopenharmony_ci
3629bf215546Sopenharmony_ci      pvr_pds_write_wide_constant(constants,
3630bf215546Sopenharmony_ci                                  constant + 0,
3631bf215546Sopenharmony_ci                                  program->usc_task_control.src0); /* 64-bit
3632bf215546Sopenharmony_ci                                                                    * Src0.
3633bf215546Sopenharmony_ci                                                                    */
3634bf215546Sopenharmony_ci      buffer += data_size;
3635bf215546Sopenharmony_ci
3636bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3637bf215546Sopenharmony_ci         return buffer;
3638bf215546Sopenharmony_ci   }
3639bf215546Sopenharmony_ci
3640bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3641bf215546Sopenharmony_ci       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3642bf215546Sopenharmony_ci      /* Generate the PDS pixel shader code. */
3643bf215546Sopenharmony_ci
3644bf215546Sopenharmony_ci      /* Setup the instruction pointer. */
3645bf215546Sopenharmony_ci      uint32_t *instruction = buffer;
3646bf215546Sopenharmony_ci
3647bf215546Sopenharmony_ci      /* Issue the task to the USC.
3648bf215546Sopenharmony_ci       *
3649bf215546Sopenharmony_ci       * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3650bf215546Sopenharmony_ci       * halt halt
3651bf215546Sopenharmony_ci       */
3652bf215546Sopenharmony_ci
3653bf215546Sopenharmony_ci      *instruction++ = pvr_pds_encode_doutu(
3654bf215546Sopenharmony_ci         /* cc */ cc_enabled,
3655bf215546Sopenharmony_ci         /* END */ 1,
3656bf215546Sopenharmony_ci         /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3657bf215546Sopenharmony_ci                                                             * 64-bit Src0
3658bf215546Sopenharmony_ci                                                             */
3659bf215546Sopenharmony_ci
3660bf215546Sopenharmony_ci      /* Return pointer to just after last instruction. */
3661bf215546Sopenharmony_ci      return instruction;
3662bf215546Sopenharmony_ci   }
3663bf215546Sopenharmony_ci
3664bf215546Sopenharmony_ci   /* Execution should never reach here; keep compiler happy. */
3665bf215546Sopenharmony_ci   return NULL;
3666bf215546Sopenharmony_ci}
3667bf215546Sopenharmony_ci
3668bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_compute_barrier_conditional(
3669bf215546Sopenharmony_ci   uint32_t *buffer,
3670bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode)
3671bf215546Sopenharmony_ci{
3672bf215546Sopenharmony_ci   /* Compute barriers supported. Need to test for coeff sync task. */
3673bf215546Sopenharmony_ci
3674bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3675bf215546Sopenharmony_ci      return buffer; /* No data segment. */
3676bf215546Sopenharmony_ci
3677bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3678bf215546Sopenharmony_ci      /* Test whether this is the coefficient update task or not. */
3679bf215546Sopenharmony_ci      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3680bf215546Sopenharmony_ci                                                                       */
3681bf215546Sopenharmony_ci                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3682bf215546Sopenharmony_ci                                                                         */
3683bf215546Sopenharmony_ci                                     PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3684bf215546Sopenharmony_ci                                                                       */
3685bf215546Sopenharmony_ci                                     1 /* ADDR */);
3686bf215546Sopenharmony_ci
3687bf215546Sopenharmony_ci      /* Encode a HALT. */
3688bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_halt(1);
3689bf215546Sopenharmony_ci
3690bf215546Sopenharmony_ci      /* Reset the default predicate to IF0. */
3691bf215546Sopenharmony_ci      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3692bf215546Sopenharmony_ci                                                                       */
3693bf215546Sopenharmony_ci                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3694bf215546Sopenharmony_ci                                                                         */
3695bf215546Sopenharmony_ci                                     PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3696bf215546Sopenharmony_ci                                                                       */
3697bf215546Sopenharmony_ci                                     1 /* ADDR */);
3698bf215546Sopenharmony_ci   }
3699bf215546Sopenharmony_ci
3700bf215546Sopenharmony_ci   return buffer;
3701bf215546Sopenharmony_ci}
3702bf215546Sopenharmony_ci
3703bf215546Sopenharmony_ci/**
3704bf215546Sopenharmony_ci * Generates program to kick the USC task to store shared.
3705bf215546Sopenharmony_ci *
3706bf215546Sopenharmony_ci * \param program Pointer to the PDS shared register.
3707bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3708bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
3709bf215546Sopenharmony_ci * \param dev_info PVR device information structure.
3710bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
3711bf215546Sopenharmony_ci */
3712bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_shared_storing_program(
3713bf215546Sopenharmony_ci   struct pvr_pds_shared_storing_program *restrict program,
3714bf215546Sopenharmony_ci   uint32_t *restrict buffer,
3715bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
3716bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
3717bf215546Sopenharmony_ci{
3718bf215546Sopenharmony_ci   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3719bf215546Sopenharmony_ci   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3720bf215546Sopenharmony_ci
3721bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_SIZES)
3722bf215546Sopenharmony_ci      return NULL;
3723bf215546Sopenharmony_ci
3724bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3725bf215546Sopenharmony_ci      uint32_t *constants = buffer;
3726bf215546Sopenharmony_ci
3727bf215546Sopenharmony_ci      constants =
3728bf215546Sopenharmony_ci         pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3729bf215546Sopenharmony_ci      program->data_size = doutw_control->data_size;
3730bf215546Sopenharmony_ci
3731bf215546Sopenharmony_ci      constants = pvr_pds_kick_usc(kick_usc_program,
3732bf215546Sopenharmony_ci                                   constants,
3733bf215546Sopenharmony_ci                                   0,
3734bf215546Sopenharmony_ci                                   program->cc_enable,
3735bf215546Sopenharmony_ci                                   gen_mode);
3736bf215546Sopenharmony_ci      program->data_size += kick_usc_program->data_size;
3737bf215546Sopenharmony_ci
3738bf215546Sopenharmony_ci      return constants;
3739bf215546Sopenharmony_ci   }
3740bf215546Sopenharmony_ci
3741bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3742bf215546Sopenharmony_ci      /* Generate PDS code segment. */
3743bf215546Sopenharmony_ci      uint32_t *instruction = buffer;
3744bf215546Sopenharmony_ci
3745bf215546Sopenharmony_ci      /* doutw	vi1, vi0
3746bf215546Sopenharmony_ci       * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3747bf215546Sopenharmony_ci       * emit
3748bf215546Sopenharmony_ci       */
3749bf215546Sopenharmony_ci      instruction =
3750bf215546Sopenharmony_ci         pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3751bf215546Sopenharmony_ci      program->code_size = doutw_control->code_size;
3752bf215546Sopenharmony_ci
3753bf215546Sopenharmony_ci      /* Offset into data segment follows on from doutw data segment. */
3754bf215546Sopenharmony_ci      instruction = pvr_pds_kick_usc(kick_usc_program,
3755bf215546Sopenharmony_ci                                     instruction,
3756bf215546Sopenharmony_ci                                     doutw_control->data_size,
3757bf215546Sopenharmony_ci                                     program->cc_enable,
3758bf215546Sopenharmony_ci                                     gen_mode);
3759bf215546Sopenharmony_ci      program->code_size += kick_usc_program->code_size;
3760bf215546Sopenharmony_ci
3761bf215546Sopenharmony_ci      return instruction;
3762bf215546Sopenharmony_ci   }
3763bf215546Sopenharmony_ci
3764bf215546Sopenharmony_ci   /* Execution should never reach here. */
3765bf215546Sopenharmony_ci   return NULL;
3766bf215546Sopenharmony_ci}
3767bf215546Sopenharmony_ci
3768bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_fence_terminate_program(
3769bf215546Sopenharmony_ci   struct pvr_pds_fence_program *restrict program,
3770bf215546Sopenharmony_ci   uint32_t *restrict buffer,
3771bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
3772bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
3773bf215546Sopenharmony_ci{
3774bf215546Sopenharmony_ci   uint32_t data_size = 0;
3775bf215546Sopenharmony_ci   uint32_t code_size = 0;
3776bf215546Sopenharmony_ci
3777bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3778bf215546Sopenharmony_ci      /* Data segment. */
3779bf215546Sopenharmony_ci      uint32_t *constants, *constants_base;
3780bf215546Sopenharmony_ci
3781bf215546Sopenharmony_ci      constants = constants_base = (uint32_t *)buffer;
3782bf215546Sopenharmony_ci
3783bf215546Sopenharmony_ci      /* DOUTC sources are not used, but they must be valid. */
3784bf215546Sopenharmony_ci      pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3785bf215546Sopenharmony_ci      data_size += program->data_size;
3786bf215546Sopenharmony_ci
3787bf215546Sopenharmony_ci      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3788bf215546Sopenharmony_ci         /* Append a 64-bit constant with value 1. Used to increment ptemp.
3789bf215546Sopenharmony_ci          * Return the offset into the data segment.
3790bf215546Sopenharmony_ci          */
3791bf215546Sopenharmony_ci         program->fence_constant_word =
3792bf215546Sopenharmony_ci            pvr_pds_append_constant64(constants_base, 1, &data_size);
3793bf215546Sopenharmony_ci      }
3794bf215546Sopenharmony_ci
3795bf215546Sopenharmony_ci      program->data_size = data_size;
3796bf215546Sopenharmony_ci      return constants;
3797bf215546Sopenharmony_ci   }
3798bf215546Sopenharmony_ci
3799bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3800bf215546Sopenharmony_ci      /* Code segment. */
3801bf215546Sopenharmony_ci      uint32_t *instruction = (uint32_t *)buffer;
3802bf215546Sopenharmony_ci
3803bf215546Sopenharmony_ci      instruction = pvr_pds_generate_compute_barrier_conditional(
3804bf215546Sopenharmony_ci         instruction,
3805bf215546Sopenharmony_ci         PDS_GENERATE_CODE_SEGMENT);
3806bf215546Sopenharmony_ci      code_size += 3;
3807bf215546Sopenharmony_ci
3808bf215546Sopenharmony_ci      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3809bf215546Sopenharmony_ci         /* lock */
3810bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3811bf215546Sopenharmony_ci
3812bf215546Sopenharmony_ci         /* add64	pt[0], pt[0], #1 */
3813bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_add64(
3814bf215546Sopenharmony_ci            0, /* cc */
3815bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3816bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3817bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3818bf215546Sopenharmony_ci                                                         */
3819bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3820bf215546Sopenharmony_ci               (program->fence_constant_word >> 1), /* src1 = 1 */
3821bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3822bf215546Sopenharmony_ci                                                            * ptemp[0]
3823bf215546Sopenharmony_ci                                                            */
3824bf215546Sopenharmony_ci
3825bf215546Sopenharmony_ci         /* release */
3826bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3827bf215546Sopenharmony_ci
3828bf215546Sopenharmony_ci         /* cmp		pt[0] EQ 0x4 == Number of USC clusters per phantom */
3829bf215546Sopenharmony_ci         *instruction++ = pvr_pds_inst_encode_cmpi(
3830bf215546Sopenharmony_ci            0, /* cc */
3831bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_COP_EQ,
3832bf215546Sopenharmony_ci            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3833bf215546Sopenharmony_ci                                                           * = ptemp[0]
3834bf215546Sopenharmony_ci                                                           */
3835bf215546Sopenharmony_ci            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3836bf215546Sopenharmony_ci
3837bf215546Sopenharmony_ci         /* bra		-1 */
3838bf215546Sopenharmony_ci         *instruction++ =
3839bf215546Sopenharmony_ci            pvr_pds_encode_bra(0, /* cc */
3840bf215546Sopenharmony_ci                               1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3841bf215546Sopenharmony_ci                                   */
3842bf215546Sopenharmony_ci                               0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3843bf215546Sopenharmony_ci                                   */
3844bf215546Sopenharmony_ci                               -1); /* bra PC */
3845bf215546Sopenharmony_ci         code_size += 5;
3846bf215546Sopenharmony_ci      }
3847bf215546Sopenharmony_ci
3848bf215546Sopenharmony_ci      /* DOUTC */
3849bf215546Sopenharmony_ci      instruction = pvr_pds_generate_doutc(program,
3850bf215546Sopenharmony_ci                                           instruction,
3851bf215546Sopenharmony_ci                                           PDS_GENERATE_CODE_SEGMENT);
3852bf215546Sopenharmony_ci      code_size += program->code_size;
3853bf215546Sopenharmony_ci
3854bf215546Sopenharmony_ci      program->code_size = code_size;
3855bf215546Sopenharmony_ci      return instruction;
3856bf215546Sopenharmony_ci   }
3857bf215546Sopenharmony_ci
3858bf215546Sopenharmony_ci   /* Execution should never reach here. */
3859bf215546Sopenharmony_ci   return NULL;
3860bf215546Sopenharmony_ci}
3861bf215546Sopenharmony_ci
3862bf215546Sopenharmony_ci/**
3863bf215546Sopenharmony_ci * Generates program to kick the USC task to load shared registers from memory.
3864bf215546Sopenharmony_ci *
3865bf215546Sopenharmony_ci * \param program Pointer to the PDS shared register.
3866bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3867bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
3868bf215546Sopenharmony_ci * \param dev_info PVR device information struct.
3869bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
3870bf215546Sopenharmony_ci */
3871bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_compute_shared_loading_program(
3872bf215546Sopenharmony_ci   struct pvr_pds_shared_storing_program *restrict program,
3873bf215546Sopenharmony_ci   uint32_t *restrict buffer,
3874bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
3875bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
3876bf215546Sopenharmony_ci{
3877bf215546Sopenharmony_ci   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3878bf215546Sopenharmony_ci   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3879bf215546Sopenharmony_ci
3880bf215546Sopenharmony_ci   uint32_t next_constant;
3881bf215546Sopenharmony_ci   uint32_t data_size = 0;
3882bf215546Sopenharmony_ci   uint32_t code_size = 0;
3883bf215546Sopenharmony_ci
3884bf215546Sopenharmony_ci   /* This needs to persist to the CODE_SEGMENT call. */
3885bf215546Sopenharmony_ci   static uint32_t fence_constant_word = 0;
3886bf215546Sopenharmony_ci   uint64_t zero_constant64 = 0;
3887bf215546Sopenharmony_ci
3888bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_SIZES)
3889bf215546Sopenharmony_ci      return NULL;
3890bf215546Sopenharmony_ci
3891bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3892bf215546Sopenharmony_ci      uint32_t *constants = buffer;
3893bf215546Sopenharmony_ci
3894bf215546Sopenharmony_ci      constants = pvr_pds_generate_doutw(doutw_control,
3895bf215546Sopenharmony_ci                                         constants,
3896bf215546Sopenharmony_ci                                         PDS_GENERATE_DATA_SEGMENT,
3897bf215546Sopenharmony_ci                                         dev_info);
3898bf215546Sopenharmony_ci      data_size += doutw_control->data_size;
3899bf215546Sopenharmony_ci
3900bf215546Sopenharmony_ci      constants = pvr_pds_kick_usc(kick_usc_program,
3901bf215546Sopenharmony_ci                                   constants,
3902bf215546Sopenharmony_ci                                   0,
3903bf215546Sopenharmony_ci                                   program->cc_enable,
3904bf215546Sopenharmony_ci                                   gen_mode);
3905bf215546Sopenharmony_ci      data_size += kick_usc_program->data_size;
3906bf215546Sopenharmony_ci
3907bf215546Sopenharmony_ci      /* Copy the fence constant value (64-bit). */
3908bf215546Sopenharmony_ci      next_constant = data_size; /* Assumes data words fully packed. */
3909bf215546Sopenharmony_ci      fence_constant_word =
3910bf215546Sopenharmony_ci         pvr_pds_get_constants(&next_constant, 2, &data_size);
3911bf215546Sopenharmony_ci
3912bf215546Sopenharmony_ci      /* Encode the fence constant src0 (offset measured from start of data
3913bf215546Sopenharmony_ci       * buffer). Fence barrier is initialized to zero.
3914bf215546Sopenharmony_ci       */
3915bf215546Sopenharmony_ci      pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3916bf215546Sopenharmony_ci      /* Update the const size. */
3917bf215546Sopenharmony_ci      data_size += 2;
3918bf215546Sopenharmony_ci      constants += 2;
3919bf215546Sopenharmony_ci
3920bf215546Sopenharmony_ci      program->data_size = data_size;
3921bf215546Sopenharmony_ci      return constants;
3922bf215546Sopenharmony_ci   }
3923bf215546Sopenharmony_ci
3924bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3925bf215546Sopenharmony_ci      /* Generate PDS code segment. */
3926bf215546Sopenharmony_ci      uint32_t *instruction = buffer;
3927bf215546Sopenharmony_ci
3928bf215546Sopenharmony_ci      /* add64	pt0, c0, c0
3929bf215546Sopenharmony_ci       * IF [2x Phantoms]
3930bf215546Sopenharmony_ci       * add64	pt1, c0, c0
3931bf215546Sopenharmony_ci       * st		[constant_mem_addr], pt0, 4
3932bf215546Sopenharmony_ci       * ENDIF
3933bf215546Sopenharmony_ci       * doutw	vi1, vi0
3934bf215546Sopenharmony_ci       * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3935bf215546Sopenharmony_ci       * emit
3936bf215546Sopenharmony_ci       *
3937bf215546Sopenharmony_ci       * Zero the persistent temp (SW fence for context switch).
3938bf215546Sopenharmony_ci       */
3939bf215546Sopenharmony_ci      *instruction++ = pvr_pds_inst_encode_add64(
3940bf215546Sopenharmony_ci         0, /* cc */
3941bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3942bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3943bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3944bf215546Sopenharmony_ci            (fence_constant_word >> 1), /* src0
3945bf215546Sopenharmony_ci                                         *  = 0
3946bf215546Sopenharmony_ci                                         */
3947bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3948bf215546Sopenharmony_ci            (fence_constant_word >> 1), /* src1
3949bf215546Sopenharmony_ci                                         * = 0
3950bf215546Sopenharmony_ci                                         */
3951bf215546Sopenharmony_ci         PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3952bf215546Sopenharmony_ci                                                         */
3953bf215546Sopenharmony_ci      code_size++;
3954bf215546Sopenharmony_ci
3955bf215546Sopenharmony_ci      instruction = pvr_pds_generate_doutw(doutw_control,
3956bf215546Sopenharmony_ci                                           instruction,
3957bf215546Sopenharmony_ci                                           PDS_GENERATE_CODE_SEGMENT,
3958bf215546Sopenharmony_ci                                           dev_info);
3959bf215546Sopenharmony_ci      code_size += doutw_control->code_size;
3960bf215546Sopenharmony_ci
3961bf215546Sopenharmony_ci      /* Offset into data segment follows on from doutw data segment. */
3962bf215546Sopenharmony_ci      instruction = pvr_pds_kick_usc(kick_usc_program,
3963bf215546Sopenharmony_ci                                     instruction,
3964bf215546Sopenharmony_ci                                     doutw_control->data_size,
3965bf215546Sopenharmony_ci                                     program->cc_enable,
3966bf215546Sopenharmony_ci                                     gen_mode);
3967bf215546Sopenharmony_ci      code_size += kick_usc_program->code_size;
3968bf215546Sopenharmony_ci
3969bf215546Sopenharmony_ci      program->code_size = code_size;
3970bf215546Sopenharmony_ci      return instruction;
3971bf215546Sopenharmony_ci   }
3972bf215546Sopenharmony_ci
3973bf215546Sopenharmony_ci   /* Execution should never reach here. */
3974bf215546Sopenharmony_ci   return NULL;
3975bf215546Sopenharmony_ci}
3976bf215546Sopenharmony_ci
3977bf215546Sopenharmony_ci/**
3978bf215546Sopenharmony_ci * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3979bf215546Sopenharmony_ci * Relies on num_fpu_iterators being initialized for size calculation.
3980bf215546Sopenharmony_ci * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3981bf215546Sopenharmony_ci * initialized for program generation.
3982bf215546Sopenharmony_ci *
3983bf215546Sopenharmony_ci * \param program Pointer to the PDS pixel shader program.
3984bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
3985bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
3986bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
3987bf215546Sopenharmony_ci */
3988bf215546Sopenharmony_ciuint32_t *pvr_pds_coefficient_loading(
3989bf215546Sopenharmony_ci   struct pvr_pds_coeff_loading_program *restrict program,
3990bf215546Sopenharmony_ci   uint32_t *restrict buffer,
3991bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode)
3992bf215546Sopenharmony_ci{
3993bf215546Sopenharmony_ci   uint32_t constant;
3994bf215546Sopenharmony_ci   uint32_t *instruction;
3995bf215546Sopenharmony_ci   uint32_t total_data_size, code_size;
3996bf215546Sopenharmony_ci
3997bf215546Sopenharmony_ci   /* Place constants at the front of the buffer. */
3998bf215546Sopenharmony_ci   uint32_t *constants = buffer;
3999bf215546Sopenharmony_ci   /* Start counting constants from 0. */
4000bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4001bf215546Sopenharmony_ci
4002bf215546Sopenharmony_ci   /* Save the data segment pointer and size. */
4003bf215546Sopenharmony_ci   program->data_segment = constants;
4004bf215546Sopenharmony_ci
4005bf215546Sopenharmony_ci   total_data_size = 0;
4006bf215546Sopenharmony_ci   code_size = 0;
4007bf215546Sopenharmony_ci
4008bf215546Sopenharmony_ci   total_data_size += 2 * program->num_fpu_iterators;
4009bf215546Sopenharmony_ci   code_size += program->num_fpu_iterators;
4010bf215546Sopenharmony_ci
4011bf215546Sopenharmony_ci   /* Instructions start where constants finished, but we must take note of
4012bf215546Sopenharmony_ci    * alignment.
4013bf215546Sopenharmony_ci    *
4014bf215546Sopenharmony_ci    * 128-bit boundary = 4 dwords.
4015bf215546Sopenharmony_ci    */
4016bf215546Sopenharmony_ci   total_data_size = ALIGN_POT(total_data_size, 4);
4017bf215546Sopenharmony_ci   if (gen_mode != PDS_GENERATE_SIZES) {
4018bf215546Sopenharmony_ci      uint32_t data_size = 0;
4019bf215546Sopenharmony_ci      uint32_t iterator = 0;
4020bf215546Sopenharmony_ci
4021bf215546Sopenharmony_ci      instruction = buffer + total_data_size;
4022bf215546Sopenharmony_ci
4023bf215546Sopenharmony_ci      while (iterator < program->num_fpu_iterators) {
4024bf215546Sopenharmony_ci         uint64_t iterator_word;
4025bf215546Sopenharmony_ci
4026bf215546Sopenharmony_ci         /* Copy the USC task control words to constants. */
4027bf215546Sopenharmony_ci         constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4028bf215546Sopenharmony_ci
4029bf215546Sopenharmony_ci         /* Write the first iterator. */
4030bf215546Sopenharmony_ci         iterator_word =
4031bf215546Sopenharmony_ci            (uint64_t)program->FPU_iterators[iterator]
4032bf215546Sopenharmony_ci            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4033bf215546Sopenharmony_ci
4034bf215546Sopenharmony_ci         /* Write the destination. */
4035bf215546Sopenharmony_ci         iterator_word |=
4036bf215546Sopenharmony_ci            (uint64_t)program->destination[iterator++]
4037bf215546Sopenharmony_ci            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4038bf215546Sopenharmony_ci
4039bf215546Sopenharmony_ci         /* If this is the last DOUTI word the "Last Issue" bit should be
4040bf215546Sopenharmony_ci          * set.
4041bf215546Sopenharmony_ci          */
4042bf215546Sopenharmony_ci         if (iterator >= program->num_fpu_iterators) {
4043bf215546Sopenharmony_ci            iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4044bf215546Sopenharmony_ci         }
4045bf215546Sopenharmony_ci
4046bf215546Sopenharmony_ci         /* Write the word to the buffer. */
4047bf215546Sopenharmony_ci         pvr_pds_write_wide_constant(constants,
4048bf215546Sopenharmony_ci                                     constant,
4049bf215546Sopenharmony_ci                                     iterator_word); /* 64-bit
4050bf215546Sopenharmony_ci                                                        Src0
4051bf215546Sopenharmony_ci                                                      */
4052bf215546Sopenharmony_ci
4053bf215546Sopenharmony_ci         /* Write the DOUT instruction. */
4054bf215546Sopenharmony_ci         *instruction++ = pvr_pds_encode_douti(
4055bf215546Sopenharmony_ci            /* cc */ 0,
4056bf215546Sopenharmony_ci            /* END */ 0,
4057bf215546Sopenharmony_ci            /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4058bf215546Sopenharmony_ci      }
4059bf215546Sopenharmony_ci
4060bf215546Sopenharmony_ci      /* Update the last DOUTI instruction to have the END flag set. */
4061bf215546Sopenharmony_ci      *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4062bf215546Sopenharmony_ci   } else {
4063bf215546Sopenharmony_ci      instruction = NULL;
4064bf215546Sopenharmony_ci   }
4065bf215546Sopenharmony_ci
4066bf215546Sopenharmony_ci   /* Update the data size and code size. Minimum temp count is 1. */
4067bf215546Sopenharmony_ci   program->temps_used = 1;
4068bf215546Sopenharmony_ci   program->data_size = total_data_size;
4069bf215546Sopenharmony_ci   program->code_size = code_size;
4070bf215546Sopenharmony_ci
4071bf215546Sopenharmony_ci   return instruction;
4072bf215546Sopenharmony_ci}
4073bf215546Sopenharmony_ci
4074bf215546Sopenharmony_ci/**
4075bf215546Sopenharmony_ci * Generate a single ld/st instruction. This can correspond to one or more
4076bf215546Sopenharmony_ci * real ld/st instructions based on the value of count.
4077bf215546Sopenharmony_ci *
4078bf215546Sopenharmony_ci * \param ld true to generate load, false to generate store.
4079bf215546Sopenharmony_ci * \param control Cache mode control.
4080bf215546Sopenharmony_ci * \param temp_index Dest temp for load/source temp for store, in 32bits
4081bf215546Sopenharmony_ci *                   register index.
4082bf215546Sopenharmony_ci * \param address Source for load/dest for store in bytes.
4083bf215546Sopenharmony_ci * \param count Number of dwords for load/store.
4084bf215546Sopenharmony_ci * \param next_constant
4085bf215546Sopenharmony_ci * \param total_data_size
4086bf215546Sopenharmony_ci * \param total_code_size
4087bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
4088bf215546Sopenharmony_ci * \param data_fence Issue data fence.
4089bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
4090bf215546Sopenharmony_ci * \param dev_info PVR device information structure.
4091bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
4092bf215546Sopenharmony_ci */
4093bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_single_ldst_instruction(
4094bf215546Sopenharmony_ci   bool ld,
4095bf215546Sopenharmony_ci   const struct pvr_pds_ldst_control *control,
4096bf215546Sopenharmony_ci   uint32_t temp_index,
4097bf215546Sopenharmony_ci   uint64_t address,
4098bf215546Sopenharmony_ci   uint32_t count,
4099bf215546Sopenharmony_ci   uint32_t *next_constant,
4100bf215546Sopenharmony_ci   uint32_t *total_data_size,
4101bf215546Sopenharmony_ci   uint32_t *total_code_size,
4102bf215546Sopenharmony_ci   uint32_t *restrict buffer,
4103bf215546Sopenharmony_ci   bool data_fence,
4104bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
4105bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
4106bf215546Sopenharmony_ci{
4107bf215546Sopenharmony_ci   /* A single ld/ST here does NOT actually correspond to a single ld/ST
4108bf215546Sopenharmony_ci    * instruction, but may needs multiple ld/ST instructions because each ld/ST
4109bf215546Sopenharmony_ci    * instruction can only ld/ST a restricted max number of dwords which may
4110bf215546Sopenharmony_ci    * less than count passed here.
4111bf215546Sopenharmony_ci    */
4112bf215546Sopenharmony_ci
4113bf215546Sopenharmony_ci   uint32_t num_inst;
4114bf215546Sopenharmony_ci   uint32_t constant;
4115bf215546Sopenharmony_ci
4116bf215546Sopenharmony_ci   if (ld) {
4117bf215546Sopenharmony_ci      /* ld must operate on 64bits unit, and it needs to load from and to 128
4118bf215546Sopenharmony_ci       * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4119bf215546Sopenharmony_ci       * 1, 2, ...) times 64bits unit.
4120bf215546Sopenharmony_ci       */
4121bf215546Sopenharmony_ci      uint32_t per_inst_count = 0;
4122bf215546Sopenharmony_ci      uint32_t last_inst_count;
4123bf215546Sopenharmony_ci
4124bf215546Sopenharmony_ci      assert((gen_mode == PDS_GENERATE_SIZES) ||
4125bf215546Sopenharmony_ci             (((count % 2) == 0) && ((address % 16) == 0) &&
4126bf215546Sopenharmony_ci              (temp_index % 2) == 0));
4127bf215546Sopenharmony_ci
4128bf215546Sopenharmony_ci      count >>= 1;
4129bf215546Sopenharmony_ci      temp_index >>= 1;
4130bf215546Sopenharmony_ci
4131bf215546Sopenharmony_ci      /* Found out how many ld instructions are needed and ld size for the all
4132bf215546Sopenharmony_ci       * possible ld instructions.
4133bf215546Sopenharmony_ci       */
4134bf215546Sopenharmony_ci      if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4135bf215546Sopenharmony_ci         num_inst = 1;
4136bf215546Sopenharmony_ci         last_inst_count = count;
4137bf215546Sopenharmony_ci      } else {
4138bf215546Sopenharmony_ci         per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4139bf215546Sopenharmony_ci         if ((per_inst_count % 2) != 0)
4140bf215546Sopenharmony_ci            per_inst_count -= 1;
4141bf215546Sopenharmony_ci
4142bf215546Sopenharmony_ci         num_inst = count / per_inst_count;
4143bf215546Sopenharmony_ci         last_inst_count = count - per_inst_count * num_inst;
4144bf215546Sopenharmony_ci         num_inst += 1;
4145bf215546Sopenharmony_ci      }
4146bf215546Sopenharmony_ci
4147bf215546Sopenharmony_ci      /* Generate all the instructions. */
4148bf215546Sopenharmony_ci      for (uint32_t i = 0; i < num_inst; i++) {
4149bf215546Sopenharmony_ci         if ((i == (num_inst - 1)) && (last_inst_count == 0))
4150bf215546Sopenharmony_ci            break;
4151bf215546Sopenharmony_ci
4152bf215546Sopenharmony_ci         /* A single load instruction. */
4153bf215546Sopenharmony_ci         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4154bf215546Sopenharmony_ci
4155bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4156bf215546Sopenharmony_ci            uint64_t ld_src0 = 0;
4157bf215546Sopenharmony_ci
4158bf215546Sopenharmony_ci            ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4159bf215546Sopenharmony_ci                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4160bf215546Sopenharmony_ci            ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4161bf215546Sopenharmony_ci                                                        : per_inst_count) &
4162bf215546Sopenharmony_ci                         PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4163bf215546Sopenharmony_ci                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4164bf215546Sopenharmony_ci            ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4165bf215546Sopenharmony_ci                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4166bf215546Sopenharmony_ci
4167bf215546Sopenharmony_ci            if (!control) {
4168bf215546Sopenharmony_ci               ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4169bf215546Sopenharmony_ci
4170bf215546Sopenharmony_ci               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4171bf215546Sopenharmony_ci                  ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4172bf215546Sopenharmony_ci
4173bf215546Sopenharmony_ci            } else {
4174bf215546Sopenharmony_ci               ld_src0 |= control->cache_control_const;
4175bf215546Sopenharmony_ci            }
4176bf215546Sopenharmony_ci
4177bf215546Sopenharmony_ci            /* Write it to the constant. */
4178bf215546Sopenharmony_ci            pvr_pds_write_constant64(buffer,
4179bf215546Sopenharmony_ci                                     constant,
4180bf215546Sopenharmony_ci                                     (uint32_t)(ld_src0),
4181bf215546Sopenharmony_ci                                     (uint32_t)(ld_src0 >> 32));
4182bf215546Sopenharmony_ci
4183bf215546Sopenharmony_ci            /* Adjust value for next ld instruction. */
4184bf215546Sopenharmony_ci            temp_index += per_inst_count;
4185bf215546Sopenharmony_ci            address += (((uint64_t)(per_inst_count)) << 3);
4186bf215546Sopenharmony_ci         }
4187bf215546Sopenharmony_ci
4188bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4189bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4190bf215546Sopenharmony_ci
4191bf215546Sopenharmony_ci            if (data_fence)
4192bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_wdf(0);
4193bf215546Sopenharmony_ci         }
4194bf215546Sopenharmony_ci      }
4195bf215546Sopenharmony_ci   } else {
4196bf215546Sopenharmony_ci      /* ST needs source memory address to be 32bits aligned. */
4197bf215546Sopenharmony_ci      assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4198bf215546Sopenharmony_ci
4199bf215546Sopenharmony_ci      /* Found out how many ST instructions are needed, each ST can only store
4200bf215546Sopenharmony_ci       * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4201bf215546Sopenharmony_ci       */
4202bf215546Sopenharmony_ci      num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4203bf215546Sopenharmony_ci      num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4204bf215546Sopenharmony_ci
4205bf215546Sopenharmony_ci      /* Generate all the instructions. */
4206bf215546Sopenharmony_ci      for (uint32_t i = 0; i < num_inst; i++) {
4207bf215546Sopenharmony_ci         /* A single store instruction. */
4208bf215546Sopenharmony_ci         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4209bf215546Sopenharmony_ci
4210bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4211bf215546Sopenharmony_ci            uint32_t per_inst_count =
4212bf215546Sopenharmony_ci               (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4213bf215546Sopenharmony_ci                   ? count
4214bf215546Sopenharmony_ci                   : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4215bf215546Sopenharmony_ci            uint64_t st_src0 = 0;
4216bf215546Sopenharmony_ci
4217bf215546Sopenharmony_ci            st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4218bf215546Sopenharmony_ci                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4219bf215546Sopenharmony_ci            st_src0 |=
4220bf215546Sopenharmony_ci               (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4221bf215546Sopenharmony_ci                << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4222bf215546Sopenharmony_ci            st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4223bf215546Sopenharmony_ci                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4224bf215546Sopenharmony_ci
4225bf215546Sopenharmony_ci            if (!control) {
4226bf215546Sopenharmony_ci               st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4227bf215546Sopenharmony_ci
4228bf215546Sopenharmony_ci               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4229bf215546Sopenharmony_ci                  st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4230bf215546Sopenharmony_ci               }
4231bf215546Sopenharmony_ci
4232bf215546Sopenharmony_ci            } else {
4233bf215546Sopenharmony_ci               st_src0 |= control->cache_control_const;
4234bf215546Sopenharmony_ci            }
4235bf215546Sopenharmony_ci
4236bf215546Sopenharmony_ci            /* Write it to the constant. */
4237bf215546Sopenharmony_ci            pvr_pds_write_constant64(buffer,
4238bf215546Sopenharmony_ci                                     constant,
4239bf215546Sopenharmony_ci                                     (uint32_t)(st_src0),
4240bf215546Sopenharmony_ci                                     (uint32_t)(st_src0 >> 32));
4241bf215546Sopenharmony_ci
4242bf215546Sopenharmony_ci            /* Adjust value for next ST instruction. */
4243bf215546Sopenharmony_ci            temp_index += per_inst_count;
4244bf215546Sopenharmony_ci            count -= per_inst_count;
4245bf215546Sopenharmony_ci            address += (((uint64_t)(per_inst_count)) << 2);
4246bf215546Sopenharmony_ci         }
4247bf215546Sopenharmony_ci
4248bf215546Sopenharmony_ci         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4249bf215546Sopenharmony_ci            *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4250bf215546Sopenharmony_ci
4251bf215546Sopenharmony_ci            if (data_fence)
4252bf215546Sopenharmony_ci               *buffer++ = pvr_pds_inst_encode_wdf(0);
4253bf215546Sopenharmony_ci         }
4254bf215546Sopenharmony_ci      }
4255bf215546Sopenharmony_ci   }
4256bf215546Sopenharmony_ci
4257bf215546Sopenharmony_ci   (*total_code_size) += num_inst;
4258bf215546Sopenharmony_ci   if (data_fence)
4259bf215546Sopenharmony_ci      (*total_code_size) += num_inst;
4260bf215546Sopenharmony_ci
4261bf215546Sopenharmony_ci   if (gen_mode != PDS_GENERATE_SIZES)
4262bf215546Sopenharmony_ci      return buffer;
4263bf215546Sopenharmony_ci   return NULL;
4264bf215546Sopenharmony_ci}
4265bf215546Sopenharmony_ci
4266bf215546Sopenharmony_ci/**
4267bf215546Sopenharmony_ci * Generate programs used to prepare stream out, i.e., clear stream out buffer
4268bf215546Sopenharmony_ci * overflow flags and update Persistent temps by a ld instruction.
4269bf215546Sopenharmony_ci *
4270bf215546Sopenharmony_ci * This must be used in PPP state update.
4271bf215546Sopenharmony_ci *
4272bf215546Sopenharmony_ci * \param program Pointer to the stream out program.
4273bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
4274bf215546Sopenharmony_ci * \param store_mode If true then the data is stored to memory. If false then
4275bf215546Sopenharmony_ci *                   the data is loaded from memory.
4276bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
4277bf215546Sopenharmony_ci * \param dev_info PVR device information structure.
4278bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
4279bf215546Sopenharmony_ci */
4280bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_stream_out_init_program(
4281bf215546Sopenharmony_ci   struct pvr_pds_stream_out_init_program *restrict program,
4282bf215546Sopenharmony_ci   uint32_t *restrict buffer,
4283bf215546Sopenharmony_ci   bool store_mode,
4284bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
4285bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
4286bf215546Sopenharmony_ci{
4287bf215546Sopenharmony_ci   uint32_t total_data_size = 0;
4288bf215546Sopenharmony_ci   uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4289bf215546Sopenharmony_ci
4290bf215546Sopenharmony_ci   /* Start counting constants from 0. */
4291bf215546Sopenharmony_ci   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4292bf215546Sopenharmony_ci
4293bf215546Sopenharmony_ci   uint32_t total_code_size = 1;
4294bf215546Sopenharmony_ci
4295bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4296bf215546Sopenharmony_ci      /* We only need to clear global stream out predicate, other predicates
4297bf215546Sopenharmony_ci       * are not used during the stream out buffer overflow test.
4298bf215546Sopenharmony_ci       */
4299bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4300bf215546Sopenharmony_ci   }
4301bf215546Sopenharmony_ci
4302bf215546Sopenharmony_ci   for (uint32_t index = 0; index < program->num_buffers; index++) {
4303bf215546Sopenharmony_ci      if (program->dev_address_for_buffer_data[index] != 0) {
4304bf215546Sopenharmony_ci         /* Generate load/store program to load/store persistent temps. */
4305bf215546Sopenharmony_ci
4306bf215546Sopenharmony_ci         /* NOTE: store_mode == true case should be handled by
4307bf215546Sopenharmony_ci          * StreamOutTerminate.
4308bf215546Sopenharmony_ci          */
4309bf215546Sopenharmony_ci         buffer = pvr_pds_generate_single_ldst_instruction(
4310bf215546Sopenharmony_ci            !store_mode,
4311bf215546Sopenharmony_ci            NULL,
4312bf215546Sopenharmony_ci            PTDst,
4313bf215546Sopenharmony_ci            program->dev_address_for_buffer_data[index],
4314bf215546Sopenharmony_ci            program->pds_buffer_data_size[index],
4315bf215546Sopenharmony_ci            &next_constant,
4316bf215546Sopenharmony_ci            &total_data_size,
4317bf215546Sopenharmony_ci            &total_code_size,
4318bf215546Sopenharmony_ci            buffer,
4319bf215546Sopenharmony_ci            false,
4320bf215546Sopenharmony_ci            gen_mode,
4321bf215546Sopenharmony_ci            dev_info);
4322bf215546Sopenharmony_ci      }
4323bf215546Sopenharmony_ci
4324bf215546Sopenharmony_ci      PTDst += program->pds_buffer_data_size[index];
4325bf215546Sopenharmony_ci   }
4326bf215546Sopenharmony_ci
4327bf215546Sopenharmony_ci   total_code_size += 2;
4328bf215546Sopenharmony_ci
4329bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4330bf215546Sopenharmony_ci      /* We need to fence the loading. */
4331bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_wdf(0);
4332bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_halt(0);
4333bf215546Sopenharmony_ci   }
4334bf215546Sopenharmony_ci
4335bf215546Sopenharmony_ci   /* Save size information to program */
4336bf215546Sopenharmony_ci   program->stream_out_init_pds_data_size =
4337bf215546Sopenharmony_ci      ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4338bf215546Sopenharmony_ci   /* PDS program code size. */
4339bf215546Sopenharmony_ci   program->stream_out_init_pds_code_size = total_code_size;
4340bf215546Sopenharmony_ci
4341bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4342bf215546Sopenharmony_ci      return buffer + program->stream_out_init_pds_data_size;
4343bf215546Sopenharmony_ci   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4344bf215546Sopenharmony_ci      return buffer;
4345bf215546Sopenharmony_ci
4346bf215546Sopenharmony_ci   return NULL;
4347bf215546Sopenharmony_ci}
4348bf215546Sopenharmony_ci
4349bf215546Sopenharmony_ci/**
4350bf215546Sopenharmony_ci * Generate stream out terminate program for stream out.
4351bf215546Sopenharmony_ci *
4352bf215546Sopenharmony_ci * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4353bf215546Sopenharmony_ci * will be stored.
4354bf215546Sopenharmony_ci *
4355bf215546Sopenharmony_ci * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4356bf215546Sopenharmony_ci * will be stored into memory.
4357bf215546Sopenharmony_ci *
4358bf215546Sopenharmony_ci * The stream out terminate program is used to update the PPP state and the data
4359bf215546Sopenharmony_ci * and code section cannot be separate.
4360bf215546Sopenharmony_ci *
4361bf215546Sopenharmony_ci * \param program Pointer to the stream out program.
4362bf215546Sopenharmony_ci * \param buffer Pointer to the buffer for the program.
4363bf215546Sopenharmony_ci * \param gen_mode Either code and data can be generated or sizes only updated.
4364bf215546Sopenharmony_ci * \param dev_info PVR device info structure.
4365bf215546Sopenharmony_ci * \returns Pointer to just beyond the buffer for the program.
4366bf215546Sopenharmony_ci */
4367bf215546Sopenharmony_ciuint32_t *pvr_pds_generate_stream_out_terminate_program(
4368bf215546Sopenharmony_ci   struct pvr_pds_stream_out_terminate_program *restrict program,
4369bf215546Sopenharmony_ci   uint32_t *restrict buffer,
4370bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
4371bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
4372bf215546Sopenharmony_ci{
4373bf215546Sopenharmony_ci   uint32_t next_constant;
4374bf215546Sopenharmony_ci   uint32_t total_data_size = 0, total_code_size = 0;
4375bf215546Sopenharmony_ci
4376bf215546Sopenharmony_ci   /* Start counting constants from 0. */
4377bf215546Sopenharmony_ci   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4378bf215546Sopenharmony_ci
4379bf215546Sopenharmony_ci   /* Generate store program to store persistent temps. */
4380bf215546Sopenharmony_ci   buffer = pvr_pds_generate_single_ldst_instruction(
4381bf215546Sopenharmony_ci      false,
4382bf215546Sopenharmony_ci      NULL,
4383bf215546Sopenharmony_ci      PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4384bf215546Sopenharmony_ci      program->dev_address_for_storing_persistent_temp,
4385bf215546Sopenharmony_ci      program->pds_persistent_temp_size_to_store,
4386bf215546Sopenharmony_ci      &next_constant,
4387bf215546Sopenharmony_ci      &total_data_size,
4388bf215546Sopenharmony_ci      &total_code_size,
4389bf215546Sopenharmony_ci      buffer,
4390bf215546Sopenharmony_ci      false,
4391bf215546Sopenharmony_ci      gen_mode,
4392bf215546Sopenharmony_ci      dev_info);
4393bf215546Sopenharmony_ci
4394bf215546Sopenharmony_ci   total_code_size += 2;
4395bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4396bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_wdf(0);
4397bf215546Sopenharmony_ci      *buffer++ = pvr_pds_inst_encode_halt(0);
4398bf215546Sopenharmony_ci   }
4399bf215546Sopenharmony_ci
4400bf215546Sopenharmony_ci   /* Save size information to program. */
4401bf215546Sopenharmony_ci   program->stream_out_terminate_pds_data_size =
4402bf215546Sopenharmony_ci      ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4403bf215546Sopenharmony_ci   /* PDS program code size. */
4404bf215546Sopenharmony_ci   program->stream_out_terminate_pds_code_size = total_code_size;
4405bf215546Sopenharmony_ci
4406bf215546Sopenharmony_ci   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4407bf215546Sopenharmony_ci      return buffer + program->stream_out_terminate_pds_data_size;
4408bf215546Sopenharmony_ci   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4409bf215546Sopenharmony_ci      return buffer;
4410bf215546Sopenharmony_ci
4411bf215546Sopenharmony_ci   return NULL;
4412bf215546Sopenharmony_ci}
4413bf215546Sopenharmony_ci
4414bf215546Sopenharmony_ci/* DrawArrays works in several steps:
4415bf215546Sopenharmony_ci *
4416bf215546Sopenharmony_ci * 1) load data from draw_indirect buffer
4417bf215546Sopenharmony_ci * 2) tweak data to match hardware formats
4418bf215546Sopenharmony_ci * 3) write data to indexblock
4419bf215546Sopenharmony_ci * 4) signal the VDM to continue
4420bf215546Sopenharmony_ci *
4421bf215546Sopenharmony_ci * This is complicated by HW limitations on alignment, as well as a HWBRN.
4422bf215546Sopenharmony_ci *
4423bf215546Sopenharmony_ci * 1) Load data.
4424bf215546Sopenharmony_ci * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4425bf215546Sopenharmony_ci * spec we must deal with this by choosing an appropriate earlier address and
4426bf215546Sopenharmony_ci * loading enough dwords that we load the entirety of the buffer.
4427bf215546Sopenharmony_ci *
4428bf215546Sopenharmony_ci * if addr & 0xf:
4429bf215546Sopenharmony_ci *   load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 *   data = tmp[(addr & 0xf) >> 2]...
4431bf215546Sopenharmony_ci * else
4432bf215546Sopenharmony_ci *   load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4433bf215546Sopenharmony_ci *   data = tmp[0]...
4434bf215546Sopenharmony_ci *
4435bf215546Sopenharmony_ci *
4436bf215546Sopenharmony_ci * 2) Tweak data.
4437bf215546Sopenharmony_ci * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4438bf215546Sopenharmony_ci * the VDM control stream. We must subtract 1 from the loaded primCount.
4439bf215546Sopenharmony_ci *
4440bf215546Sopenharmony_ci * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4441bf215546Sopenharmony_ci * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4442bf215546Sopenharmony_ci * into another tmp that has the correct alignment. Note: this is only required
4443bf215546Sopenharmony_ci * when data = tmp[even], as primCount is data+1:
4444bf215546Sopenharmony_ci *
4445bf215546Sopenharmony_ci * if data = tmp[even]:
4446bf215546Sopenharmony_ci *   primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4447bf215546Sopenharmony_ci * else:
4448bf215546Sopenharmony_ci *   primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4449bf215546Sopenharmony_ci *
4450bf215546Sopenharmony_ci * This boils down to:
4451bf215546Sopenharmony_ci *
4452bf215546Sopenharmony_ci * primCount = data[1]
4453bf215546Sopenharmony_ci * primCountSrc = data[1]
4454bf215546Sopenharmony_ci * if brn_present && (data is even):
4455bf215546Sopenharmony_ci *   mov scratch, primCount
4456bf215546Sopenharmony_ci *   primCountSrc = scratch
4457bf215546Sopenharmony_ci * endif
4458bf215546Sopenharmony_ci * sub primCount, primCountSrc, 1
4459bf215546Sopenharmony_ci *
4460bf215546Sopenharmony_ci * 3) Store Data.
4461bf215546Sopenharmony_ci * Write the now-tweaked data over the top of the indexblock.
4462bf215546Sopenharmony_ci * To ensure the write completes before the VDM re-reads the data, we must cause
4463bf215546Sopenharmony_ci * a data hazard by doing a dummy (dummy meaning we don't care about the
4464bf215546Sopenharmony_ci * returned data) load from the same addresses. Again, because the ld must
4465bf215546Sopenharmony_ci * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4466bf215546Sopenharmony_ci * index block is 128-bit aligned. This is the client driver's responsibility.
4467bf215546Sopenharmony_ci *
4468bf215546Sopenharmony_ci * st data[0, 1, 2] -> (idxblock + 4)
4469bf215546Sopenharmony_ci * load [idxblock] 4 dwords
4470bf215546Sopenharmony_ci *
4471bf215546Sopenharmony_ci * 4) Signal the VDM
4472bf215546Sopenharmony_ci * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4473bf215546Sopenharmony_ci * where it is currently fenced on a dummy idxblock that has been inserted by
4474bf215546Sopenharmony_ci * the driver.
4475bf215546Sopenharmony_ci */
4476bf215546Sopenharmony_ci
4477bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays0.h"
4478bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays1.h"
4479bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays2.h"
4480bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays3.h"
4481bf215546Sopenharmony_ci
4482bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance0.h"
4483bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance1.h"
4484bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance2.h"
4485bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance3.h"
4486bf215546Sopenharmony_ci
4487bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4488bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4489bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4490bf215546Sopenharmony_ci#include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4491bf215546Sopenharmony_ci
/* Evaluates to the LD SRC0 SLC-mode value to use for `device`:
 * PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED when the device's
 * features.has_slc_mcu_cache_controls flag is set, and
 * PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS otherwise.
 *
 * `device` is evaluated exactly once and must be a pointer expression.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)            \
   (((device)->features.has_slc_mcu_cache_controls)      \
       ? (PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED)   \
       : (PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS))
4496bf215546Sopenharmony_ci
4497bf215546Sopenharmony_civoid pvr_pds_generate_draw_arrays_indirect(
4498bf215546Sopenharmony_ci   struct pvr_pds_drawindirect_program *restrict program,
4499bf215546Sopenharmony_ci   uint32_t *restrict buffer,
4500bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
4501bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
4502bf215546Sopenharmony_ci{
4503bf215546Sopenharmony_ci   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4504bf215546Sopenharmony_ci       (gen_mode == PDS_GENERATE_SIZES)) {
4505bf215546Sopenharmony_ci      const struct pvr_psc_program_output *psc_program = NULL;
4506bf215546Sopenharmony_ci      switch ((program->arg_buffer >> 2) % 4) {
4507bf215546Sopenharmony_ci      case 0:
4508bf215546Sopenharmony_ci         if (program->support_base_instance) {
4509bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4510bf215546Sopenharmony_ci               psc_program =
4511bf215546Sopenharmony_ci                  &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4512bf215546Sopenharmony_ci            } else {
4513bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4514bf215546Sopenharmony_ci            }
4515bf215546Sopenharmony_ci         } else {
4516bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_arrays0_program;
4517bf215546Sopenharmony_ci         }
4518bf215546Sopenharmony_ci         break;
4519bf215546Sopenharmony_ci      case 1:
4520bf215546Sopenharmony_ci         if (program->support_base_instance) {
4521bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4522bf215546Sopenharmony_ci               psc_program =
4523bf215546Sopenharmony_ci                  &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4524bf215546Sopenharmony_ci            } else {
4525bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4526bf215546Sopenharmony_ci            }
4527bf215546Sopenharmony_ci         } else {
4528bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_arrays1_program;
4529bf215546Sopenharmony_ci         }
4530bf215546Sopenharmony_ci         break;
4531bf215546Sopenharmony_ci      case 2:
4532bf215546Sopenharmony_ci         if (program->support_base_instance) {
4533bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4534bf215546Sopenharmony_ci               psc_program =
4535bf215546Sopenharmony_ci                  &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4536bf215546Sopenharmony_ci            } else {
4537bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4538bf215546Sopenharmony_ci            }
4539bf215546Sopenharmony_ci         } else {
4540bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_arrays2_program;
4541bf215546Sopenharmony_ci         }
4542bf215546Sopenharmony_ci         break;
4543bf215546Sopenharmony_ci      case 3:
4544bf215546Sopenharmony_ci         if (program->support_base_instance) {
4545bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4546bf215546Sopenharmony_ci               psc_program =
4547bf215546Sopenharmony_ci                  &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4548bf215546Sopenharmony_ci            } else {
4549bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4550bf215546Sopenharmony_ci            }
4551bf215546Sopenharmony_ci         } else {
4552bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_arrays3_program;
4553bf215546Sopenharmony_ci         }
4554bf215546Sopenharmony_ci         break;
4555bf215546Sopenharmony_ci      }
4556bf215546Sopenharmony_ci
4557bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4558bf215546Sopenharmony_ci         memcpy(buffer,
4559bf215546Sopenharmony_ci                psc_program->code,
4560bf215546Sopenharmony_ci                psc_program->code_size * sizeof(uint32_t));
4561bf215546Sopenharmony_ci#if defined(DUMP_PDS)
4562bf215546Sopenharmony_ci         for (uint32_t i = 0; i < psc_program->code_size; i++)
4563bf215546Sopenharmony_ci            PVR_PDS_PRINT_INST(buffer[i]);
4564bf215546Sopenharmony_ci#endif
4565bf215546Sopenharmony_ci      }
4566bf215546Sopenharmony_ci
4567bf215546Sopenharmony_ci      program->program = *psc_program;
4568bf215546Sopenharmony_ci   } else {
4569bf215546Sopenharmony_ci      switch ((program->arg_buffer >> 2) % 4) {
4570bf215546Sopenharmony_ci      case 0:
4571bf215546Sopenharmony_ci         if (program->support_base_instance) {
4572bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4573bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4574bf215546Sopenharmony_ci                  buffer,
4575bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4576bf215546Sopenharmony_ci                  dev_info);
4577bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4578bf215546Sopenharmony_ci                  buffer,
4579bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4580bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4581bf215546Sopenharmony_ci                  buffer,
4582bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4583bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4584bf215546Sopenharmony_ci                  buffer,
4585bf215546Sopenharmony_ci                  program->num_views);
4586bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4587bf215546Sopenharmony_ci                  buffer);
4588bf215546Sopenharmony_ci            } else {
4589bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance0_di_data(
4590bf215546Sopenharmony_ci                  buffer,
4591bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4592bf215546Sopenharmony_ci                  dev_info);
4593bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4594bf215546Sopenharmony_ci                  buffer,
4595bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4596bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4597bf215546Sopenharmony_ci                  buffer,
4598bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4599bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance0_num_views(
4600bf215546Sopenharmony_ci                  buffer,
4601bf215546Sopenharmony_ci                  program->num_views);
4602bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4603bf215546Sopenharmony_ci            }
4604bf215546Sopenharmony_ci         } else {
4605bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays0_di_data(buffer,
4606bf215546Sopenharmony_ci                                                    program->arg_buffer &
4607bf215546Sopenharmony_ci                                                       ~0xfull,
4608bf215546Sopenharmony_ci                                                    dev_info);
4609bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays0_write_vdm(
4610bf215546Sopenharmony_ci               buffer,
4611bf215546Sopenharmony_ci               program->index_list_addr_buffer + 4);
4612bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays0_flush_vdm(
4613bf215546Sopenharmony_ci               buffer,
4614bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4615bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays0_num_views(buffer,
4616bf215546Sopenharmony_ci                                                      program->num_views);
4617bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays0_immediates(buffer);
4618bf215546Sopenharmony_ci         }
4619bf215546Sopenharmony_ci         break;
4620bf215546Sopenharmony_ci      case 1:
4621bf215546Sopenharmony_ci         if (program->support_base_instance) {
4622bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4623bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4624bf215546Sopenharmony_ci                  buffer,
4625bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4626bf215546Sopenharmony_ci                  dev_info);
4627bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4628bf215546Sopenharmony_ci                  buffer,
4629bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4630bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4631bf215546Sopenharmony_ci                  buffer,
4632bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4633bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4634bf215546Sopenharmony_ci                  buffer,
4635bf215546Sopenharmony_ci                  program->num_views);
4636bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4637bf215546Sopenharmony_ci                  buffer);
4638bf215546Sopenharmony_ci            } else {
4639bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance1_di_data(
4640bf215546Sopenharmony_ci                  buffer,
4641bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4642bf215546Sopenharmony_ci                  dev_info);
4643bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4644bf215546Sopenharmony_ci                  buffer,
4645bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4646bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4647bf215546Sopenharmony_ci                  buffer,
4648bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4649bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance1_num_views(
4650bf215546Sopenharmony_ci                  buffer,
4651bf215546Sopenharmony_ci                  program->num_views);
4652bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4653bf215546Sopenharmony_ci            }
4654bf215546Sopenharmony_ci         } else {
4655bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays1_di_data(buffer,
4656bf215546Sopenharmony_ci                                                    program->arg_buffer &
4657bf215546Sopenharmony_ci                                                       ~0xfull,
4658bf215546Sopenharmony_ci                                                    dev_info);
4659bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays1_write_vdm(
4660bf215546Sopenharmony_ci               buffer,
4661bf215546Sopenharmony_ci               program->index_list_addr_buffer + 4);
4662bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays1_flush_vdm(
4663bf215546Sopenharmony_ci               buffer,
4664bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4665bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays1_num_views(buffer,
4666bf215546Sopenharmony_ci                                                      program->num_views);
4667bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays1_immediates(buffer);
4668bf215546Sopenharmony_ci         }
4669bf215546Sopenharmony_ci         break;
4670bf215546Sopenharmony_ci      case 2:
4671bf215546Sopenharmony_ci         if (program->support_base_instance) {
4672bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4673bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4674bf215546Sopenharmony_ci                  buffer,
4675bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4676bf215546Sopenharmony_ci                  dev_info);
4677bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4678bf215546Sopenharmony_ci                  buffer,
4679bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4680bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4681bf215546Sopenharmony_ci                  buffer,
4682bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4683bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4684bf215546Sopenharmony_ci                  buffer,
4685bf215546Sopenharmony_ci                  program->num_views);
4686bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4687bf215546Sopenharmony_ci                  buffer);
4688bf215546Sopenharmony_ci            } else {
4689bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance2_di_data(
4690bf215546Sopenharmony_ci                  buffer,
4691bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4692bf215546Sopenharmony_ci                  dev_info);
4693bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4694bf215546Sopenharmony_ci                  buffer,
4695bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4696bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4697bf215546Sopenharmony_ci                  buffer,
4698bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4699bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance2_num_views(
4700bf215546Sopenharmony_ci                  buffer,
4701bf215546Sopenharmony_ci                  program->num_views);
4702bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4703bf215546Sopenharmony_ci            }
4704bf215546Sopenharmony_ci         } else {
4705bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays2_di_data(buffer,
4706bf215546Sopenharmony_ci                                                    program->arg_buffer &
4707bf215546Sopenharmony_ci                                                       ~0xfull,
4708bf215546Sopenharmony_ci                                                    dev_info);
4709bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays2_write_vdm(
4710bf215546Sopenharmony_ci               buffer,
4711bf215546Sopenharmony_ci               program->index_list_addr_buffer + 4);
4712bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays2_flush_vdm(
4713bf215546Sopenharmony_ci               buffer,
4714bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4715bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays2_num_views(buffer,
4716bf215546Sopenharmony_ci                                                      program->num_views);
4717bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays2_immediates(buffer);
4718bf215546Sopenharmony_ci         }
4719bf215546Sopenharmony_ci         break;
4720bf215546Sopenharmony_ci      case 3:
4721bf215546Sopenharmony_ci         if (program->support_base_instance) {
4722bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4723bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4724bf215546Sopenharmony_ci                  buffer,
4725bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4726bf215546Sopenharmony_ci                  dev_info);
4727bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4728bf215546Sopenharmony_ci                  buffer,
4729bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4730bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4731bf215546Sopenharmony_ci                  buffer,
4732bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4733bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4734bf215546Sopenharmony_ci                  buffer,
4735bf215546Sopenharmony_ci                  program->num_views);
4736bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4737bf215546Sopenharmony_ci                  buffer);
4738bf215546Sopenharmony_ci            } else {
4739bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance3_di_data(
4740bf215546Sopenharmony_ci                  buffer,
4741bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4742bf215546Sopenharmony_ci                  dev_info);
4743bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4744bf215546Sopenharmony_ci                  buffer,
4745bf215546Sopenharmony_ci                  program->index_list_addr_buffer + 4);
4746bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4747bf215546Sopenharmony_ci                  buffer,
4748bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4749bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance3_num_views(
4750bf215546Sopenharmony_ci                  buffer,
4751bf215546Sopenharmony_ci                  program->num_views);
4752bf215546Sopenharmony_ci               pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4753bf215546Sopenharmony_ci            }
4754bf215546Sopenharmony_ci         } else {
4755bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays3_di_data(buffer,
4756bf215546Sopenharmony_ci                                                    program->arg_buffer &
4757bf215546Sopenharmony_ci                                                       ~0xfull,
4758bf215546Sopenharmony_ci                                                    dev_info);
4759bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays3_write_vdm(
4760bf215546Sopenharmony_ci               buffer,
4761bf215546Sopenharmony_ci               program->index_list_addr_buffer + 4);
4762bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays3_flush_vdm(
4763bf215546Sopenharmony_ci               buffer,
4764bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4765bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays3_num_views(buffer,
4766bf215546Sopenharmony_ci                                                      program->num_views);
4767bf215546Sopenharmony_ci            pvr_write_draw_indirect_arrays3_immediates(buffer);
4768bf215546Sopenharmony_ci         }
4769bf215546Sopenharmony_ci         break;
4770bf215546Sopenharmony_ci      }
4771bf215546Sopenharmony_ci   }
4772bf215546Sopenharmony_ci}
4773bf215546Sopenharmony_ci
4774bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements0.h"
4775bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements1.h"
4776bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements2.h"
4777bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements3.h"
4778bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance0.h"
4779bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance1.h"
4780bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance2.h"
4781bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance3.h"
4782bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4783bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4784bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4785bf215546Sopenharmony_ci#include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4786bf215546Sopenharmony_ci
4787bf215546Sopenharmony_civoid pvr_pds_generate_draw_elements_indirect(
4788bf215546Sopenharmony_ci   struct pvr_pds_drawindirect_program *restrict program,
4789bf215546Sopenharmony_ci   uint32_t *restrict buffer,
4790bf215546Sopenharmony_ci   enum pvr_pds_generate_mode gen_mode,
4791bf215546Sopenharmony_ci   const struct pvr_device_info *dev_info)
4792bf215546Sopenharmony_ci{
4793bf215546Sopenharmony_ci   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4794bf215546Sopenharmony_ci       (gen_mode == PDS_GENERATE_SIZES)) {
4795bf215546Sopenharmony_ci      const struct pvr_psc_program_output *psc_program = NULL;
4796bf215546Sopenharmony_ci      switch ((program->arg_buffer >> 2) % 4) {
4797bf215546Sopenharmony_ci      case 0:
4798bf215546Sopenharmony_ci         if (program->support_base_instance) {
4799bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4800bf215546Sopenharmony_ci               psc_program =
4801bf215546Sopenharmony_ci                  &pvr_draw_indirect_elements_base_instance_drawid0_program;
4802bf215546Sopenharmony_ci            } else {
4803bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4804bf215546Sopenharmony_ci            }
4805bf215546Sopenharmony_ci         } else {
4806bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_elements0_program;
4807bf215546Sopenharmony_ci         }
4808bf215546Sopenharmony_ci         break;
4809bf215546Sopenharmony_ci      case 1:
4810bf215546Sopenharmony_ci         if (program->support_base_instance) {
4811bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4812bf215546Sopenharmony_ci               psc_program =
4813bf215546Sopenharmony_ci                  &pvr_draw_indirect_elements_base_instance_drawid1_program;
4814bf215546Sopenharmony_ci            } else {
4815bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4816bf215546Sopenharmony_ci            }
4817bf215546Sopenharmony_ci         } else {
4818bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_elements1_program;
4819bf215546Sopenharmony_ci         }
4820bf215546Sopenharmony_ci         break;
4821bf215546Sopenharmony_ci      case 2:
4822bf215546Sopenharmony_ci         if (program->support_base_instance) {
4823bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4824bf215546Sopenharmony_ci               psc_program =
4825bf215546Sopenharmony_ci                  &pvr_draw_indirect_elements_base_instance_drawid2_program;
4826bf215546Sopenharmony_ci            } else {
4827bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4828bf215546Sopenharmony_ci            }
4829bf215546Sopenharmony_ci         } else {
4830bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_elements2_program;
4831bf215546Sopenharmony_ci         }
4832bf215546Sopenharmony_ci         break;
4833bf215546Sopenharmony_ci      case 3:
4834bf215546Sopenharmony_ci         if (program->support_base_instance) {
4835bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4836bf215546Sopenharmony_ci               psc_program =
4837bf215546Sopenharmony_ci                  &pvr_draw_indirect_elements_base_instance_drawid3_program;
4838bf215546Sopenharmony_ci            } else {
4839bf215546Sopenharmony_ci               psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4840bf215546Sopenharmony_ci            }
4841bf215546Sopenharmony_ci         } else {
4842bf215546Sopenharmony_ci            psc_program = &pvr_draw_indirect_elements3_program;
4843bf215546Sopenharmony_ci         }
4844bf215546Sopenharmony_ci         break;
4845bf215546Sopenharmony_ci      }
4846bf215546Sopenharmony_ci
4847bf215546Sopenharmony_ci      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4848bf215546Sopenharmony_ci         memcpy(buffer,
4849bf215546Sopenharmony_ci                psc_program->code,
4850bf215546Sopenharmony_ci                psc_program->code_size * sizeof(uint32_t));
4851bf215546Sopenharmony_ci
4852bf215546Sopenharmony_ci#if defined(DUMP_PDS)
4853bf215546Sopenharmony_ci         for (uint32_t i = 0; i < psc_program->code_size; i++)
4854bf215546Sopenharmony_ci            PVR_PDS_PRINT_INST(buffer[i]);
4855bf215546Sopenharmony_ci#endif
4856bf215546Sopenharmony_ci      }
4857bf215546Sopenharmony_ci
4858bf215546Sopenharmony_ci      program->program = *psc_program;
4859bf215546Sopenharmony_ci   } else {
4860bf215546Sopenharmony_ci      switch ((program->arg_buffer >> 2) % 4) {
4861bf215546Sopenharmony_ci      case 0:
4862bf215546Sopenharmony_ci         if (program->support_base_instance) {
4863bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4864bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4865bf215546Sopenharmony_ci                  buffer,
4866bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4867bf215546Sopenharmony_ci                  dev_info);
4868bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4869bf215546Sopenharmony_ci                  buffer,
4870bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4871bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4872bf215546Sopenharmony_ci                  buffer,
4873bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4874bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4875bf215546Sopenharmony_ci                  buffer,
4876bf215546Sopenharmony_ci                  program->num_views);
4877bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4878bf215546Sopenharmony_ci                  buffer,
4879bf215546Sopenharmony_ci                  program->index_stride);
4880bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4881bf215546Sopenharmony_ci                  buffer,
4882bf215546Sopenharmony_ci                  program->index_buffer);
4883bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4884bf215546Sopenharmony_ci                  buffer,
4885bf215546Sopenharmony_ci                  program->index_block_header);
4886bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4887bf215546Sopenharmony_ci                  buffer);
4888bf215546Sopenharmony_ci            } else {
4889bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_di_data(
4890bf215546Sopenharmony_ci                  buffer,
4891bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4892bf215546Sopenharmony_ci                  dev_info);
4893bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4894bf215546Sopenharmony_ci                  buffer,
4895bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4896bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4897bf215546Sopenharmony_ci                  buffer,
4898bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4899bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_num_views(
4900bf215546Sopenharmony_ci                  buffer,
4901bf215546Sopenharmony_ci                  program->num_views);
4902bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4903bf215546Sopenharmony_ci                  buffer,
4904bf215546Sopenharmony_ci                  program->index_stride);
4905bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_idx_base(
4906bf215546Sopenharmony_ci                  buffer,
4907bf215546Sopenharmony_ci                  program->index_buffer);
4908bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_idx_header(
4909bf215546Sopenharmony_ci                  buffer,
4910bf215546Sopenharmony_ci                  program->index_block_header);
4911bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance0_immediates(
4912bf215546Sopenharmony_ci                  buffer);
4913bf215546Sopenharmony_ci            }
4914bf215546Sopenharmony_ci         } else {
4915bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_di_data(buffer,
4916bf215546Sopenharmony_ci                                                      program->arg_buffer &
4917bf215546Sopenharmony_ci                                                         ~0xfull,
4918bf215546Sopenharmony_ci                                                      dev_info);
4919bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_write_vdm(
4920bf215546Sopenharmony_ci               buffer,
4921bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4922bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_flush_vdm(
4923bf215546Sopenharmony_ci               buffer,
4924bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4925bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_num_views(buffer,
4926bf215546Sopenharmony_ci                                                        program->num_views);
4927bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_idx_stride(buffer,
4928bf215546Sopenharmony_ci                                                         program->index_stride);
4929bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_idx_base(buffer,
4930bf215546Sopenharmony_ci                                                       program->index_buffer);
4931bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_idx_header(
4932bf215546Sopenharmony_ci               buffer,
4933bf215546Sopenharmony_ci               program->index_block_header);
4934bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements0_immediates(buffer);
4935bf215546Sopenharmony_ci         }
4936bf215546Sopenharmony_ci         break;
4937bf215546Sopenharmony_ci      case 1:
4938bf215546Sopenharmony_ci         if (program->support_base_instance) {
4939bf215546Sopenharmony_ci            if (program->increment_draw_id) {
4940bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4941bf215546Sopenharmony_ci                  buffer,
4942bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4943bf215546Sopenharmony_ci                  dev_info);
4944bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4945bf215546Sopenharmony_ci                  buffer,
4946bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4947bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4948bf215546Sopenharmony_ci                  buffer,
4949bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4950bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4951bf215546Sopenharmony_ci                  buffer,
4952bf215546Sopenharmony_ci                  program->num_views);
4953bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4954bf215546Sopenharmony_ci                  buffer,
4955bf215546Sopenharmony_ci                  program->index_stride);
4956bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4957bf215546Sopenharmony_ci                  buffer,
4958bf215546Sopenharmony_ci                  program->index_buffer);
4959bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4960bf215546Sopenharmony_ci                  buffer,
4961bf215546Sopenharmony_ci                  program->index_block_header);
4962bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4963bf215546Sopenharmony_ci                  buffer);
4964bf215546Sopenharmony_ci            } else {
4965bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_di_data(
4966bf215546Sopenharmony_ci                  buffer,
4967bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
4968bf215546Sopenharmony_ci                  dev_info);
4969bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4970bf215546Sopenharmony_ci                  buffer,
4971bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4972bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4973bf215546Sopenharmony_ci                  buffer,
4974bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
4975bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_num_views(
4976bf215546Sopenharmony_ci                  buffer,
4977bf215546Sopenharmony_ci                  program->num_views);
4978bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4979bf215546Sopenharmony_ci                  buffer,
4980bf215546Sopenharmony_ci                  program->index_stride);
4981bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_idx_base(
4982bf215546Sopenharmony_ci                  buffer,
4983bf215546Sopenharmony_ci                  program->index_buffer);
4984bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_idx_header(
4985bf215546Sopenharmony_ci                  buffer,
4986bf215546Sopenharmony_ci                  program->index_block_header);
4987bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance1_immediates(
4988bf215546Sopenharmony_ci                  buffer);
4989bf215546Sopenharmony_ci            }
4990bf215546Sopenharmony_ci         } else {
4991bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_di_data(buffer,
4992bf215546Sopenharmony_ci                                                      program->arg_buffer &
4993bf215546Sopenharmony_ci                                                         ~0xfull,
4994bf215546Sopenharmony_ci                                                      dev_info);
4995bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_write_vdm(
4996bf215546Sopenharmony_ci               buffer,
4997bf215546Sopenharmony_ci               program->index_list_addr_buffer);
4998bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_flush_vdm(
4999bf215546Sopenharmony_ci               buffer,
5000bf215546Sopenharmony_ci               program->index_list_addr_buffer);
5001bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_num_views(buffer,
5002bf215546Sopenharmony_ci                                                        program->num_views);
5003bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_idx_stride(buffer,
5004bf215546Sopenharmony_ci                                                         program->index_stride);
5005bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_idx_base(buffer,
5006bf215546Sopenharmony_ci                                                       program->index_buffer);
5007bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_idx_header(
5008bf215546Sopenharmony_ci               buffer,
5009bf215546Sopenharmony_ci               program->index_block_header);
5010bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements1_immediates(buffer);
5011bf215546Sopenharmony_ci         }
5012bf215546Sopenharmony_ci         break;
5013bf215546Sopenharmony_ci      case 2:
5014bf215546Sopenharmony_ci         if (program->support_base_instance) {
5015bf215546Sopenharmony_ci            if (program->increment_draw_id) {
5016bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
5017bf215546Sopenharmony_ci                  buffer,
5018bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
5019bf215546Sopenharmony_ci                  dev_info);
5020bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
5021bf215546Sopenharmony_ci                  buffer,
5022bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5023bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5024bf215546Sopenharmony_ci                  buffer,
5025bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5026bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5027bf215546Sopenharmony_ci                  buffer,
5028bf215546Sopenharmony_ci                  program->num_views);
5029bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5030bf215546Sopenharmony_ci                  buffer,
5031bf215546Sopenharmony_ci                  program->index_stride);
5032bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5033bf215546Sopenharmony_ci                  buffer,
5034bf215546Sopenharmony_ci                  program->index_buffer);
5035bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5036bf215546Sopenharmony_ci                  buffer,
5037bf215546Sopenharmony_ci                  program->index_block_header);
5038bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5039bf215546Sopenharmony_ci                  buffer);
5040bf215546Sopenharmony_ci            } else {
5041bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_di_data(
5042bf215546Sopenharmony_ci                  buffer,
5043bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
5044bf215546Sopenharmony_ci                  dev_info);
5045bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5046bf215546Sopenharmony_ci                  buffer,
5047bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5048bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5049bf215546Sopenharmony_ci                  buffer,
5050bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5051bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_num_views(
5052bf215546Sopenharmony_ci                  buffer,
5053bf215546Sopenharmony_ci                  program->num_views);
5054bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5055bf215546Sopenharmony_ci                  buffer,
5056bf215546Sopenharmony_ci                  program->index_stride);
5057bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_idx_base(
5058bf215546Sopenharmony_ci                  buffer,
5059bf215546Sopenharmony_ci                  program->index_buffer);
5060bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_idx_header(
5061bf215546Sopenharmony_ci                  buffer,
5062bf215546Sopenharmony_ci                  program->index_block_header);
5063bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance2_immediates(
5064bf215546Sopenharmony_ci                  buffer);
5065bf215546Sopenharmony_ci            }
5066bf215546Sopenharmony_ci         } else {
5067bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_di_data(buffer,
5068bf215546Sopenharmony_ci                                                      program->arg_buffer &
5069bf215546Sopenharmony_ci                                                         ~0xfull,
5070bf215546Sopenharmony_ci                                                      dev_info);
5071bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_write_vdm(
5072bf215546Sopenharmony_ci               buffer,
5073bf215546Sopenharmony_ci               program->index_list_addr_buffer);
5074bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_flush_vdm(
5075bf215546Sopenharmony_ci               buffer,
5076bf215546Sopenharmony_ci               program->index_list_addr_buffer);
5077bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_num_views(buffer,
5078bf215546Sopenharmony_ci                                                        program->num_views);
5079bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_idx_stride(buffer,
5080bf215546Sopenharmony_ci                                                         program->index_stride);
5081bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_idx_base(buffer,
5082bf215546Sopenharmony_ci                                                       program->index_buffer);
5083bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_idx_header(
5084bf215546Sopenharmony_ci               buffer,
5085bf215546Sopenharmony_ci               program->index_block_header);
5086bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements2_immediates(buffer);
5087bf215546Sopenharmony_ci         }
5088bf215546Sopenharmony_ci         break;
5089bf215546Sopenharmony_ci      case 3:
5090bf215546Sopenharmony_ci         if (program->support_base_instance) {
5091bf215546Sopenharmony_ci            if (program->increment_draw_id) {
5092bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5093bf215546Sopenharmony_ci                  buffer,
5094bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
5095bf215546Sopenharmony_ci                  dev_info);
5096bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5097bf215546Sopenharmony_ci                  buffer,
5098bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5099bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5100bf215546Sopenharmony_ci                  buffer,
5101bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5102bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5103bf215546Sopenharmony_ci                  buffer,
5104bf215546Sopenharmony_ci                  program->num_views);
5105bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5106bf215546Sopenharmony_ci                  buffer,
5107bf215546Sopenharmony_ci                  program->index_stride);
5108bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5109bf215546Sopenharmony_ci                  buffer,
5110bf215546Sopenharmony_ci                  program->index_buffer);
5111bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5112bf215546Sopenharmony_ci                  buffer,
5113bf215546Sopenharmony_ci                  program->index_block_header);
5114bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5115bf215546Sopenharmony_ci                  buffer);
5116bf215546Sopenharmony_ci            } else {
5117bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_di_data(
5118bf215546Sopenharmony_ci                  buffer,
5119bf215546Sopenharmony_ci                  program->arg_buffer & ~0xfull,
5120bf215546Sopenharmony_ci                  dev_info);
5121bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5122bf215546Sopenharmony_ci                  buffer,
5123bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5124bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5125bf215546Sopenharmony_ci                  buffer,
5126bf215546Sopenharmony_ci                  program->index_list_addr_buffer);
5127bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_num_views(
5128bf215546Sopenharmony_ci                  buffer,
5129bf215546Sopenharmony_ci                  program->num_views);
5130bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5131bf215546Sopenharmony_ci                  buffer,
5132bf215546Sopenharmony_ci                  program->index_stride);
5133bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_idx_base(
5134bf215546Sopenharmony_ci                  buffer,
5135bf215546Sopenharmony_ci                  program->index_buffer);
5136bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_idx_header(
5137bf215546Sopenharmony_ci                  buffer,
5138bf215546Sopenharmony_ci                  program->index_block_header);
5139bf215546Sopenharmony_ci               pvr_write_draw_indirect_elements_base_instance3_immediates(
5140bf215546Sopenharmony_ci                  buffer);
5141bf215546Sopenharmony_ci            }
5142bf215546Sopenharmony_ci         } else {
5143bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_di_data(buffer,
5144bf215546Sopenharmony_ci                                                      program->arg_buffer &
5145bf215546Sopenharmony_ci                                                         ~0xfull,
5146bf215546Sopenharmony_ci                                                      dev_info);
5147bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_write_vdm(
5148bf215546Sopenharmony_ci               buffer,
5149bf215546Sopenharmony_ci               program->index_list_addr_buffer);
5150bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_flush_vdm(
5151bf215546Sopenharmony_ci               buffer,
5152bf215546Sopenharmony_ci               program->index_list_addr_buffer);
5153bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_num_views(buffer,
5154bf215546Sopenharmony_ci                                                        program->num_views);
5155bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_idx_stride(buffer,
5156bf215546Sopenharmony_ci                                                         program->index_stride);
5157bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_idx_base(buffer,
5158bf215546Sopenharmony_ci                                                       program->index_buffer);
5159bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_idx_header(
5160bf215546Sopenharmony_ci               buffer,
5161bf215546Sopenharmony_ci               program->index_block_header);
5162bf215546Sopenharmony_ci            pvr_write_draw_indirect_elements3_immediates(buffer);
5163bf215546Sopenharmony_ci         }
5164bf215546Sopenharmony_ci         break;
5165bf215546Sopenharmony_ci      }
5166bf215546Sopenharmony_ci   }
5167bf215546Sopenharmony_ci}
5168