1/*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#ifndef PVR_PDS_H
25#define PVR_PDS_H
26
27#include <stdbool.h>
28
29#include "pvr_device_info.h"
30#include "pvr_limits.h"
31#include "pds/pvr_rogue_pds_defs.h"
32#include "util/macros.h"
33
/* C++ has no `restrict` keyword; map it to the `__restrict__` compiler
 * extension so that the prototypes in this header can be parsed when it is
 * included from C++ code.
 */
#ifdef __cplusplus
#   define restrict __restrict__
#endif
37
38/*****************************************************************************
39 Macro definitions
40*****************************************************************************/
41
/* Maximum total number of dword DOUTWs. Based on the maximum number of passes
 * that may emit DOUTW x the maximum number that might be emitted.
 */
#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6
/* Maximum total number of qword DOUTWs. Based on the maximum number of passes
 * that may emit DOUTW x the maximum number that might be emitted.
 */
#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3
/* Based on max(max(UBOs,cbuffers), numTextures). Sizes the *_dma_address and
 * *_dma_control arrays of the secondary attribute program structures.
 */
#define PVR_PDS_MAX_NUM_DMA_KICKS 32
/* Length of the streams array in struct pvr_pds_vertex_shader_program. */
#define PVR_PDS_NUM_VERTEX_STREAMS 32
/* Length of the elements array in struct pvr_pds_vertex_stream. */
#define PVR_PDS_NUM_VERTEX_ELEMENTS 32
/* Length of the FPU_iterators and destination arrays in
 * struct pvr_pds_coeff_loading_program.
 */
#define PVR_MAXIMUM_ITERATIONS 128

/* NOTE(review): presumably the length of the *_input_regs[3] arrays in
 * struct pvr_pds_compute_shader_program, which currently hard-code 3 - confirm
 * and consider using this macro there.
 */
#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3
57
/* Evaluates to true when the device has the compute_morton_capable feature
 * but does not have ERN 45493.
 *
 * The expansion is fully parenthesized so the macro acts as a single operand
 * when combined with other operators (e.g. `!MACRO(x)` or `a || MACRO(x)`).
 */
#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)        \
   (PVR_HAS_FEATURE(dev_info, compute_morton_capable) && \
    !PVR_HAS_ERN(dev_info, 45493))
61
/* FIXME: Change BIL to SPV. */
/* Any variable location can have at most 4 32-bit components. */
#define BIL_COMPONENTS_PER_LOCATION 4

/* Maximum number of DDMADs that may be performed (number of attributes x
 * number of DMAs per attribute).
 */
#define PVR_MAX_VERTEX_ATTRIB_DMAS \
   (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION)
71
72/*****************************************************************************
73 Typedefs
74*****************************************************************************/
75
/* Boolean stored in a fixed 32-bit unsigned integer.
 *
 * FIXME: We might need to change some bools to this.
 */
typedef uint32_t PVR_PDS_BOOL;
78
79/*****************************************************************************
80 Enums
81*****************************************************************************/
82
/* Selects what a PDS program generator function produces.
 *
 * The pvr_pds_set_sizes_*() wrapper macros in this header pass
 * PDS_GENERATE_SIZES together with a NULL output buffer; the
 * *_data_segment/*_code_segment wrappers pass the corresponding segment mode
 * together with a caller-supplied buffer.
 */
enum pvr_pds_generate_mode {
   PDS_GENERATE_SIZES,
   PDS_GENERATE_CODE_SEGMENT,
   PDS_GENERATE_DATA_SEGMENT,
   PDS_GENERATE_CODEDATA_SEGMENTS
};
89
/* Store selected as the DOUTW destination (see the dest_store field of
 * struct pvr_pds_doutw_control).
 */
enum pvr_pds_store_type {
   PDS_COMMON_STORE,
   PDS_UNIFIED_STORE
};
91
/* Variants of the PDS vertex attribute program.
 *
 * PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT is not a variant: as the last
 * enumerator it equals the number of variants listed before it.
 */
enum pvr_pds_vertex_attrib_program_type {
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC,
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE,
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT,
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT
};
98
99/*****************************************************************************
100 Structure definitions
101*****************************************************************************/
102
/* Describes one register entry of a PSC program. Arrays of these are held by
 * struct pvr_psc_program_output (data) and
 * struct pvr_pds_drawindirect_program (data).
 *
 * NOTE(review): the meaning of num, cast, type, name, auto_assign and
 * original_type is not visible in this header - confirm against the PSC code
 * before relying on them.
 */
struct pvr_psc_register {
   uint32_t num;

   unsigned int size; /* size of each element. */
   unsigned int dim : 4; /* max number of elements (4-bit field). */
   unsigned int index; /* offset into array. */

   unsigned int cast;

   unsigned int type;
   uint64_t name;
   bool auto_assign;
   unsigned int original_type;
};
117
/* Results of a PSC program compilation (embedded as `program` in
 * struct pvr_pds_drawindirect_program).
 *
 * code - pointer to the (read-only) program code words
 * data / data_count - register descriptors and the number of them
 * *_size_aligned / *_size - data, code and temp sizes, with and without
 *                           alignment applied. NOTE(review): units and
 *                           alignment granularity are not visible here.
 * write_data - callback taking an opaque data pointer and an output buffer.
 */
struct pvr_psc_program_output {
   const uint32_t *code;

   struct pvr_psc_register *data;
   unsigned int data_count;

   unsigned int data_size_aligned;
   unsigned int code_size_aligned;
   unsigned int temp_size_aligned;

   unsigned int data_size;
   unsigned int code_size;
   unsigned int temp_size;

   void (*write_data)(void *data, uint32_t *buffer);
};
134
/* USC task control state embedded in the PDS program structures below and
 * passed to pvr_pds_setup_doutu().
 *
 * src0 - 64-bit task control word.
 */
struct pvr_pds_usc_task_control {
   uint64_t src0;
};
138
/* Up to 4 64-bit state words currently supported. */
#define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4

/* Structure for DOUTW.
 *
 * dest_store - store the DOUTW writes to
 * num_const64 - number of 64-bit words used in doutw_data (at most
 *               PVR_PDS_MAX_NUM_DOUTW_CONSTANTS, the array length)
 * doutw_data - the 64-bit state words
 * last_instruction - NOTE(review): presumably marks the DOUTW as the final
 *                    instruction of the program; confirm
 * data_segment - pointer to the data segment
 * data_size - size of data segment
 * code_size - size of code segment
 */
struct pvr_pds_doutw_control {
   enum pvr_pds_store_type dest_store;
   uint32_t num_const64;
   uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
   bool last_instruction;

   uint32_t *data_segment;
   uint32_t data_size;
   uint32_t code_size;
};
153
/* Structure representing the PDS pixel event program.
 *
 * data_segment - pointer to the data segment
 * task_control - USC task control words
 * num_emit_word_pairs - number of emit word pairs in emit_words
 * emit_words - array of emit words
 * data_size - size of data segment
 * code_size - size of code segment
 */
struct pvr_pds_event_program {
   uint32_t *data_segment;
   struct pvr_pds_usc_task_control task_control;

   uint32_t num_emit_word_pairs;
   uint32_t *emit_words;

   uint32_t data_size;
   uint32_t code_size;
};
172
/* Structure representing the PDS pixel shader secondary attribute program.
 *
 * data_segment - pointer to the data segment
 *
 * num_dword_doutw - number of dword DOUTWs
 * dword_doutw_value - array of dword DOUTW values
 * dword_doutw_control - array of dword DOUTW control words
 *
 * num_q_word_doutw - number of qword DOUTWs
 * q_word_doutw_value - array of qword DOUTW values (two dwords per DOUTW)
 * q_word_doutw_control - array of qword DOUTW control words
 *
 * num_uniform_dma_kicks - number of uniform DMA kicks
 * uniform_dma_control - array of uniform DMA control words
 * uniform_dma_address - array of uniform DMA address words
 *
 * num_texture_dma_kicks - number of texture state DMA kicks
 * texture_dma_control - array of texture state DMA control words
 * texture_dma_address - array of texture state DMA address words
 *
 * kick_usc / usc_task_control - whether to kick the USC, and the task control
 *                               words to use
 * write_tile_position / tile_position_attr_dest - NOTE(review): presumably
 *                               enable and destination for writing the tile
 *                               position; confirm
 * clear / clear_color / clear_color_dest_reg / packed_clear - NOTE(review):
 *                               clear-related inputs; semantics not visible in
 *                               this header
 *
 * data_size - size of data segment
 * code_size - size of code segment
 *
 * temps_used - PDS temps
 */
struct pvr_pds_pixel_shader_sa_program {
   uint32_t *data_segment;

   uint32_t num_dword_doutw;
   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];

   uint32_t num_q_word_doutw;
   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];

   uint32_t num_uniform_dma_kicks;
   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];

   uint32_t num_texture_dma_kicks;
   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];

   bool kick_usc;
   bool write_tile_position;
   uint32_t tile_position_attr_dest;
   struct pvr_pds_usc_task_control usc_task_control;

   bool clear;
   uint32_t *clear_color;
   uint32_t clear_color_dest_reg;
   bool packed_clear;

   uint32_t data_size;
   uint32_t code_size;

   uint32_t temps_used;
};
225
/* Structure representing a PDS program that kicks the USC (input to
 * pvr_pds_kick_usc(), e.g. the PDS pixel shader program).
 *
 * data_segment - pointer to the data segment
 * usc_task_control - USC task control words
 *
 * data_size - size of data segment
 * code_size - size of code segment
 */
struct pvr_pds_kickusc_program {
   uint32_t *data_segment;
   struct pvr_pds_usc_task_control usc_task_control;

   uint32_t data_size;
   uint32_t code_size;
};
241
/* Structure representing the PDS fence/doutc program.
 *
 * data_segment - pointer to the data segment
 * fence_constant_word - NOTE(review): constant word used by the fence
 *                       program; exact meaning not visible in this header
 * data_size - size of data segment
 * code_size - size of code segment
 */
struct pvr_pds_fence_program {
   uint32_t *data_segment;
   uint32_t fence_constant_word;
   uint32_t data_size;
   uint32_t code_size;
};
254
/* Structure representing the PDS coefficient loading.
 *
 * data_segment - pointer to the data segment
 * num_fpu_iterators - number of FPU iterators
 * FPU_iterators - array of FPU iterator control words
 * destination - array of Common Store destinations
 *
 * data_size - size of data segment
 * code_size - size of code segment
 *
 * temps_used - PDS temps
 */
struct pvr_pds_coeff_loading_program {
   uint32_t *data_segment;
   uint32_t num_fpu_iterators;
   uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
   uint32_t destination[PVR_MAXIMUM_ITERATIONS];

   uint32_t data_size;
   uint32_t code_size;

   uint32_t temps_used;
};
276
/* Structure representing the PDS vertex shader secondary attribute program.
 *
 * data_segment - pointer to the data segment
 * num_dma_kicks - number of DMA kicks
 * dma_control - array of DMA control words
 * dma_address - array of DMA address words
 *
 * data_size - size of data segment
 * code_size - size of code segment
 */
struct pvr_pds_vertex_shader_sa_program {
   uint32_t *data_segment;

   /* num_uniform_dma_kicks, uniform_dma_address and uniform_dma_control (and
    * their texture counterparts below) are not used for generating the PDS
    * data and code sections; they are currently only used to simplify the
    * driver implementation. The driver must fold this information into
    * num_dma_kicks, dma_address and dma_control to get the PDS properly
    * generated.
    */

   uint32_t num_dword_doutw;
   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];

   uint32_t num_q_word_doutw;
   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];

   uint32_t num_uniform_dma_kicks;
   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];

   uint32_t num_texture_dma_kicks;
   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];

   uint32_t num_dma_kicks;
   uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
   uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];

   bool kick_usc;
   struct pvr_pds_usc_task_control usc_task_control;

   /* Shared register buffer base address (VDM/CDM context load case only).
    *
    * NOTE(review): the comment above describes an address, but the field
    * below is a bool; it looks stale. Confirm what clear_pds_barrier
    * controls.
    */
   bool clear_pds_barrier;

   uint32_t data_size;
   uint32_t code_size;
};
326
/* Structure representing a PDS vertex stream element.
 *
 * There are two types of element, repeat DMA and non-repeat DMA.
 *
 * Non-repeat DMAs are the classic DMA of some number of bytes from an offset
 * into contiguous registers. It is assumed the address and size are dword
 * aligned. To use this, specify 0 for the component size. Each four bytes read
 * will go to the next HW register.
 *
 * Repeat DMA enables copying of sub-dword amounts at non-dword-aligned
 * addresses. To use this, specify the component size as either 1, 2, 3 or 4
 * bytes. Size specifies the number of components, and each component read
 * will go to the next HW register.
 *
 * In both cases, HW registers are written contiguously.
 *
 * offset - offset of the vertex stream element
 * size - size of the vertex stream element in bytes for non-repeat DMA, or
 *        number of components for repeat DMA.
 * reg - first vertex stream element register to DMA to.
 * component_size - size of component for repeat DMA, or 0 for non-repeat DMA.
 */
struct pvr_pds_vertex_element {
   uint32_t offset;
   uint32_t size;
   uint16_t reg;
   uint16_t component_size;
};
355
/* Structure representing a PDS vertex stream.
 *
 * current_state - NOTE(review): undocumented in the original; meaning not
 *                 visible in this header
 * instance_data - flag whether the vertex stream is indexed or instance data
 * read_back - If true, vertex is reading back data output by GPU earlier in
 *             same kick. This will enable MCU coherency if relevant.
 * multiplier - vertex stream frequency multiplier
 * shift - vertex stream frequency shift
 * address - vertex stream address in bytes
 * buffer_size_in_bytes - buffer size in bytes if vertex attribute is sourced
 *                        from buffer object
 * stride - vertex stream stride in bytes
 * num_vertices - number of vertices in buffer. Used for OOB checking.
 *                0 = disable OOB checking.
 * num_elements - number of vertex stream elements
 * elements - array of vertex stream elements
 * use_ddmadt - When the has_pds_ddmadt feature is enabled, boolean allowing
 *              DDMADT to be used per stream element.
 */
struct pvr_pds_vertex_stream {
   bool current_state;
   bool instance_data;
   bool read_back;
   uint32_t multiplier;
   uint32_t shift;
   uint64_t address;
   uint32_t buffer_size_in_bytes;
   uint32_t stride;
   uint32_t num_vertices;
   uint32_t num_elements;
   struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS];

   bool use_ddmadt;
};
389
/* Structure representing the PDS vertex shader program.
 *
 * This structure describes the USC code and vertex buffers required
 * by the PDS vertex loading program.
 *
 * data_segment - Pointer to the data segment.
 * usc_task_control - Description of USC task for vertex shader program.
 * num_streams - Number of vertex streams.
 * iterate_vtx_id - If set, the vertex id should be iterated.
 * vtx_id_register - The register to iterate the VertexID into (if applicable)
 * vtx_id_modifier - Value to ADD/SUB from index value received by PDS.
 *                   This is used because the index value received by PDS has
 *                   INDEX_OFFSET added, and generally VertexID wouldn't.
 * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
 * iterate_instance_id - If set, the instance id should be iterated.
 * instance_id_register - The register to iterate the InstanceID into (if
 *                        applicable). The vertex and instance id will both be
 *                        iterated as unsigned ints
 *
 * iterate_remap_id - Should be set to true if vertex shader needs
 *                    VS_REMAPPED_INDEX_ID (e.g. Another TA shader runs after
 *                    it).
 * null_idx - Indicates no index buffer is bound, so every index should be
 *            null_idx_value.
 * null_idx_value - The value to use as index if null_idx set.
 * data_size - Size of data segment, in dwords. Output by call to
 *             pvr_pds_vertex_shader, and used as input when generating data.
 * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
 *             This is the number of dword instructions that are/were generated.
 * temps_used - Number of temporaries used. Output by call to
 *              pvr_pds_vertex_shader.
 * streams - The vertex streams; num_streams entries are in use.
 *
 * NOTE(review): instance_ID_modifier, base_instance, stream_patch_offsets,
 * num_stream_patches, ddmadt_enables, skip_stream_flag, draw_indirect and
 * indexed are not documented in the original; their semantics are not visible
 * in this header.
 */
struct pvr_pds_vertex_shader_program {
   uint32_t *data_segment;
   struct pvr_pds_usc_task_control usc_task_control;
   uint32_t num_streams;

   bool iterate_vtx_id;
   uint32_t vtx_id_register;
   uint32_t vtx_id_modifier;
   bool vtx_id_sub_modifier;

   bool iterate_instance_id;
   uint32_t instance_id_register;
   uint32_t instance_ID_modifier;
   uint32_t base_instance;

   bool iterate_remap_id;

   bool null_idx;
   uint32_t null_idx_value;

   uint32_t *stream_patch_offsets;
   uint32_t num_stream_patches;

   uint32_t data_size;
   uint32_t code_size;
   uint32_t temps_used;
   uint32_t ddmadt_enables;
   uint32_t skip_stream_flag;

   bool draw_indirect;
   bool indexed;

   struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS];
};
456
/* Structure representing PDS shared reg storing program.
 *
 * Combines the DOUTW state and the DOUTU (USC kick) state of the program. It
 * is the input to pvr_pds_generate_shared_storing_program() and
 * pvr_pds_generate_compute_shared_loading_program().
 */
struct pvr_pds_shared_storing_program {
   struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */
   struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */
   bool cc_enable; /*!< cc bit is set on the doutu instruction. */
   uint32_t data_size; /*!< total data size, non-aligned. */
   uint32_t code_size; /*!< total code size, non-aligned. */
};
465
/* Maximum number of stream out buffers; length of the per-buffer arrays in
 * struct pvr_pds_stream_out_init_program.
 */
#define PVR_MAX_STREAMOUT_BUFFERS 4

/* Structure representing stream out init PDS programs. */
struct pvr_pds_stream_out_init_program {
   /* --- Input to PDS_STREAM_OUT_INIT_PROGRAM --- */

   /* Number of buffers to load/store.
    * This indicates the number of entries in the next two arrays.
    * Data is loaded/stored contiguously to persistent temps.
    */
   uint32_t num_buffers;

   /* Number of persistent temps in dword to load/store for each buffer. */
   uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS];
   /* The device address for loading/storing persistent temps for each buffer.
    * If address is zero, then no data is loaded/stored
    * into pt registers for the buffer.
    */
   uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS];

   /* PDS state update Stream Out Init Programs: data and code sizes. */
   uint32_t stream_out_init_pds_data_size;
   uint32_t stream_out_init_pds_code_size;
};
490
/* Structure representing stream out terminate PDS program. */
struct pvr_pds_stream_out_terminate_program {
   /* Input to PDS_STREAM_OUT_TERMINATE_PROGRAM.
    *
    * Number of persistent temps, in dwords, used in the stream out PDS
    * programs that need to be stored.
    * The terminate program writes pds_persistent_temp_size_to_store
    * persistent temps to dev_address_for_storing_persistent_temp.
    */
   uint32_t pds_persistent_temp_size_to_store;

   /* The device address for storing persistent temps. */
   uint64_t dev_address_for_storing_persistent_temp;

   /* PPP state update Stream Out Program for stream out terminate: data and
    * code sizes.
    */
   uint32_t stream_out_terminate_pds_data_size;
   uint32_t stream_out_terminate_pds_code_size;
};
509
/* Structure representing the PDS compute shader program.
 *
 * This structure describes the USC code and compute buffers required by the
 * PDS compute task loading program.
 *
 * data_segment - Pointer to the data segment.
 * usc_task_control - Description of USC task for compute shader program.
 * data_size - Size of data segment, in dwords. Output by call to
 *             pvr_pds_compute_shader, and used as input when generating data.
 * code_size - Size of code segment. Output by call to pvr_pds_compute_shader.
 *             This is the number of dword instructions that are/were generated.
 * temps_used - Number of temporaries used. Output by call to
 *              pvr_pds_compute_shader.
 * highest_temp - The highest temp number used. Output by call to
 *                pvr_pds_compute_shader.
 * coeff_update_task_branch_size - The number of instructions we need to branch
 *                                 over to skip the coefficient update task.
 *
 * NOTE(review): the three *_input_regs arrays hard-code a length of 3 while
 * PVR_PDS_NUM_COMPUTE_INPUT_REGS (also 3) is defined in this header; confirm
 * they are meant to match. PVR_PDS_COMPUTE_INPUT_REG_UNUSED is the "unused"
 * register number defined in this header.
 */

struct pvr_pds_compute_shader_program {
   uint32_t *data_segment;
   struct pvr_pds_usc_task_control usc_task_control;
   struct pvr_pds_usc_task_control usc_task_control_coeff_update;

   uint32_t data_size;
   uint32_t code_size;

   uint32_t temps_used;
   uint32_t highest_temp;

   uint32_t local_input_regs[3];
   uint32_t work_group_input_regs[3];
   uint32_t global_input_regs[3];

   uint32_t barrier_coefficient;

   bool fence;

   bool flattened_work_groups;

   bool clear_pds_barrier;

   bool has_coefficient_update_task;

   uint32_t coeff_update_task_branch_size;

   bool add_base_workgroup;
   uint32_t base_workgroup_constant_offset_in_dwords[3];

   bool kick_usc;

   bool conditional_render;
   uint32_t cond_render_const_offset_in_dwords;
   uint32_t cond_render_pred_temp;
};
/* Control state for a PDS load/store instruction; passed to
 * pvr_pds_generate_single_ldst_instruction().
 *
 * cache_control_const - NOTE(review): presumably the cache control constant
 *                       applied to the access; confirm against the definition.
 */
struct pvr_pds_ldst_control {
   uint64_t cache_control_const;
};
568
/* Value the driver can use as a register number to denote that the register
 * is unused.
 */
#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU
573
574/*****************************************************************************
575 function declarations
576*****************************************************************************/
577
578/*****************************************************************************
579 Constructors
580*****************************************************************************/
581
/* Initializers ("constructors") for the program description structures.
 * Each takes the structure to initialize.
 * NOTE(review): the initial values written are defined in the implementation
 * and are not visible in this header.
 */
void pvr_pds_pixel_shader_sa_initialize(
   struct pvr_pds_pixel_shader_sa_program *program);
void pvr_pds_compute_shader_initialize(
   struct pvr_pds_compute_shader_program *program);
586
587/* Utility */
588
/* NOTE(review): presumably appends the 64-bit constant_value to the constants
 * buffer and updates *data_size; the meaning of the return value is not
 * visible in this header - confirm against the definition.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size);
592
/* NOTE(review): presumably encodes a DMA of dma_size from src_address to
 * dest_offset into the dma_control/dma_address output words for the given
 * device; units and the meaning of the return value are not visible in this
 * header - confirm against the definition.
 */
uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
                                  uint64_t *dma_address,
                                  uint32_t dest_offset,
                                  uint32_t dma_size,
                                  uint64_t src_address,
                                  const struct pvr_device_info *dev_info);
599
/* Sets up the DOUTU task control state in usc_task_control.
 * NOTE(review): how execution_address, usc_temps, sample_rate and
 * phase_rate_change are packed is defined in the implementation - confirm
 * there.
 */
void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
                         uint64_t execution_address,
                         uint32_t usc_temps,
                         uint32_t sample_rate,
                         bool phase_rate_change);
605
/* Pixel */

/* Size-only pass over a kick-USC program: no output buffer is supplied. */
#define pvr_pds_set_sizes_pixel_shader(program) \
   pvr_pds_kick_usc(program, NULL, 0, false, PDS_GENERATE_SIZES)

/* Emits both the code and data segments of the pixel shader program. */
#define pvr_pds_generate_pixel_shader_program(program, buffer) \
   pvr_pds_kick_usc(program, buffer, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)

/* Same expansion as pvr_pds_generate_pixel_shader_program(), under the name
 * used for the VDM sync program.
 */
#define pvr_pds_generate_VDM_sync_program(program, buffer) \
   pvr_pds_kick_usc(program, buffer, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)
614
/* Generator for the PDS fence/doutc program described by program.
 * gen_mode selects what is produced (see enum pvr_pds_generate_mode).
 * NOTE(review): the returned pointer is presumably the position in buffer
 * after the generated words - confirm against the definition.
 */
uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
                                 uint32_t *restrict buffer,
                                 enum pvr_pds_generate_mode gen_mode);
618
/* Generator for the DOUTW program described by psControl.
 * gen_mode selects what is produced (see enum pvr_pds_generate_mode).
 * NOTE(review): the returned pointer is presumably the position in buffer
 * after the generated words - confirm against the definition.
 */
uint32_t *
pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl,
                       uint32_t *restrict buffer,
                       enum pvr_pds_generate_mode gen_mode,
                       const struct pvr_device_info *dev_info);
624
/* Generator for a PDS program that kicks the USC. Normally reached through
 * the pixel shader / VDM sync wrapper macros in this header, which pass 0 for
 * start_next_constant and false for cc_enabled.
 * gen_mode selects what is produced (see enum pvr_pds_generate_mode).
 * NOTE(review): the returned pointer is presumably the position in buffer
 * after the generated words - confirm against the definition.
 */
uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
                           uint32_t *restrict buffer,
                           uint32_t start_next_constant,
                           bool cc_enabled,
                           enum pvr_pds_generate_mode gen_mode);
630
631/* Pixel Secondary */
632#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(X, Y)     \
633   pvr_pds_pixel_shader_uniform_texture_data(X,                  \
634                                             NULL,               \
635                                             PDS_GENERATE_SIZES, \
636                                             true,               \
637                                             Y)
638#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(X, Y)     \
639   pvr_pds_pixel_shader_uniform_texture_data(X,                  \
640                                             NULL,               \
641                                             PDS_GENERATE_SIZES, \
642                                             false,              \
643                                             Y)
644#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(X) \
645   pvr_pds_pixel_shader_uniform_texture_code(X, NULL, PDS_GENERATE_SIZES)
646
647#define pvr_pds_generate_pixel_shader_sa_texture_state_data(X, Y, Z)    \
648   pvr_pds_pixel_shader_uniform_texture_data(X,                         \
649                                             Y,                         \
650                                             PDS_GENERATE_DATA_SEGMENT, \
651                                             false,                     \
652                                             Z)
653
654#define pvr_pds_generate_pixel_shader_sa_code_segment(X, Y) \
655   pvr_pds_pixel_shader_uniform_texture_code(X, Y, PDS_GENERATE_CODE_SEGMENT)
656
657uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
658   struct pvr_pds_pixel_shader_sa_program *restrict program,
659   uint32_t *restrict buffer,
660   enum pvr_pds_generate_mode gen_mode,
661   bool uniform,
662   const struct pvr_device_info *dev_info);
663
664uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
665   struct pvr_pds_pixel_shader_sa_program *restrict program,
666   uint32_t *restrict buffer,
667   enum pvr_pds_generate_mode gen_mode);
668
669/* Vertex */
670#define pvr_pds_set_sizes_vertex_shader(X, Y) \
671   pvr_pds_vertex_shader(X, NULL, PDS_GENERATE_SIZES, Y)
672
673#define pvr_pds_generate_vertex_shader_data_segment(X, Y, Z) \
674   pvr_pds_vertex_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
675
676#define pvr_pds_generate_vertex_shader_code_segment(X, Y, Z) \
677   pvr_pds_vertex_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
678
679uint32_t *
680pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
681                      uint32_t *restrict buffer,
682                      enum pvr_pds_generate_mode gen_mode,
683                      const struct pvr_device_info *dev_info);
684
685/* Compute */
686uint32_t *
687pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
688                       uint32_t *restrict buffer,
689                       enum pvr_pds_generate_mode gen_mode,
690                       const struct pvr_device_info *dev_info);
691
692#define pvr_pds_set_sizes_compute_shader(X, Y) \
693   pvr_pds_compute_shader(X, NULL, PDS_GENERATE_SIZES, Y)
694
695#define pvr_pds_generate_compute_shader_data_segment(X, Y, Z) \
696   pvr_pds_compute_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
697
698#define pvr_pds_generate_compute_shader_code_segment(X, Y, Z) \
699   pvr_pds_compute_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
700
701/* Vertex Secondary */
702#define pvr_pds_set_sizes_vertex_shader_sa(X, Y) \
703   pvr_pds_vertex_shader_sa(X, NULL, PDS_GENERATE_SIZES, Y)
704
705#define pvr_pds_generate_vertex_shader_sa_data_segment(X, Y, Z) \
706   pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
707
708#define pvr_pds_generate_vertex_shader_sa_code_segment(X, Y, Z) \
709   pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
710
711uint32_t *pvr_pds_vertex_shader_sa(
712   struct pvr_pds_vertex_shader_sa_program *restrict program,
713   uint32_t *restrict buffer,
714   enum pvr_pds_generate_mode gen_mode,
715   const struct pvr_device_info *dev_info);
716
717/* Pixel Event */
718#define pvr_pds_set_sizes_pixel_event(X, Y) \
719   pvr_pds_generate_pixel_event(X, NULL, PDS_GENERATE_SIZES, Y)
720
721#define pvr_pds_generate_pixel_event_data_segment(X, Y, Z) \
722   pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
723
724#define pvr_pds_generate_pixel_event_code_segment(X, Y, Z) \
725   pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
726
727uint32_t *
728pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
729                             uint32_t *restrict buffer,
730                             enum pvr_pds_generate_mode gen_mode,
731                             const struct pvr_device_info *dev_info);
732
733/* Coefficient Loading */
734#define pvr_pds_set_sizes_coeff_loading(X) \
735   pvr_pds_coefficient_loading(X, NULL, PDS_GENERATE_SIZES)
736
737#define pvr_pds_generate_coeff_loading_program(X, Y) \
738   pvr_pds_coefficient_loading(X, Y, PDS_GENERATE_CODE_SEGMENT)
739
740uint32_t *pvr_pds_coefficient_loading(
741   struct pvr_pds_coeff_loading_program *restrict program,
742   uint32_t *restrict buffer,
743   enum pvr_pds_generate_mode gen_mode);
744
/* Compute DM barrier-specific conditional code.
 * gen_mode selects what is produced (see enum pvr_pds_generate_mode).
 * NOTE(review): the returned pointer is presumably the position in buffer
 * after the generated words - confirm against the definition.
 */
uint32_t *pvr_pds_generate_compute_barrier_conditional(
   uint32_t *buffer,
   enum pvr_pds_generate_mode gen_mode);
749
/* Shared register storing.
 * Generator for the program described by struct pvr_pds_shared_storing_program;
 * gen_mode selects what is produced (see enum pvr_pds_generate_mode).
 */
uint32_t *pvr_pds_generate_shared_storing_program(
   struct pvr_pds_shared_storing_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
756
/* Shared register loading.
 * Generator for the fence terminate program described by
 * struct pvr_pds_fence_program; gen_mode selects what is produced (see
 * enum pvr_pds_generate_mode).
 * NOTE(review): the "Shared register loading" heading comes from the original
 * and does not obviously match the function name - confirm it is intended.
 */
uint32_t *pvr_pds_generate_fence_terminate_program(
   struct pvr_pds_fence_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
763
/* CDM Shared register loading.
 * Takes the same program description structure as
 * pvr_pds_generate_shared_storing_program(); gen_mode selects what is produced
 * (see enum pvr_pds_generate_mode).
 */
uint32_t *pvr_pds_generate_compute_shared_loading_program(
   struct pvr_pds_shared_storing_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
770
/* Stream out */

/* Generator for the stream out init program.
 * NOTE(review): store_mode presumably selects storing rather than loading the
 * persistent temps (the structure describes both "load/store") - confirm
 * against the definition.
 */
uint32_t *pvr_pds_generate_stream_out_init_program(
   struct pvr_pds_stream_out_init_program *restrict program,
   uint32_t *restrict buffer,
   bool store_mode,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
778
/* Generator for the stream out terminate program described by
 * struct pvr_pds_stream_out_terminate_program; gen_mode selects what is
 * produced (see enum pvr_pds_generate_mode).
 */
uint32_t *pvr_pds_generate_stream_out_terminate_program(
   struct pvr_pds_stream_out_terminate_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
784
/* Structure representing DrawIndirect PDS programs. Input to
 * pvr_pds_generate_draw_arrays_indirect() and
 * pvr_pds_generate_draw_elements_indirect().
 */
struct pvr_pds_drawindirect_program {
   /* --- Input to pvr_pds_drawindirect_program --- */

   /* Address of the index list block in the VDM control stream.
    * This must point to a 128-bit aligned index list header.
    */
   uint64_t index_list_addr_buffer;
   /* Address of arguments for Draw call. Layout is defined by eArgFormat.
    * NOTE(review): eArgFormat is not declared in this header.
    */
   uint64_t arg_buffer;

   /* Address of index buffer. */
   uint64_t index_buffer;

   /* The raw (without addr msb in [7:0]) index block header. */
   uint32_t index_block_header;

   /* Number of bytes per index. */
   uint32_t index_stride;

   /* Used during/after compilation to fill in constant buffer. */
   struct pvr_psc_register data[32];

   /* Results of compilation. */
   struct pvr_psc_program_output program;

   /* This is used for ARB_multi_draw_indirect. */
   unsigned int count;
   unsigned int stride;

   /* Internal stuff. */
   unsigned int num_views;

   bool support_base_instance;
   bool increment_draw_id;
};
821
/* Generators for the DrawIndirect PDS programs (arrays and elements
 * variants). Unlike the other generators in this header they return nothing;
 * results of compilation are held in the `program` member of
 * struct pvr_pds_drawindirect_program.
 */
void pvr_pds_generate_draw_arrays_indirect(
   struct pvr_pds_drawindirect_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
void pvr_pds_generate_draw_elements_indirect(
   struct pvr_pds_drawindirect_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
832
/* Encodes the 64-bit src0 operand of a PDS ST (store) instruction.
 * NOTE(review): field units (e.g. whether count4 counts 4-byte units) and the
 * bit layout are defined in the implementation - confirm there.
 */
uint64_t pvr_pds_encode_st_src0(uint64_t src,
                                uint64_t count4,
                                uint64_t dst_add,
                                bool write_through,
                                const struct pvr_device_info *dev_info);
838
/* Encodes the 64-bit src0 operand of a PDS LD (load) instruction.
 * NOTE(review): field units (e.g. whether count8 counts 8-byte units) and the
 * bit layout are defined in the implementation - confirm there.
 */
uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
                                uint64_t count8,
                                uint64_t src_add,
                                bool cached,
                                const struct pvr_device_info *dev_info);
844
/* Generator for a single LD/ST instruction. Declaration only; the definition
 * is not part of this header.
 *
 * NOTE(review): everything below is inferred from parameter names and types
 * only - confirm against the implementation.
 *   ld                - presumably true for a load, false for a store.
 *   next_constant,
 *   total_data_size,
 *   total_code_size   - non-const pointers, so presumably updated in place.
 *   buffer / return   - presumably the write position before and after.
 */
uint32_t *pvr_pds_generate_single_ldst_instruction(
   bool ld,
   const struct pvr_pds_ldst_control *control,
   uint32_t temp_index,
   uint64_t address,
   uint32_t count,
   uint32_t *next_constant,
   uint32_t *total_data_size,
   uint32_t *total_code_size,
   uint32_t *buffer,
   bool data_fence,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
/* Describes the transfer of (part of) one descriptor set. */
struct pvr_pds_descriptor_set {
   /* ID of the descriptor set. */
   unsigned int descriptor_set;

   /* Number of dwords to transfer. */
   unsigned int size_in_dwords;

   /* Destination shared register to which descriptor entries should be
    * loaded.
    */
   unsigned int destination;

   /* Primary or secondary? */
   bool primary;

   /* Offset from the start of the descriptor set to start DMA'ing from. */
   unsigned int offset_in_dwords;
};
869
/* Buffer type identifiers.
 *
 * NOTE(review): presumably the values stored in pvr_pds_buffer::type and
 * pvr_const_map_entry_special_buffer::buffer_type - confirm. The prefix is
 * not uniform (PVR_BUFFER_TYPE_ vs PVR_BUFFER_TYPES_); the names are left
 * untouched because users of this header refer to them.
 */
#define PVR_BUFFER_TYPE_UBO (0)
#define PVR_BUFFER_TYPES_COMPILE_TIME (1)
#define PVR_BUFFER_TYPE_BLEND_CONSTS (2)
#define PVR_BUFFER_TYPE_PUSH_CONSTS (3)
#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4)
#define PVR_BUFFER_TYPE_DYNAMIC (5)
#define PVR_BUFFER_TYPES_UBO_ZEROING (6)
/* NOTE(review): this expands to the int value -1. A uint16_t or uint8_t
 * member promotes to a non-negative int, so comparing such a member directly
 * against this macro with == is always false - confirm how it is used.
 */
#define PVR_BUFFER_TYPE_INVALID (~0)
878
/* One element of the "buffers" array in struct pvr_descriptor_program_input. */
struct pvr_pds_buffer {
   /* NOTE(review): presumably one of the PVR_BUFFER_TYPE(S)_* values -
    * confirm.
    */
   uint16_t type;

   uint16_t size_in_dwords;
   uint32_t destination;

   /* Anonymous union: "data" shares its storage with the
    * buffer_id/desc_set/binding/source_offset group, so writing one view
    * overwrites the other.
    * NOTE(review): which view is valid is presumably chosen by "type" -
    * confirm.
    */
   union {
      uint32_t *data;
      struct {
         uint32_t buffer_id;
         uint16_t desc_set;
         uint16_t binding;
         uint32_t source_offset;
      };
   };
};
895
896#define PVR_PDS_MAX_BUFFERS (24)
897
898struct pvr_descriptor_program_input {
899   /* User-specified descriptor sets. */
900   unsigned int descriptor_set_count;
901   struct pvr_pds_descriptor_set descriptor_sets[8];
902
903   /* "State" buffers, including:
904    * compile-time constants
905    * blend constants
906    * push constants
907    * UBOs that have been hoisted.
908    */
909   uint32_t buffer_count;
910   struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS];
911
912   uint32_t blend_constants_used_mask;
913
914   bool secondary_program_present;
915   struct pvr_pds_usc_task_control secondary_task_control;
916
917   bool must_not_be_empty;
918};
919
/* Bits for the "flags" member of struct pvr_pds_vertex_primary_program_input,
 * which is an OR of PVR_PDS_VERTEX_FLAGS_* values.
 */
#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U)
#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U)
#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U)
#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U)
#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U)

/* BaseVertex is used in the shader. */
#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)

#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)

/* NOTE(review): presumably a bit for pvr_pds_vertex_dma::flags - confirm. */
#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
932
/* One DMA entry; struct pvr_pds_vertex_primary_program_input points at a list
 * of these through its dma_list member.
 *
 * Try and keep this structure packing as small as possible. The members are
 * ordered so that the three 16-bit and five 8-bit fields sit together ahead
 * of the 32-bit divisor; with a 4-byte aligned uint32_t that leaves a single
 * padding byte (16 bytes in total) where the previous order needed a 3-byte
 * hole before divisor plus 2 bytes of tail padding (20 bytes).
 *
 * NOTE(review): assumes no user relies on positional initialisers or on the
 * previous member offsets - confirm before merging.
 */
struct pvr_pds_vertex_dma {
   uint16_t offset;
   uint16_t stride;
   uint16_t robustness_buffer_offset;

   /* NOTE(review): presumably a mask of PVR_PDS_VERTEX_DMA_FLAGS_* bits -
    * confirm.
    */
   uint8_t flags;
   uint8_t size_in_dwords;
   uint8_t component_size_in_bytes;
   uint8_t destination;
   uint8_t binding_index;

   uint32_t divisor;
};
947
948struct pvr_pds_vertex_primary_program_input {
949   /* Control for the DOUTU that kicks the vertex USC shader. */
950   struct pvr_pds_usc_task_control usc_task_control;
951   /* List of DMAs (of size dma_count). */
952   struct pvr_pds_vertex_dma *dma_list;
953   uint32_t dma_count;
954
955   /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */
956   uint32_t flags;
957
958   uint16_t vertex_id_register;
959   uint16_t instance_id_register;
960
961   /* API provided baseInstance (i.e. not from drawIndirect). */
962   uint32_t base_instance;
963
964   uint16_t base_instance_register;
965   uint16_t base_vertex_register;
966   uint16_t draw_index_register;
967};
968
/* Const map entry type identifiers.
 * NOTE(review): presumably the values stored in the "type" member of the
 * pvr_const_map_entry* structs - confirm.
 */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8)

/* The next two identifiers share the value 9: which one applies depends on
 * whether pds_ddmadt is enabled.
 */

/* Use if pds_ddmadt is enabled. */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9)

/* Use if pds_ddmadt is not enabled. */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9)

#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
990
/* All of the following structs are packed tightly into a buffer, advancing by
 * sizeof(x) after each one, so an entry may start at an address that is not
 * natively aligned for its members. The packed attribute removes padding
 * between members and indicates that unaligned accesses may be required; the
 * aligned(1) attribute keeps the alignment of the structure at one byte, so
 * its size is not rounded up to a larger boundary.
 */
#define PVR_ALIGNED __attribute__((packed, aligned(1)))
998
/* Minimal const map entry. Every other *_const_map_entry_* struct in this
 * header starts with the same two members, and struct pvr_pds_info refers to
 * its entries through a pointer to this type.
 */
struct pvr_const_map_entry {
   /* NOTE(review): presumably a PVR_PDS_CONST_MAP_ENTRY_TYPE_* value -
    * confirm.
    */
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1003
/* Const map entry carrying a 32-bit literal. The packed layout places
 * literal_value at byte offset 2, i.e. not naturally aligned.
 */
struct pvr_const_map_entry_literal32 {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint32_t literal_value;
} PVR_ALIGNED;
1010
/* Const map entry carrying a 64-bit literal. The packed layout places
 * literal_value at byte offset 2, i.e. not naturally aligned.
 */
struct pvr_const_map_entry_literal64 {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint64_t literal_value;
} PVR_ALIGNED;
1017
1018struct pvr_const_map_entry_descriptor_set {
1019   uint8_t type;
1020   uint8_t const_offset;
1021
1022   uint32_t descriptor_set;
1023   PVR_PDS_BOOL primary;
1024   uint32_t offset_in_dwords;
1025} PVR_ALIGNED;
1026
/* Const map entry referring to a constant buffer. */
struct pvr_const_map_entry_constant_buffer {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   /* NOTE(review): buffer_id is 16 bits wide here whereas
    * pvr_pds_buffer::buffer_id is 32 bits wide - confirm the value range if
    * one is copied into the other.
    */
   uint16_t buffer_id;
   uint16_t desc_set;
   uint16_t binding;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;
1037
/* Const map entry for constant buffer zeroing; it has the buffer_id, offset
 * and size_in_dwords members of struct pvr_const_map_entry_constant_buffer
 * but no desc_set/binding.
 */
struct pvr_const_map_entry_constant_buffer_zeroing {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint16_t buffer_id;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;
1046
/* Const map entry referring to a special buffer. */
struct pvr_const_map_entry_special_buffer {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   /* NOTE(review): presumably one of the PVR_BUFFER_TYPE(S)_* values -
    * confirm.
    */
   uint8_t buffer_type;
   uint32_t buffer_index;
} PVR_ALIGNED;
1054
/* Const map entry carrying a 64-bit DOUTU control word. The packed layout
 * places doutu_control at byte offset 2, i.e. not naturally aligned.
 */
struct pvr_const_map_entry_doutu_address {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint64_t doutu_control;
} PVR_ALIGNED;
1061
/* Const map entry for a vertex attribute address.
 * NOTE(review): offset, stride, binding_index and size_in_dwords have the
 * same names and types as members of struct pvr_pds_vertex_dma; presumably
 * they are copied from there - confirm.
 */
struct pvr_const_map_entry_vertex_attribute_address {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
} PVR_ALIGNED;
1071
/* Const map entry for a robust vertex attribute address: the members of
 * struct pvr_const_map_entry_vertex_attribute_address followed by
 * robustness_buffer_offset and component_size_in_bytes.
 */
struct pvr_const_map_entry_robust_vertex_attribute_address {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t robustness_buffer_offset;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;
1083
/* Const map entry for a vertex attribute max index.
 * NOTE(review): presumably tagged with
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX, which is the
 * identifier to use when pds_ddmadt is not enabled - confirm.
 */
struct pvr_const_map_entry_vertex_attribute_max_index {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t offset;
   uint16_t stride;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;
1094
/* Const map entry for the base instance. It has no payload beyond the two
 * members shared with struct pvr_const_map_entry.
 */
struct pvr_const_map_entry_base_instance {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1099
/* Const map entry for the base vertex. It has no payload beyond the two
 * members shared with struct pvr_const_map_entry.
 *
 * Marked PVR_ALIGNED like every other const map entry struct in this header.
 * With only two uint8_t members the size (2) and alignment (1) are unchanged;
 * the attribute is there for consistency and so the struct stays packed if a
 * wider member is ever added.
 */
struct pvr_const_map_entry_base_vertex {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1104
/* Const map entry for the base workgroup.
 * NOTE(review): workgroup_component presumably selects one component of the
 * base workgroup ID - confirm the encoding against the users of this struct.
 */
struct pvr_pds_const_map_entry_base_workgroup {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;
   uint8_t workgroup_component;
} PVR_ALIGNED;
1110
/* Const map entry for the DDMADT out-of-bounds buffer size of a vertex
 * attribute binding.
 * NOTE(review): presumably tagged with
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE, which is
 * the identifier to use when pds_ddmadt is enabled - confirm.
 */
struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;
   uint8_t binding_index;
} PVR_ALIGNED;
1116
/* Const map entry for conditional rendering.
 * NOTE(review): cond_render_pred_temp is presumably the index of a PDS temp
 * holding the predicate - confirm.
 */
struct pvr_pds_const_map_entry_cond_render {
   /* Same leading members as struct pvr_const_map_entry. */
   uint8_t type;
   uint8_t const_offset;

   uint32_t cond_render_pred_temp;
} PVR_ALIGNED;
1123
/* Program sizes plus const map bookkeeping. Taken by pointer (non-const) by
 * pvr_pds_generate_descriptor_upload_program() and
 * pvr_pds_generate_vertex_primary_program().
 */
struct pvr_pds_info {
   uint32_t temps_required;
   uint32_t code_size_in_dwords;
   uint32_t data_size_in_dwords;

   /* Const map entries.
    * NOTE(review): "entries" presumably points at a byte buffer of
    * entries_size_in_bytes bytes, of which entries_written_size_in_bytes are
    * in use, holding entry_count packed variable-size entries - confirm.
    */
   uint32_t entry_count;
   size_t entries_size_in_bytes;
   size_t entries_written_size_in_bytes;
   struct pvr_const_map_entry *entries;
};
1134
/* Descriptor upload program generator. Declaration only; the definition is
 * not part of this header.
 *
 * NOTE(review): the parameter roles below are inferred from names and types
 * only - confirm against the implementation.
 *   input_program - description of what the program has to upload.
 *   code_section  - dword buffer handed to the generator.
 *   info          - program info; non-const, so presumably filled in.
 */
void pvr_pds_generate_descriptor_upload_program(
   struct pvr_descriptor_program_input *input_program,
   uint32_t *code_section,
   struct pvr_pds_info *info);
/* Vertex primary program generator. Declaration only; the definition is not
 * part of this header.
 *
 * NOTE(review): the parameter roles below are inferred from names and types
 * only - confirm against the implementation.
 *   input_program           - description of the vertex primary program.
 *   code                    - dword buffer handed to the generator.
 *   info                    - program info; non-const, so presumably filled in.
 *   use_robust_vertex_fetch - selects robust vertex fetch.
 *   dev_info                - device information, read-only.
 */
void pvr_pds_generate_vertex_primary_program(
   struct pvr_pds_vertex_primary_program_input *input_program,
   uint32_t *code,
   struct pvr_pds_info *info,
   bool use_robust_vertex_fetch,
   const struct pvr_device_info *dev_info);
1145
1146/**
1147 * Generate USC address.
1148 *
1149 * \param doutu Location to write the generated address.
1150 * \param execution_address Address to generate from.
1151 */
1152static ALWAYS_INLINE void
1153pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address)
1154{
1155   doutu[0] |= (((execution_address >>
1156                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT)
1157                 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) &
1158                ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK);
1159}
1160
1161#endif /* PVR_PDS_H */
1162