1/*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <stdbool.h>
25#include <stdint.h>
26
27#include "hwdef/rogue_hw_defs.h"
28#include "hwdef/rogue_hw_utils.h"
29#include "pvr_device_info.h"
30#include "pvr_job_common.h"
31#include "pvr_private.h"
32#include "util/macros.h"
33#include "util/u_math.h"
34#include "vk_alloc.h"
35#include "vk_format.h"
36#include "vk_object.h"
37
38/* clang-format off */
39static enum PVRX(PBESTATE_SWIZ)
40pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz)
41/* clang-format on */
42{
43   switch (swz) {
44   case PIPE_SWIZZLE_0:
45      return ROGUE_PBESTATE_SWIZ_ZERO;
46   case PIPE_SWIZZLE_1:
47      return ROGUE_PBESTATE_SWIZ_ONE;
48   case PIPE_SWIZZLE_X:
49      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
50   case PIPE_SWIZZLE_Y:
51      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
52   case PIPE_SWIZZLE_Z:
53      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
54   case PIPE_SWIZZLE_W:
55      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
56   case PIPE_SWIZZLE_NONE:
57      if (comp == VK_COMPONENT_SWIZZLE_A)
58         return ROGUE_PBESTATE_SWIZ_ONE;
59      else
60         return ROGUE_PBESTATE_SWIZ_ZERO;
61   default:
62      unreachable("Unknown enum pipe_swizzle");
63   };
64}
65
66void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
67                                      enum pvr_pbe_gamma default_gamma,
68                                      bool with_packed_usc_channel,
69                                      uint32_t *const src_format_out,
70                                      enum pvr_pbe_gamma *const gamma_out)
71{
72   uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0);
73
74   *gamma_out = default_gamma;
75
76   if (vk_format_has_32bit_component(vk_format) ||
77       vk_format_is_int(vk_format)) {
78      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
79   } else if (vk_format_is_float(vk_format)) {
80      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
81   } else if (vk_format_is_srgb(vk_format)) {
82      *gamma_out = PVR_PBE_GAMMA_ENABLED;
83
84      /* F16 source for gamma'd formats. */
85      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
86   } else if (vk_format_has_depth(vk_format) &&
87              vk_format_get_component_bits(vk_format,
88                                           UTIL_FORMAT_COLORSPACE_ZS,
89                                           0) > 16) {
90      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
91   } else if (vk_format_has_stencil(vk_format) &&
92              vk_format_get_component_bits(vk_format,
93                                           UTIL_FORMAT_COLORSPACE_ZS,
94                                           1) > 0) {
95      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
96   } else if (chan_0_width > 16) {
97      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
98   } else if (chan_0_width > 8) {
99      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
100   } else if (!with_packed_usc_channel) {
101      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
102   } else {
103      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
104   }
105}
106
107static void pvr_pbe_get_src_pos(const struct pvr_device_info *dev_info,
108                                enum pvr_pbe_source_start_pos source_start,
109                                uint32_t *const src_pos_out,
110                                bool *const src_pos_offset_128_out)
111{
112   *src_pos_offset_128_out = false;
113
114   switch (source_start) {
115   case PVR_PBE_STARTPOS_BIT32:
116      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
117      break;
118
119   case PVR_PBE_STARTPOS_BIT64:
120      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
121      break;
122
123   case PVR_PBE_STARTPOS_BIT96:
124      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
125      break;
126
127   case PVR_PBE_STARTPOS_BIT0:
128   default:
129      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
130         switch (source_start) {
131         case PVR_PBE_STARTPOS_BIT128:
132            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
133            *src_pos_offset_128_out = true;
134            break;
135
136         case PVR_PBE_STARTPOS_BIT160:
137            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
138            *src_pos_offset_128_out = true;
139            break;
140
141         case PVR_PBE_STARTPOS_BIT192:
142            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
143            *src_pos_offset_128_out = true;
144            break;
145
146         case PVR_PBE_STARTPOS_BIT224:
147            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
148            *src_pos_offset_128_out = true;
149            break;
150
151         default:
152            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
153            break;
154         }
155      } else {
156         *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
157      }
158      break;
159   }
160}
161
162void pvr_pbe_pack_state(
163   const struct pvr_device_info *dev_info,
164   const struct pvr_pbe_surf_params *surface_params,
165   const struct pvr_pbe_render_params *render_params,
166   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
167   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
168{
169   /* This function needs updating if the value of
170    * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
171    * value.
172    */
173   STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
174
175   /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
176    * changes, so check that it's the expected value.
177    */
178   STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
179
180   pbe_reg_words[2] = 0;
181
182   if (surface_params->z_only_render) {
183      pbe_cs_words[0] = 0;
184
185      pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
186         state.emptytile = true;
187      }
188
189      pbe_reg_words[0] = 0;
190      pbe_reg_words[1] = 0;
191
192      return;
193   }
194
195   pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
196      state.address_low = surface_params->addr;
197   }
198
199   pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
200      state.address_high = surface_params->addr;
201
202      state.source_format = surface_params->source_format;
203
204      pvr_pbe_get_src_pos(dev_info,
205                          render_params->source_start,
206                          &state.source_pos,
207                          &state.source_pos_offset_128);
208
209      /* MRT index (Use 0 for a single render target)/ */
210      state.mrt_index = render_params->mrt_index;
211
212      /* Normalization flag based on output format. */
213      state.norm = surface_params->is_normalized;
214
215      state.packmode = surface_params->pbe_packmode;
216   }
217
218   pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
219      reg.tilerelative = true;
220
221      switch (surface_params->mem_layout) {
222      case PVR_MEMLAYOUT_TWIDDLED:
223         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
224         break;
225
226      case PVR_MEMLAYOUT_3DTWIDDLED:
227         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
228         break;
229
230      case PVR_MEMLAYOUT_LINEAR:
231      default:
232         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
233         break;
234      }
235
236      /* FIXME: Remove rotation and y_flip hardcoding if needed. */
237      reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
238      reg.y_flip = false;
239
240      /* Note: Due to gamma being overridden above, anything other than
241       * ENABLED/NONE is ignored.
242       */
243      if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
244         reg.gamma = true;
245
246         if (surface_params->nr_components == 2)
247            reg.twocomp_gamma =
248               PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
249      }
250
251      reg.linestride = (surface_params->stride - 1) /
252                       PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
253      reg.minclip_x = render_params->min_x_clip;
254
255      reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R,
256                                              surface_params->swizzle[0]);
257      reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G,
258                                              surface_params->swizzle[1]);
259      reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B,
260                                              surface_params->swizzle[2]);
261      reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A,
262                                              surface_params->swizzle[3]);
263
264      if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
265         reg.size_z = util_logbase2_ceil(surface_params->depth);
266
267      reg.downscale = surface_params->down_scale;
268   }
269
270   pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
271      if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
272          surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
273         reg.size_x = util_logbase2_ceil(surface_params->width);
274         reg.size_y = util_logbase2_ceil(surface_params->height);
275      }
276
277      reg.minclip_y = render_params->min_y_clip;
278      reg.maxclip_x = render_params->max_x_clip;
279      reg.zslice = render_params->slice;
280      reg.maxclip_y = render_params->max_y_clip;
281   }
282}
283
284/* TODO: Split this into smaller functions to make it easier to follow. When
285 * doing this, it would be nice to have a function that returns
286 * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
287 * pvr_render_job_ws_fragment_state_init().
288 */
289void pvr_setup_tiles_in_flight(
290   const struct pvr_device_info *dev_info,
291   const struct pvr_device_runtime_info *dev_runtime_info,
292   uint32_t msaa_mode,
293   uint32_t pixel_width,
294   bool paired_tiles,
295   uint32_t max_tiles_in_flight,
296   uint32_t *const isp_ctl_out,
297   uint32_t *const pixel_ctl_out)
298{
299   uint32_t total_tiles_in_flight = 0;
300   uint32_t usable_partition_size;
301   uint32_t partitions_available;
302   uint32_t usc_min_output_regs;
303   uint32_t max_partitions;
304   uint32_t partition_size;
305   uint32_t max_phantoms;
306   uint32_t tile_size_x;
307   uint32_t tile_size_y;
308   uint32_t isp_samples;
309
310   /* Round up the pixel width to the next allocation granularity. */
311   usc_min_output_regs =
312      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
313   pixel_width = MAX2(pixel_width, usc_min_output_regs);
314   pixel_width = util_next_power_of_two(pixel_width);
315
316   assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
317
318   partition_size = pixel_width;
319
320   isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
321   if (isp_samples == 2) {
322      if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
323         partition_size *= 2U;
324   } else if (isp_samples == 4) {
325      if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
326          msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
327         partition_size *= 4U;
328      else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
329         partition_size *= 2U;
330   }
331
332   /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
333    * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
334    * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
335    */
336   tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
337   tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
338
339   /* We only support square tiles. */
340   assert(tile_size_x == tile_size_y);
341
342   if (tile_size_x == 16U) {
343      /* Cores with 16x16 tiles does not use tile quadrants. */
344      partition_size *= tile_size_x * tile_size_y;
345   } else {
346      /* Size of a tile quadrant (in dwords). */
347      partition_size *= (tile_size_x * tile_size_y / 4U);
348   }
349
350   /* Maximum available partition space for partitions of this size. */
351   max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
352   usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
353                                partition_size * max_partitions);
354
355   if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
356       (1024 * 4 * 4)) {
357      /* Do not apply the limit for cores with 16x16 tile size (no quadrant
358       * affinity). */
359      if (tile_size_x != 16) {
360         /* This is to counter the extremely limited CS size on some cores.
361          */
362         /* Available partition space is limited to 8 tile quadrants. */
363         usable_partition_size =
364            MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
365      }
366   }
367
368   /* Ensure that maximum number of partitions in use is not greater
369    * than the total number of partitions available.
370    */
371   partitions_available =
372      MIN2(max_partitions, usable_partition_size / partition_size);
373
374   if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
375      max_phantoms = dev_runtime_info->num_phantoms;
376   else if (PVR_HAS_FEATURE(dev_info, roguexe))
377      max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
378   else
379      max_phantoms = 1;
380
381   for (uint32_t i = 0; i < max_phantoms; i++) {
382      uint32_t usc_tiles_in_flight = partitions_available;
383      uint32_t isp_tiles_in_flight;
384
385      /* Cores with tiles size other than 16x16 use tile quadrants. */
386      if (tile_size_x != 16) {
387         uint32_t num_clusters =
388            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
389         usc_tiles_in_flight =
390            (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
391      }
392
393      assert(usc_tiles_in_flight > 0);
394
395      isp_tiles_in_flight =
396         PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
397      /* Ensure that maximum number of ISP tiles in flight is not greater
398       * than the maximum number of USC tiles in flight.
399       */
400      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
401          PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
402             2) {
403         isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
404      }
405
406      isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
407
408      /* Limit the number of tiles in flight if the shaders have
409       * requested a large allocation of local memory.
410       */
411      if (max_tiles_in_flight > 0U) {
412         isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
413
414         if (PVR_HAS_FEATURE(dev_info, roguexe)) {
415            if (tile_size_x == 16) {
416               /* The FW infers the tiles in flight value from the
417                * partitions setting.
418                */
419               /* Partitions per tile. */
420               partitions_available = isp_tiles_in_flight;
421            } else {
422               /* Partitions per tile quadrant. */
423               partitions_available = isp_tiles_in_flight * 4U;
424            }
425         }
426      }
427
428      /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
429       * 1 between Phantoms.
430       */
431      if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
432         total_tiles_in_flight = isp_tiles_in_flight + 1U;
433
434      total_tiles_in_flight += isp_tiles_in_flight;
435   }
436
437   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
438       PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
439          2) {
440      /* Limit the ISP tiles in flight to fit into the available USC partition
441       * store.
442       */
443      total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
444   }
445
446   if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
447      total_tiles_in_flight =
448         MIN2(total_tiles_in_flight, partitions_available / 2);
449   }
450
451   pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
452      if (pixel_width == 1 && usc_min_output_regs == 1) {
453         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
454      } else if (pixel_width == 2) {
455         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
456      } else if (pixel_width == 4) {
457         reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
458      } else if (pixel_width == 8 &&
459                 PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
460         reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
461      } else if (usc_min_output_regs == 1) {
462         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
463      } else {
464         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
465      }
466
467      if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
468         /* Setup the partition mask based on the maximum number of
469          * partitions available.
470          */
471         reg.partition_mask = (1 << max_partitions) - 1;
472      } else {
473         reg.enable_4th_partition = true;
474
475         /* Setup the partition mask based on the number of partitions
476          * available.
477          */
478         reg.partition_mask = (1U << partitions_available) - 1U;
479      }
480   }
481
482   pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
483      if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
484         reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
485      else
486         reg.pipe_enable = total_tiles_in_flight - 1;
487   }
488}
489