1/*
2 * Copyright (C) 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "genxml/gen_macros.h"
25
26#include "util/macros.h"
27#include "compiler/shader_enums.h"
28
29#include "vk_util.h"
30
31#include "pan_cs.h"
32#include "pan_encoder.h"
33#include "pan_pool.h"
34#include "pan_shader.h"
35#include "pan_earlyzs.h"
36
37#include "panvk_cs.h"
38#include "panvk_private.h"
39#include "panvk_varyings.h"
40
41#include "vk_sampler.h"
42
43static enum mali_mipmap_mode
44panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
45{
46   switch (mode) {
47   case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
48   case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
49   default: unreachable("Invalid mipmap mode");
50   }
51}
52
53static unsigned
54panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
55{
56   switch (mode) {
57   case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
58   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
59   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
60   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
61   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
62   default: unreachable("Invalid wrap");
63   }
64}
65
/*
 * Return the hardware pixel format word for a varying slot.
 *
 * PNTC/PSIZ are always emitted as a single 16-bit float and POS always
 * uses the SNAP_4 fixed-point format. Any other slot uses the format
 * recorded in the varyings info, falling back to a CONSTANT (all-zero
 * swizzle) format when no format was assigned to the slot.
 *
 * v6 encodes a per-channel swizzle in the low bits; v7+ encodes an RGB
 * component order instead, hence the PAN_ARCH split.
 */
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
                        const struct panvk_varyings_info *varyings,
                        gl_shader_stage stage, unsigned idx)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   gl_varying_slot loc = varyings->stage[stage].loc[idx];

   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (varyings->varying[loc].format != PIPE_FORMAT_NONE)
         return pdev->formats[varyings->varying[loc].format].hw;
      /* No format assigned: emit a constant-zero varying. */
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}
98
99static void
100panvk_emit_varying(const struct panvk_device *dev,
101                   const struct panvk_varyings_info *varyings,
102                   gl_shader_stage stage, unsigned idx,
103                   void *attrib)
104{
105   gl_varying_slot loc = varyings->stage[stage].loc[idx];
106
107   pan_pack(attrib, ATTRIBUTE, cfg) {
108      cfg.buffer_index = varyings->varying[loc].buf;
109      cfg.offset = varyings->varying[loc].offset;
110      cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
111   }
112}
113
114void
115panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
116                              const struct panvk_varyings_info *varyings,
117                              gl_shader_stage stage,
118                              void *descs)
119{
120   struct mali_attribute_packed *attrib = descs;
121
122   for (unsigned i = 0; i < varyings->stage[stage].count; i++)
123      panvk_emit_varying(dev, varyings, stage, i, attrib++);
124}
125
126static void
127panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
128                       enum panvk_varying_buf_id id, void *buf)
129{
130   unsigned buf_idx = panvk_varying_buf_index(varyings, id);
131
132   pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
133      unsigned offset = varyings->buf[buf_idx].address & 63;
134
135      cfg.stride = varyings->buf[buf_idx].stride;
136      cfg.size = varyings->buf[buf_idx].size + offset;
137      cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
138   }
139}
140
141void
142panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
143                                  void *descs)
144{
145   struct mali_attribute_buffer_packed *buf = descs;
146
147   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
148      if (varyings->buf_mask & (1 << i))
149         panvk_emit_varying_buf(varyings, i, buf++);
150   }
151}
152
/*
 * Emit the ATTRIBUTE_BUFFER descriptor(s) for vertex-attribute buffer
 * #idx. The encoding depends on how the buffer is stepped:
 *
 *  - non-instanced draw: plain 1D buffer;
 *  - instanced draw, per-vertex data: 1D_MODULUS so the index wraps at
 *    padded_vertex_count;
 *  - per-instance data, divisor 0: zero-stride 1D buffer (all instances
 *    read the same element);
 *  - power-of-two effective divisor: POT-divisor type with the shift in
 *    divisor_r;
 *  - anything else: NPOT-divisor type plus a continuation descriptor
 *    carrying the magic-divisor constants. This continuation is why
 *    callers reserve two descriptor slots per buffer.
 */
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
                      const struct panvk_draw_info *draw,
                      const struct panvk_attrib_buf *bufs,
                      unsigned buf_count,
                      unsigned idx, void *desc)
{
   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];

   assert(idx < buf_count);
   const struct panvk_attrib_buf *buf = &bufs[idx];
   /* Hardware pointers are 64-byte aligned: fold the misalignment into
    * the size; the attribute descriptors add it back as an offset. */
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor =
      draw->padded_vertex_count * buf_info->instance_divisor;

   /* TODO: support instanced arrays */
   if (draw->instance_count <= 1) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = buf_info->per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!buf_info->per_instance) {
      /* Per-vertex data in an instanced draw: wrap the linear index at
       * the padded vertex count. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
      /* NPOT divisor: divide with a magic multiply/shift; the constants
       * go in a continuation descriptor right after this one. */
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      desc += pan_size(ATTRIBUTE_BUFFER);
      pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->instance_divisor;
      }
   }
}
223
224void
225panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
226                                 const struct panvk_attrib_buf *bufs,
227                                 unsigned buf_count,
228                                 const struct panvk_draw_info *draw,
229                                 void *descs)
230{
231   struct mali_attribute_buffer_packed *buf = descs;
232
233   for (unsigned i = 0; i < info->buf_count; i++) {
234      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf);
235      buf += 2;
236   }
237}
238
/*
 * Translate a VkSamplerCreateInfo into a Mali SAMPLER descriptor:
 * filters, mipmapping, wrap modes, LOD range/bias, depth-compare
 * function and border color.
 */
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
                             void *desc)
{
   /* Resolve the border color (including custom border colors); NULL
    * means no format is supplied for reinterpretation. */
   VkClearColorValue border_color =
      vk_sampler_border_color_value(pCreateInfo, NULL);

   pan_pack(desc, SAMPLER, cfg) {
      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
      cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;

      /* LOD values are fixed-point 16ths; the bool presumably selects
       * signedness (bias can be negative, min/max LOD cannot) — TODO
       * confirm against the FIXED_16 definition. */
      cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
      cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
      cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
      cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
      cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
      cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
      cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
      cfg.border_color_r = border_color.uint32[0];
      cfg.border_color_g = border_color.uint32[1];
      cfg.border_color_b = border_color.uint32[2];
      cfg.border_color_a = border_color.uint32[3];
   }
}
265
266static void
267panvk_emit_attrib(const struct panvk_device *dev,
268                  const struct panvk_draw_info *draw,
269                  const struct panvk_attribs_info *attribs,
270                  const struct panvk_attrib_buf *bufs,
271                  unsigned buf_count,
272                  unsigned idx, void *attrib)
273{
274   const struct panfrost_device *pdev = &dev->physical_device->pdev;
275   unsigned buf_idx = attribs->attrib[idx].buf;
276   const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx];
277
278   pan_pack(attrib, ATTRIBUTE, cfg) {
279      cfg.buffer_index = buf_idx * 2;
280      cfg.offset = attribs->attrib[idx].offset +
281                   (bufs[buf_idx].address & 63);
282
283      if (buf_info->per_instance)
284         cfg.offset += draw->first_instance * buf_info->stride;
285
286      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
287   }
288}
289
290void
291panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
292                             const struct panvk_draw_info *draw,
293                             const struct panvk_attribs_info *attribs,
294                             const struct panvk_attrib_buf *bufs,
295                             unsigned buf_count,
296                             void *descs)
297{
298   struct mali_attribute_packed *attrib = descs;
299
300   for (unsigned i = 0; i < attribs->attrib_count; i++)
301      panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++);
302}
303
304void
305panvk_per_arch(emit_ubo)(mali_ptr address, size_t size,  void *desc)
306{
307   pan_pack(desc, UNIFORM_BUFFER, cfg) {
308      cfg.pointer = address;
309      cfg.entries = DIV_ROUND_UP(size, 16);
310   }
311}
312
/*
 * Emit the whole UNIFORM_BUFFER descriptor table for a pipeline:
 * the sysvals UBO, the push-constant UBO, then for each descriptor set
 * its static UBOs followed by its dynamic UBOs. Slots with nothing
 * bound are zero-filled so the GPU never sees stale descriptors.
 */
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
                          const struct panvk_descriptor_state *state,
                          void *descs)
{
   struct mali_uniform_buffer_packed *ubos = descs;

   /* Fixed slot: driver-generated sysvals. */
   panvk_per_arch(emit_ubo)(state->sysvals_ptr,
                            sizeof(state->sysvals),
                            &ubos[PANVK_SYSVAL_UBO_INDEX]);

   /* Fixed slot: push constants, exposed to shaders as a UBO. */
   if (pipeline->layout->push_constants.size) {
      panvk_per_arch(emit_ubo)(state->push_constants,
                               ALIGN_POT(pipeline->layout->push_constants.size, 16),
                               &ubos[PANVK_PUSH_CONST_UBO_INDEX]);
   } else {
      memset(&ubos[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubos));
   }

   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set_layout *set_layout =
         vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]);
      const struct panvk_descriptor_set *set = state->sets[s];

      /* First UBO slot reserved for this set (static UBOs; dynamic ones
       * follow at a separate start index). */
      unsigned ubo_start =
         panvk_pipeline_layout_ubo_start(pipeline->layout, s, false);

      if (!set) {
         /* Set not bound: clear both static and dynamic slots. */
         unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos;
         memset(&ubos[ubo_start], 0, all_ubos * sizeof(*ubos));
      } else {
         /* Static UBO descriptors were pre-packed at set-update time. */
         memcpy(&ubos[ubo_start], set->ubos,
                set_layout->num_ubos * sizeof(*ubos));

         unsigned dyn_ubo_start =
            panvk_pipeline_layout_ubo_start(pipeline->layout, s, true);

         /* Dynamic UBOs need re-emitting here because their offsets are
          * supplied at bind time. */
         for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) {
            const struct panvk_buffer_desc *bdesc =
               &state->dyn.ubos[pipeline->layout->sets[s].dyn_ubo_offset + i];

            mali_ptr address = panvk_buffer_gpu_ptr(bdesc->buffer,
                                                    bdesc->offset);
            size_t size = panvk_buffer_range(bdesc->buffer,
                                             bdesc->offset, bdesc->size);
            if (size) {
               panvk_per_arch(emit_ubo)(address, size,
                                        &ubos[dyn_ubo_start + i]);
            } else {
               memset(&ubos[dyn_ubo_start + i], 0, sizeof(*ubos));
            }
         }
      }
   }
}
368
369void
370panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
371                                const struct panvk_draw_info *draw,
372                                void *job)
373{
374   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
375
376   memcpy(section, &draw->invocation, pan_size(INVOCATION));
377
378   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
379      cfg.job_task_split = 5;
380   }
381
382   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
383      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
384      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
385      cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs;
386      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
387      cfg.varying_buffers = draw->varying_bufs;
388      cfg.thread_storage = draw->tls;
389      cfg.offset_start = draw->offset_start;
390      cfg.instance_size = draw->instance_count > 1 ?
391                          draw->padded_vertex_count : 1;
392      cfg.uniform_buffers = draw->ubos;
393      cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
394      cfg.textures = draw->textures;
395      cfg.samplers = draw->samplers;
396   }
397}
398
/*
 * Emit a COMPUTE_JOB for a dispatch: pack the invocation layout from
 * the workgroup counts and local size, derive the task split, and point
 * the DRAW section at all per-dispatch resources.
 */
void
panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline,
                                 const struct panvk_dispatch_info *dispatch,
                                 void *job)
{
   panfrost_pack_work_groups_compute(pan_section_ptr(job, COMPUTE_JOB, INVOCATION),
                                     dispatch->wg_count.x,
                                     dispatch->wg_count.y,
                                     dispatch->wg_count.z,
                                     pipeline->cs.local_size.x,
                                     pipeline->cs.local_size.y,
                                     pipeline->cs.local_size.z,
                                     false, false);

   /* Split derived from the workgroup dimensions (sum of per-axis
    * ceil-log2 of size + 1). */
   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split =
         util_logbase2_ceil(pipeline->cs.local_size.x + 1) +
         util_logbase2_ceil(pipeline->cs.local_size.y + 1) +
         util_logbase2_ceil(pipeline->cs.local_size.z + 1);
   }

   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
      cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE];
      cfg.attributes = dispatch->attributes;
      cfg.attribute_buffers = dispatch->attribute_bufs;
      cfg.thread_storage = dispatch->tsd;
      cfg.uniform_buffers = dispatch->ubos;
      cfg.push_uniforms = dispatch->push_uniforms;
      cfg.textures = dispatch->textures;
      cfg.samplers = dispatch->samplers;
   }
}
431
/*
 * Fill the PRIMITIVE section of a tiler job: topology, point-size
 * source, provoking vertex, primitive restart and index buffer setup.
 */
static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
                           const struct panvk_draw_info *draw,
                           void *prim)
{
   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = pipeline->ia.topology;
      /* Point size is read from a FP16 per-vertex array only when the
       * VS actually writes it (see emit_tiler_primitive_size). */
      if (pipeline->ia.writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (pipeline->ia.primitive_restart)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;

      if (draw->index_size) {
         cfg.index_count = draw->index_count;
         cfg.indices = draw->indices;
         /* offset_start was already folded into the attribute fetch;
          * compensate so the effective base vertex stays correct. */
         cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;

         switch (draw->index_size) {
         case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break;
         case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break;
         case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break;
         default: unreachable("Invalid index size");
         }
      } else {
         /* Non-indexed draw: index_count carries the vertex count. */
         cfg.index_count = draw->vertex_count;
         cfg.index_type = MALI_INDEX_TYPE_NONE;
      }
   }
}
464
465static void
466panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
467                                const struct panvk_draw_info *draw,
468                                void *primsz)
469{
470   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
471      if (pipeline->ia.writes_point_size) {
472         cfg.size_array = draw->psiz;
473      } else {
474         cfg.constant = draw->line_width;
475      }
476   }
477}
478
479static void
480panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
481                     const struct panvk_draw_info *draw,
482                     void *dcd)
483{
484   pan_pack(dcd, DRAW, cfg) {
485      cfg.front_face_ccw = pipeline->rast.front_ccw;
486      cfg.cull_front_face = pipeline->rast.cull_front_face;
487      cfg.cull_back_face = pipeline->rast.cull_back_face;
488      cfg.position = draw->position;
489      cfg.state = draw->fs_rsd;
490      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
491      cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs;
492      cfg.viewport = draw->viewport;
493      cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
494      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
495      cfg.thread_storage = draw->tls;
496
497      /* For all primitives but lines DRAW.flat_shading_vertex must
498       * be set to 0 and the provoking vertex is selected with the
499       * PRIMITIVE.first_provoking_vertex field.
500       */
501      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
502          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
503          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
504         cfg.flat_shading_vertex = true;
505      }
506
507      cfg.offset_start = draw->offset_start;
508      cfg.instance_size = draw->instance_count > 1 ?
509                         draw->padded_vertex_count : 1;
510      cfg.uniform_buffers = draw->ubos;
511      cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
512      cfg.textures = draw->textures;
513      cfg.samplers = draw->samplers;
514
515      /* TODO: occlusion queries */
516   }
517}
518
519void
520panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
521                               const struct panvk_draw_info *draw,
522                               void *job)
523{
524   void *section;
525
526   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
527   memcpy(section, &draw->invocation, pan_size(INVOCATION));
528
529   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
530   panvk_emit_tiler_primitive(pipeline, draw, section);
531
532   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
533   panvk_emit_tiler_primitive_size(pipeline, draw, section);
534
535   section = pan_section_ptr(job, TILER_JOB, DRAW);
536   panvk_emit_tiler_dcd(pipeline, draw, section);
537
538   pan_section_pack(job, TILER_JOB, TILER, cfg) {
539      cfg.address = draw->tiler_ctx->bifrost;
540   }
541   pan_section_pack(job, TILER_JOB, PADDING, padding);
542}
543
544void
545panvk_per_arch(emit_viewport)(const VkViewport *viewport,
546                              const VkRect2D *scissor,
547                              void *vpd)
548{
549   /* The spec says "width must be greater than 0.0" */
550   assert(viewport->x >= 0);
551   int minx = (int)viewport->x;
552   int maxx = (int)(viewport->x + viewport->width);
553
554   /* Viewport height can be negative */
555   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
556   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
557
558   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
559   miny = MAX2(scissor->offset.x, minx);
560   miny = MAX2(scissor->offset.y, miny);
561   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
562   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
563
564   /* Make sure we don't end up with a max < min when width/height is 0 */
565   maxx = maxx > minx ? maxx - 1 : maxx;
566   maxy = maxy > miny ? maxy - 1 : maxy;
567
568   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
569   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
570
571   pan_pack(vpd, VIEWPORT, cfg) {
572      cfg.scissor_minimum_x = minx;
573      cfg.scissor_minimum_y = miny;
574      cfg.scissor_maximum_x = maxx;
575      cfg.scissor_maximum_y = maxy;
576      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
577      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
578   }
579}
580
581static enum mali_register_file_format
582bifrost_blend_type_from_nir(nir_alu_type nir_type)
583{
584   switch(nir_type) {
585   case 0: /* Render target not in use */
586      return 0;
587   case nir_type_float16:
588      return MALI_REGISTER_FILE_FORMAT_F16;
589   case nir_type_float32:
590      return MALI_REGISTER_FILE_FORMAT_F32;
591   case nir_type_int32:
592      return MALI_REGISTER_FILE_FORMAT_I32;
593   case nir_type_uint32:
594      return MALI_REGISTER_FILE_FORMAT_U32;
595   case nir_type_int16:
596      return MALI_REGISTER_FILE_FORMAT_I16;
597   case nir_type_uint16:
598      return MALI_REGISTER_FILE_FORMAT_U16;
599   default:
600      unreachable("Unsupported blend shader type for NIR alu type");
601   }
602}
603
/*
 * Emit the BLEND descriptor for render target #rt from the pipeline's
 * baked blend state: sRGB conversion, fixed-function blend equation,
 * the fixed-point blend constant, and the framebuffer conversion
 * (memory format + blend-shader register format).
 */
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
                           const struct panvk_pipeline *pipeline,
                           unsigned rt, void *bd)
{
   const struct pan_blend_state *blend = &pipeline->blend.state;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      /* RT unused or fully write-masked: disable blending and skip the
       * rest of the field setup (continue exits the pack body early). */
      if (!blend->rt_count || !rts->equation.color_mask) {
         cfg.enable = false;
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

      const struct panfrost_device *pdev = &dev->physical_device->pdev;
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      /* Widest channel of the RT format decides the constant's
       * fixed-point precision. */
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed point constant */
      float fconst =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
      /* Quantize to chan_size bits, then left-align in the 16-bit
       * descriptor field. */
      u16 constant = fconst * ((1 << chan_size) - 1);
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation)) {
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      } else {
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;

         cfg.internal.fixed_function.alpha_zero_nop =
                 pan_blend_alpha_zero_nop(blend->rts[rt].equation);
         cfg.internal.fixed_function.alpha_one_store =
                 pan_blend_alpha_one_store(blend->rts[rt].equation);
      }

      /* If we want the conversion to work properly,
       * num_comps must be set to 4
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
   }
}
664
665void
666panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
667                                    const struct panvk_pipeline *pipeline,
668                                    unsigned rt, const float *constants,
669                                    void *bd)
670{
671   float constant = constants[pipeline->blend.constant[rt].index];
672
673   pan_pack(bd, BLEND, cfg) {
674      cfg.enable = false;
675      cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
676   }
677}
678
/*
 * Emit a partial RENDERER_STATE containing only the fields controlled
 * by Vulkan dynamic state (depth bias, stencil compare/write masks,
 * stencil reference). Presumably combined with the static RSD from
 * emit_base_fs_rsd() by the caller — confirm against the cmd buffer
 * code.
 */
void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
                                const struct panvk_cmd_state *state,
                                void *rsd)
{
   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
         /* Same * 2.0f scaling as the static path in emit_base_fs_rsd(). */
         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = state->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = state->zs.s_front.ref;
         cfg.stencil_back.reference_value = state->zs.s_back.ref;
      }
   }
}
707
/*
 * Emit the static part of the fragment RENDERER_STATE: shader setup,
 * forward-pixel-kill / early-ZS decisions, multisampling, depth/stencil
 * state, and depth-bias/stencil fields not covered by dynamic state
 * (those are emitted in emit_dyn_fs_rsd()).
 */
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
                                 const struct panvk_pipeline *pipeline,
                                 void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

         /* FPK is only safe if the shader can support it, every enabled
          * RT is actually written, and neither alpha-to-coverage nor
          * blending reads of the destination can change the result. */
         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
                 pipeline->fs.info.fs.can_fpk &&
                 !(rt_mask & ~rt_written) &&
                 !pipeline->ms.alpha_to_coverage &&
                 !pipeline->blend.reads_dest;

         bool writes_zs = pipeline->zs.z_write || pipeline->zs.s_test;
         bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test;
         bool oq = false; /* TODO: Occlusion queries */

         /* Shared early-ZS heuristic picks the kill/update points. */
         struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq,
                            pipeline->ms.alpha_to_coverage, zs_always_passes);

         cfg.properties.pixel_kill_operation = earlyzs.kill;
         cfg.properties.zs_update_operation = earlyzs.update;
      } else {
         /* No fragment shader: depth comes from the fixed-function
          * interpolated Z and everything can be killed early. */
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      /* Depth clamping replaces near/far discard. */
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      /* Fields below are only emitted here when NOT dynamic; otherwise
       * emit_dyn_fs_rsd() provides them from the command-buffer state. */
      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}
795
796void
797panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
798                                const struct pan_shader_info *shader_info,
799                                mali_ptr shader_ptr,
800                                void *rsd)
801{
802   assert(shader_info->stage != MESA_SHADER_FRAGMENT);
803
804   pan_pack(rsd, RENDERER_STATE, cfg) {
805      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
806   }
807}
808
809void
810panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
811                                   unsigned width, unsigned height,
812                                   const struct panfrost_ptr *descs)
813{
814   const struct panfrost_device *pdev = &dev->physical_device->pdev;
815
816   pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
817      cfg.size = pdev->tiler_heap->size;
818      cfg.base = pdev->tiler_heap->ptr.gpu;
819      cfg.bottom = pdev->tiler_heap->ptr.gpu;
820      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
821   }
822
823   pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
824      cfg.hierarchy_mask = 0x28;
825      cfg.fb_width = width;
826      cfg.fb_height = height;
827      cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
828   }
829}
830