/**********************************************************
 * Copyright 2008-2022 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "VGPU10ShaderTokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
#include "nir/nir_to_tgsi.h"


/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}
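
/*
 * Illustrative example (not part of the driver): for a shader whose
 * inputs include GENERIC[0] and GENERIC[4], assuming `info` points to
 * its tgsi_shader_info, the returned mask has bits 0 and 4 set:
 *
 *    uint64_t mask = svga_get_generic_inputs_mask(info);
 *    assert(mask == ((UINT64_C(1) << 0) | (UINT64_C(1) << 4)));  // 0x11
 */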


/**
 * Scan shader info to return a bitmask of written outputs.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}



/**
 * Given a mask of used generic variables (as returned by the above functions)
 * fill in a table which maps those indices to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used.  Since index 0 is reserved for texcoord[0], the
 * remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining table entries are left at -1 and are assigned the next
 * unused value on demand by svga_remap_generic_index().
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternatively, we could cull
 * VS instructions that write to that register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}

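/*
 * Illustrative sketch (not part of the driver) of how the two remap
 * functions above work together:
 *
 *    int8_t table[MAX_GENERIC_VARYING];
 *    svga_remap_generics((UINT64_C(1) << 1) | (UINT64_C(1) << 3), table);
 *    // table[1] == 1, table[3] == 2, all other entries == -1
 *    int slot = svga_remap_generic_index(table, 5);
 *    // slot == 3: one past the largest index assigned so far
 */

/*
 * Swizzle translation tables, indexed by a sampler view's per-channel
 * swizzle.  copy_alpha is the identity mapping; the others force a
 * particular channel pattern (e.g. set_alpha forces alpha to 1,
 * set_YYYY replicates the Y component into all channels).
 */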
static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};


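/**
 * Translate a pipe format to the corresponding VGPU10 resource return
 * type, or VGPU10_RETURN_TYPE_MAX if the format has no direct mapping.
 */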
static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return VGPU10_RETURN_TYPE_UNORM;
   else if (util_format_is_snorm(format))
      return VGPU10_RETURN_TYPE_SNORM;
   else if (util_format_is_pure_uint(format))
      return VGPU10_RETURN_TYPE_UINT;
   else if (util_format_is_pure_sint(format))
      return VGPU10_RETURN_TYPE_SINT;
   else if (util_format_is_float(format))
      return VGPU10_RETURN_TYPE_FLOAT;
   else
      return VGPU10_RETURN_TYPE_MAX;
}


/**
 * A helper function to return TRUE if the specified format
 * is a supported format for the sample_c instruction.
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   return util_format_is_depth_or_stencil(format);
}


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;
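   /* Bitmask of sampler units whose sampler state has already been
    * assigned a slot in the de-duplicated sampler list built below.
    */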
   unsigned sampler_slots = 0;

   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views don't match,
    * loop over the larger of the two counts.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   key->num_samplers = 0;

   /* Enable sampler state mapping only if GL4.3 is supported and
    * the number of samplers exceeds the SVGA limit, or the sampler
    * state mapping environment variable is set.
    */
   boolean sampler_state_mapping =
      svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);

   key->sampler_state_mapping =
      key->num_textures && sampler_state_mapping ? 1 : 0;

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      if (view) {
         assert(view->texture);

         enum pipe_texture_target target = view->target;
         assert(target < (1 << 4)); /* texture_target:4 */

         key->tex[i].target = target;
         key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
         key->tex[i].sampler_view = 1;

         /* 1D/2D array textures with one slice and cube map array textures
          * with one cube are treated as non-arrays by the SVGA3D device.
          * Set the is_array flag only if we know that we have more than 1
          * element.  This will be used to select shader instruction/resource
          * types during shader translation.
          */
         switch (target) {
         case PIPE_TEXTURE_1D_ARRAY:
         case PIPE_TEXTURE_2D_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 1;
            break;
         case PIPE_TEXTURE_CUBE_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 6;
            break;
         default:
            ; /* nothing / silence compiler warning */
         }

         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
         key->tex[i].num_samples = view->texture->nr_samples;

         const enum pipe_swizzle *swizzle_tab;
         if (target == PIPE_BUFFER) {
            SVGA3dSurfaceFormat svga_format;
            unsigned tf_flags;

            assert(view->texture->target == PIPE_BUFFER);

            /* Apply any special swizzle mask for the view format if needed */

            svga_translate_texture_buffer_view_format(view->format,
                                                      &svga_format, &tf_flags);
            if (tf_flags & TF_000X)
               swizzle_tab = set_000X;
            else if (tf_flags & TF_XXXX)
               swizzle_tab = set_XXXX;
            else if (tf_flags & TF_XXX1)
               swizzle_tab = set_XXX1;
            else if (tf_flags & TF_XXXY)
               swizzle_tab = set_XXXY;
            else
               swizzle_tab = copy_alpha;
         }
         else {
            /* If we have a non-alpha view into an svga3d surface with an
             * alpha channel, then explicitly set the alpha channel to 1
             * when sampling. Note that we need to check the
             * actual device format to cover also imported surface cases.
             */
            swizzle_tab =
               (!util_format_has_alpha(view->format) &&
                svga_texture_device_format_has_alpha(view->texture)) ?
                set_alpha : copy_alpha;

            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
               swizzle_tab = set_alpha;

            if (view->format == PIPE_FORMAT_X24S8_UINT ||
                view->format == PIPE_FORMAT_X32_S8X24_UINT)
               swizzle_tab = set_YYYY;

            /* Save the compare function as we need to handle
             * depth compare in the shader.
             */
            key->tex[i].compare_mode = sampler->compare_mode;
            key->tex[i].compare_func = sampler->compare_func;

            /* Set the compare_in_shader bit if the view format
             * is not a supported format for shadow compare.
             * In this case, we'll do the comparison in the shader.
             */
            if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
                !isValidSampleCFormat(view->format)) {
               key->tex[i].compare_in_shader = TRUE;
            }
         }

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }
      else {
         key->tex[i].sampler_view = 0;
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            if (view) {
               assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
               key->tex[i].width_height_idx = idx++;
            }
            key->tex[i].unnormalized = TRUE;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
                key->tex[i].texel_bias = TRUE;
            }
         }

         if (!sampler_state_mapping) {
            /* Use the same index if sampler state mapping is not supported */
            key->tex[i].sampler_index = i;
            key->num_samplers = i + 1;
         }
         else {

            /* The current samplers list can have redundant entries.
             * In order to keep the number of bound samplers within the
             * max limit supported by SVGA, we'll recreate the list with
             * unique sampler state objects only.
             */

            /* Check to see if this sampler is already on the list.
             * If so, reuse that sampler's index for this entry.
             */
            for (unsigned j = 0; j <= i; j++) {
               if (svga->curr.sampler[shader_type][j] == sampler) {

                  if (!(sampler_slots & (1 << j))) {

                     /* If this sampler has not been added to the new list
                      * yet, set its sampler index to the next sampler index,
                      * increment the sampler count, and mark this
                      * sampler as added to the list.
                      */

                     unsigned next_index =
                        MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);

                     key->tex[i].sampler_index = next_index;
                     key->num_samplers = next_index + 1;

                     if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                        /* reserve one slot for the alternate sampler */
                        key->num_samplers++;
                     }

                     sampler_slots |= (1 << j);
                  }
                  else {
                     key->tex[i].sampler_index = key->tex[j].sampler_index;
                  }
                  break;
               }
            }
         }
      }
   }

   if (svga_have_gl43(svga)) {
      if (shader->info.uses_images || shader->info.uses_hw_atomic ||
          shader->info.uses_shader_buffers) {

         /* Save the uavSpliceIndex, which is the index used for the first
          * uav in the draw pipeline. For compute, uavSpliceIndex is always 0.
          */
         if (shader_type != PIPE_SHADER_COMPUTE)
            key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;

         unsigned uav_splice_index = key->uav_splice_index;

         /* Also get the texture data type to be used in the uav declaration */
         const struct svga_image_view *cur_image_view =
            &svga->curr.image_views[shader_type][0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
              i++, cur_image_view++) {

            struct pipe_resource *resource = cur_image_view->desc.resource;

            if (resource) {
               key->images[i].return_type =
                  svga_get_texture_datatype(cur_image_view->desc.format);

               key->images[i].is_array = resource->array_size > 1;

               /* Save the image resource target in the shader key because
                * for a single-layer image view, the resource target in the
                * TGSI shader is changed to a different texture target.
                */
               key->images[i].resource_target = resource->target;
               if (resource->target == PIPE_TEXTURE_3D ||
                   resource->target == PIPE_TEXTURE_1D_ARRAY ||
                   resource->target == PIPE_TEXTURE_2D_ARRAY ||
                   resource->target == PIPE_TEXTURE_CUBE ||
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
                  key->images[i].is_single_layer =
                     cur_image_view->desc.u.tex.first_layer ==
                     cur_image_view->desc.u.tex.last_layer;
               }

               key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
            }
            else
               key->images[i].uav_index = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_sbuf =
            &svga->curr.shader_buffers[shader_type][0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
              i++, cur_sbuf++) {

            if (cur_sbuf->resource)
               key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
            else
               key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
              i++, cur_buf++) {

            if (cur_buf->resource)
               key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
            else
               key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         key->image_size_used = shader->info.uses_image_size;
      }

      /* Save info about which constant buffers are to be viewed
       * as raw buffers in the shader key.
       */
      if (shader->info.const_buffers_declared &
          svga->state.raw_constbufs[shader_type]) {
         key->raw_buffers = svga->state.raw_constbufs[shader_type];

         /* beginning index for srv for raw buffers */
         key->srv_raw_buf_index = PIPE_MAX_SAMPLERS;
      }
   }

   key->clamp_vertex_color = svga->curr.rast ?
                             svga->curr.rast->templ.clamp_vertex_color : 0;
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}
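
/*
 * Typical lookup-then-compile flow (illustrative sketch only; the local
 * variable names are assumptions, not part of this file):
 *
 *    struct svga_shader_variant *v = svga_search_shader_key(shader, &key);
 *    if (!v && svga_compile_shader(svga, shader, &key, &v) != PIPE_OK)
 *       return PIPE_ERROR;
 */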

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-VGPU10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a VGPU10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;
   unsigned len = codeLen + variant->signatureLen;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue the DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signature,
                                           variant->signatureLen);

   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader and DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,
                                           len);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc.  The variant->id field
 *                 will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                variant->type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind, or NULL to unbind.
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


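/**
 * Allocate a new shader variant structure of the size appropriate for
 * the given shader stage.  Returns NULL for an unknown stage or if
 * allocation fails.
 */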
struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
   struct svga_shader_variant *variant;

   switch (type) {
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
      break;
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
      break;
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
      break;
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
      break;
   case PIPE_SHADER_COMPUTE:
      variant = CALLOC(1, sizeof(struct svga_cs_variant));
      break;
   default:
      return NULL;
   }

   if (variant) {
      variant->type = svga_shader_type(type);
      svga->hud.num_shaders++;
   }
   return variant;
}


void
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
{
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
                                               variant->type));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE(variant->signature);
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tes = 0;

   return PIPE_OK;
}


/**
 * Helper function to create a shader object.
 */
struct svga_shader *
svga_create_shader(struct pipe_context *pipe,
                   const struct pipe_shader_state *templ,
                   enum pipe_shader_type stage,
                   unsigned shader_structlen)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_shader *shader = CALLOC(1, shader_structlen);

   if (shader == NULL)
      return NULL;

   shader->id = svga->debug.shader_id++;
   shader->type = templ->type;
   shader->stage = stage;

   shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      /* Collect basic info of the shader */
      svga_tgsi_scan_shader(shader);
   }
   else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      shader->stream_output = svga_create_stream_output(svga, shader,
                                                        &templ->stream_output);
   }

   return shader;
}


/**
 * Helper function to compile a shader.
 * Depending on the shader IR type, it calls the corresponding
 * compile shader function.
 */
enum pipe_error
svga_compile_shader(struct svga_context *svga,
                    struct svga_shader *shader,
                    const struct svga_compile_key *key,
                    struct svga_shader_variant **out_variant)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_ERROR;

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      variant = svga_tgsi_compile_shader(svga, shader, key);
   } else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   if (variant == NULL) {
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Failed to compile shader, using dummy shader.\n");
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }
   else if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Shader too large (%u bytes), using dummy shader.\n",
                      (unsigned)(variant->nr_tokens
                                 * sizeof(variant->tokens[0])));

         /* Free the too-large variant */
         svga_destroy_shader_variant(svga, variant);

         /* Use simple pass-through shader instead */
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }

   if (variant == NULL)
      return PIPE_ERROR;

   ret = svga_define_shader(svga, variant);
   if (ret != PIPE_OK) {
      svga_destroy_shader_variant(svga, variant);
      return ret;
   }

   *out_variant = variant;

   /* insert variant at head of linked list */
   variant->next = shader->variants;
   shader->variants = variant;

   return PIPE_OK;
}