1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2010 Christoph Bumiller
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci#include "pipe/p_defines.h"
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
26bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h"
27bf215546Sopenharmony_ci#include "util/blob.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "nvc0/nvc0_context.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "nv50_ir_driver.h"
32bf215546Sopenharmony_ci#include "nvc0/nve4_compute.h"
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
35bf215546Sopenharmony_ci * 124 scalar varying values.
36bf215546Sopenharmony_ci */
37bf215546Sopenharmony_cistatic uint32_t
38bf215546Sopenharmony_cinvc0_shader_input_address(unsigned sn, unsigned si)
39bf215546Sopenharmony_ci{
40bf215546Sopenharmony_ci   switch (sn) {
41bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TESSOUTER:    return 0x000 + si * 0x4;
42bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TESSINNER:    return 0x010 + si * 0x4;
43bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PATCH:        return 0x020 + si * 0x10;
44bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PRIMID:       return 0x060;
45bf215546Sopenharmony_ci   case TGSI_SEMANTIC_LAYER:        return 0x064;
46bf215546Sopenharmony_ci   case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
47bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PSIZE:        return 0x06c;
48bf215546Sopenharmony_ci   case TGSI_SEMANTIC_POSITION:     return 0x070;
49bf215546Sopenharmony_ci   case TGSI_SEMANTIC_GENERIC:      return 0x080 + si * 0x10;
50bf215546Sopenharmony_ci   case TGSI_SEMANTIC_FOG:          return 0x2e8;
51bf215546Sopenharmony_ci   case TGSI_SEMANTIC_COLOR:        return 0x280 + si * 0x10;
52bf215546Sopenharmony_ci   case TGSI_SEMANTIC_BCOLOR:       return 0x2a0 + si * 0x10;
53bf215546Sopenharmony_ci   case TGSI_SEMANTIC_CLIPDIST:     return 0x2c0 + si * 0x10;
54bf215546Sopenharmony_ci   case TGSI_SEMANTIC_CLIPVERTEX:   return 0x270;
55bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PCOORD:       return 0x2e0;
56bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TESSCOORD:    return 0x2f0;
57bf215546Sopenharmony_ci   case TGSI_SEMANTIC_INSTANCEID:   return 0x2f8;
58bf215546Sopenharmony_ci   case TGSI_SEMANTIC_VERTEXID:     return 0x2fc;
59bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TEXCOORD:     return 0x300 + si * 0x10;
60bf215546Sopenharmony_ci   default:
61bf215546Sopenharmony_ci      assert(!"invalid TGSI input semantic");
62bf215546Sopenharmony_ci      return ~0;
63bf215546Sopenharmony_ci   }
64bf215546Sopenharmony_ci}
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_cistatic uint32_t
67bf215546Sopenharmony_cinvc0_shader_output_address(unsigned sn, unsigned si)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci   switch (sn) {
70bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TESSOUTER:     return 0x000 + si * 0x4;
71bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TESSINNER:     return 0x010 + si * 0x4;
72bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PATCH:         return 0x020 + si * 0x10;
73bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PRIMID:        return 0x060;
74bf215546Sopenharmony_ci   case TGSI_SEMANTIC_LAYER:         return 0x064;
75bf215546Sopenharmony_ci   case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
76bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PSIZE:         return 0x06c;
77bf215546Sopenharmony_ci   case TGSI_SEMANTIC_POSITION:      return 0x070;
78bf215546Sopenharmony_ci   case TGSI_SEMANTIC_GENERIC:       return 0x080 + si * 0x10;
79bf215546Sopenharmony_ci   case TGSI_SEMANTIC_FOG:           return 0x2e8;
80bf215546Sopenharmony_ci   case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
81bf215546Sopenharmony_ci   case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
82bf215546Sopenharmony_ci   case TGSI_SEMANTIC_CLIPDIST:      return 0x2c0 + si * 0x10;
83bf215546Sopenharmony_ci   case TGSI_SEMANTIC_CLIPVERTEX:    return 0x270;
84bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10;
85bf215546Sopenharmony_ci   case TGSI_SEMANTIC_VIEWPORT_MASK: return 0x3a0;
86bf215546Sopenharmony_ci   case TGSI_SEMANTIC_EDGEFLAG:      return ~0;
87bf215546Sopenharmony_ci   default:
88bf215546Sopenharmony_ci      assert(!"invalid TGSI output semantic");
89bf215546Sopenharmony_ci      return ~0;
90bf215546Sopenharmony_ci   }
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistatic int
94bf215546Sopenharmony_cinvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   unsigned i, c, n;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   for (n = 0, i = 0; i < info->numInputs; ++i) {
99bf215546Sopenharmony_ci      switch (info->in[i].sn) {
100bf215546Sopenharmony_ci      case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */
101bf215546Sopenharmony_ci      case TGSI_SEMANTIC_VERTEXID:
102bf215546Sopenharmony_ci         info->in[i].mask = 0x1;
103bf215546Sopenharmony_ci         info->in[i].slot[0] =
104bf215546Sopenharmony_ci            nvc0_shader_input_address(info->in[i].sn, 0) / 4;
105bf215546Sopenharmony_ci         continue;
106bf215546Sopenharmony_ci      default:
107bf215546Sopenharmony_ci         break;
108bf215546Sopenharmony_ci      }
109bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
110bf215546Sopenharmony_ci         info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4;
111bf215546Sopenharmony_ci      ++n;
112bf215546Sopenharmony_ci   }
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   return 0;
115bf215546Sopenharmony_ci}
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_cistatic int
118bf215546Sopenharmony_cinvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info)
119bf215546Sopenharmony_ci{
120bf215546Sopenharmony_ci   unsigned offset;
121bf215546Sopenharmony_ci   unsigned i, c;
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   for (i = 0; i < info->numInputs; ++i) {
124bf215546Sopenharmony_ci      offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
127bf215546Sopenharmony_ci         info->in[i].slot[c] = (offset + c * 0x4) / 4;
128bf215546Sopenharmony_ci   }
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci   return 0;
131bf215546Sopenharmony_ci}
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_cistatic int
134bf215546Sopenharmony_cinvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info)
135bf215546Sopenharmony_ci{
136bf215546Sopenharmony_ci   unsigned count = info->prop.fp.numColourResults * 4;
137bf215546Sopenharmony_ci   unsigned i, c;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   /* Compute the relative position of each color output, since skipped MRT
140bf215546Sopenharmony_ci    * positions will not have registers allocated to them.
141bf215546Sopenharmony_ci    */
142bf215546Sopenharmony_ci   unsigned colors[8] = {0};
143bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i)
144bf215546Sopenharmony_ci      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
145bf215546Sopenharmony_ci         colors[info->out[i].si] = 1;
146bf215546Sopenharmony_ci   for (i = 0, c = 0; i < 8; i++)
147bf215546Sopenharmony_ci      if (colors[i])
148bf215546Sopenharmony_ci         colors[i] = c++;
149bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i)
150bf215546Sopenharmony_ci      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
151bf215546Sopenharmony_ci         for (c = 0; c < 4; ++c)
152bf215546Sopenharmony_ci            info->out[i].slot[c] = colors[info->out[i].si] * 4 + c;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
155bf215546Sopenharmony_ci      info->out[info->io.sampleMask].slot[0] = count++;
156bf215546Sopenharmony_ci   else
157bf215546Sopenharmony_ci   if (info->target >= 0xe0)
158bf215546Sopenharmony_ci      count++; /* on Kepler, depth is always last colour reg + 2 */
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
161bf215546Sopenharmony_ci      info->out[info->io.fragDepth].slot[2] = count;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   return 0;
164bf215546Sopenharmony_ci}
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_cistatic int
167bf215546Sopenharmony_cinvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info)
168bf215546Sopenharmony_ci{
169bf215546Sopenharmony_ci   unsigned offset;
170bf215546Sopenharmony_ci   unsigned i, c;
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i) {
173bf215546Sopenharmony_ci      offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
176bf215546Sopenharmony_ci         info->out[i].slot[c] = (offset + c * 0x4) / 4;
177bf215546Sopenharmony_ci   }
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   return 0;
180bf215546Sopenharmony_ci}
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_cistatic int
183bf215546Sopenharmony_cinvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
184bf215546Sopenharmony_ci{
185bf215546Sopenharmony_ci   int ret;
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   if (info->type == PIPE_SHADER_VERTEX)
188bf215546Sopenharmony_ci      ret = nvc0_vp_assign_input_slots(info);
189bf215546Sopenharmony_ci   else
190bf215546Sopenharmony_ci      ret = nvc0_sp_assign_input_slots(info);
191bf215546Sopenharmony_ci   if (ret)
192bf215546Sopenharmony_ci      return ret;
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   if (info->type == PIPE_SHADER_FRAGMENT)
195bf215546Sopenharmony_ci      ret = nvc0_fp_assign_output_slots(info);
196bf215546Sopenharmony_ci   else
197bf215546Sopenharmony_ci      ret = nvc0_sp_assign_output_slots(info);
198bf215546Sopenharmony_ci   return ret;
199bf215546Sopenharmony_ci}
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_cistatic inline void
202bf215546Sopenharmony_cinvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci   uint8_t min = (vp->hdr[4] >> 12) & 0xff;
205bf215546Sopenharmony_ci   uint8_t max = (vp->hdr[4] >> 24);
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   min = MIN2(min, slot);
208bf215546Sopenharmony_ci   max = MAX2(max, slot);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   vp->hdr[4] = (max << 24) | (min << 12);
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci/* Common part of header generation for VP, TCP, TEP and GP. */
214bf215546Sopenharmony_cistatic int
215bf215546Sopenharmony_cinvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info_out *info)
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   unsigned i, c, a;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   for (i = 0; i < info->numInputs; ++i) {
220bf215546Sopenharmony_ci      if (info->in[i].patch)
221bf215546Sopenharmony_ci         continue;
222bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c) {
223bf215546Sopenharmony_ci         a = info->in[i].slot[c];
224bf215546Sopenharmony_ci         if (info->in[i].mask & (1 << c))
225bf215546Sopenharmony_ci            vp->hdr[5 + a / 32] |= 1 << (a % 32);
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci   }
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i) {
230bf215546Sopenharmony_ci      if (info->out[i].patch)
231bf215546Sopenharmony_ci         continue;
232bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c) {
233bf215546Sopenharmony_ci         if (!(info->out[i].mask & (1 << c)))
234bf215546Sopenharmony_ci            continue;
235bf215546Sopenharmony_ci         assert(info->out[i].slot[c] >= 0x40 / 4);
236bf215546Sopenharmony_ci         a = info->out[i].slot[c] - 0x40 / 4;
237bf215546Sopenharmony_ci         vp->hdr[13 + a / 32] |= 1 << (a % 32);
238bf215546Sopenharmony_ci         if (info->out[i].oread)
239bf215546Sopenharmony_ci            nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
240bf215546Sopenharmony_ci      }
241bf215546Sopenharmony_ci   }
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   for (i = 0; i < info->numSysVals; ++i) {
244bf215546Sopenharmony_ci      switch (info->sv[i].sn) {
245bf215546Sopenharmony_ci      case TGSI_SEMANTIC_PRIMID:
246bf215546Sopenharmony_ci         vp->hdr[5] |= 1 << 24;
247bf215546Sopenharmony_ci         break;
248bf215546Sopenharmony_ci      case TGSI_SEMANTIC_INSTANCEID:
249bf215546Sopenharmony_ci         vp->hdr[10] |= 1 << 30;
250bf215546Sopenharmony_ci         break;
251bf215546Sopenharmony_ci      case TGSI_SEMANTIC_VERTEXID:
252bf215546Sopenharmony_ci         vp->hdr[10] |= 1 << 31;
253bf215546Sopenharmony_ci         break;
254bf215546Sopenharmony_ci      case TGSI_SEMANTIC_TESSCOORD:
255bf215546Sopenharmony_ci         /* We don't have the mask, nor the slots populated. While this could
256bf215546Sopenharmony_ci          * be achieved, the vast majority of the time if either of the coords
257bf215546Sopenharmony_ci          * are read, then both will be read.
258bf215546Sopenharmony_ci          */
259bf215546Sopenharmony_ci         nvc0_vtgp_hdr_update_oread(vp, 0x2f0 / 4);
260bf215546Sopenharmony_ci         nvc0_vtgp_hdr_update_oread(vp, 0x2f4 / 4);
261bf215546Sopenharmony_ci         break;
262bf215546Sopenharmony_ci      default:
263bf215546Sopenharmony_ci         break;
264bf215546Sopenharmony_ci      }
265bf215546Sopenharmony_ci   }
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   vp->vp.clip_enable = (1 << info->io.clipDistances) - 1;
268bf215546Sopenharmony_ci   vp->vp.cull_enable =
269bf215546Sopenharmony_ci      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;
270bf215546Sopenharmony_ci   for (i = 0; i < info->io.cullDistances; ++i)
271bf215546Sopenharmony_ci      vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   if (info->io.genUserClip < 0)
274bf215546Sopenharmony_ci      vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   vp->vp.layer_viewport_relative = info->io.layer_viewport_relative;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   return 0;
279bf215546Sopenharmony_ci}
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_cistatic int
282bf215546Sopenharmony_cinvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info_out *info)
283bf215546Sopenharmony_ci{
284bf215546Sopenharmony_ci   vp->hdr[0] = 0x20061 | (1 << 10);
285bf215546Sopenharmony_ci   vp->hdr[4] = 0xff000;
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   return nvc0_vtgp_gen_header(vp, info);
288bf215546Sopenharmony_ci}
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_cistatic void
291bf215546Sopenharmony_cinvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info_out *info)
292bf215546Sopenharmony_ci{
293bf215546Sopenharmony_ci   if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) {
294bf215546Sopenharmony_ci      tp->tp.tess_mode = ~0;
295bf215546Sopenharmony_ci      return;
296bf215546Sopenharmony_ci   }
297bf215546Sopenharmony_ci   switch (info->prop.tp.domain) {
298bf215546Sopenharmony_ci   case PIPE_PRIM_LINES:
299bf215546Sopenharmony_ci      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
300bf215546Sopenharmony_ci      break;
301bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLES:
302bf215546Sopenharmony_ci      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
303bf215546Sopenharmony_ci      break;
304bf215546Sopenharmony_ci   case PIPE_PRIM_QUADS:
305bf215546Sopenharmony_ci      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
306bf215546Sopenharmony_ci      break;
307bf215546Sopenharmony_ci   default:
308bf215546Sopenharmony_ci      tp->tp.tess_mode = ~0;
309bf215546Sopenharmony_ci      return;
310bf215546Sopenharmony_ci   }
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   /* It seems like lines want the "CW" bit to indicate they're connected, and
313bf215546Sopenharmony_ci    * spit out errors in dmesg when the "CONNECTED" bit is set.
314bf215546Sopenharmony_ci    */
315bf215546Sopenharmony_ci   if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) {
316bf215546Sopenharmony_ci      if (info->prop.tp.domain == PIPE_PRIM_LINES)
317bf215546Sopenharmony_ci         tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
318bf215546Sopenharmony_ci      else
319bf215546Sopenharmony_ci         tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
320bf215546Sopenharmony_ci   }
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   /* Winding only matters for triangles/quads, not lines. */
323bf215546Sopenharmony_ci   if (info->prop.tp.domain != PIPE_PRIM_LINES &&
324bf215546Sopenharmony_ci       info->prop.tp.outputPrim != PIPE_PRIM_POINTS &&
325bf215546Sopenharmony_ci       info->prop.tp.winding > 0)
326bf215546Sopenharmony_ci      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   switch (info->prop.tp.partitioning) {
329bf215546Sopenharmony_ci   case PIPE_TESS_SPACING_EQUAL:
330bf215546Sopenharmony_ci      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
331bf215546Sopenharmony_ci      break;
332bf215546Sopenharmony_ci   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
333bf215546Sopenharmony_ci      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
334bf215546Sopenharmony_ci      break;
335bf215546Sopenharmony_ci   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
336bf215546Sopenharmony_ci      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
337bf215546Sopenharmony_ci      break;
338bf215546Sopenharmony_ci   default:
339bf215546Sopenharmony_ci      assert(!"invalid tessellator partitioning");
340bf215546Sopenharmony_ci      break;
341bf215546Sopenharmony_ci   }
342bf215546Sopenharmony_ci}
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_cistatic int
345bf215546Sopenharmony_cinvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info_out *info)
346bf215546Sopenharmony_ci{
347bf215546Sopenharmony_ci   unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   if (info->numPatchConstants)
350bf215546Sopenharmony_ci      opcs = 8 + info->numPatchConstants * 4;
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci   tcp->hdr[0] = 0x20061 | (2 << 10);
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   tcp->hdr[1] = opcs << 24;
355bf215546Sopenharmony_ci   tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   nvc0_vtgp_gen_header(tcp, info);
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   if (info->target >= NVISA_GM107_CHIPSET) {
362bf215546Sopenharmony_ci      /* On GM107+, the number of output patch components has moved in the TCP
363bf215546Sopenharmony_ci       * header, but it seems like blob still also uses the old position.
364bf215546Sopenharmony_ci       * Also, the high 8-bits are located in between the min/max parallel
365bf215546Sopenharmony_ci       * field and has to be set after updating the outputs. */
366bf215546Sopenharmony_ci      tcp->hdr[3] = (opcs & 0x0f) << 28;
367bf215546Sopenharmony_ci      tcp->hdr[4] |= (opcs & 0xf0) << 16;
368bf215546Sopenharmony_ci   }
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   nvc0_tp_get_tess_mode(tcp, info);
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci   return 0;
373bf215546Sopenharmony_ci}
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_cistatic int
376bf215546Sopenharmony_cinvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info_out *info)
377bf215546Sopenharmony_ci{
378bf215546Sopenharmony_ci   tep->hdr[0] = 0x20061 | (3 << 10);
379bf215546Sopenharmony_ci   tep->hdr[4] = 0xff000;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   nvc0_vtgp_gen_header(tep, info);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   nvc0_tp_get_tess_mode(tep, info);
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   tep->hdr[18] |= 0x3 << 12; /* ? */
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   return 0;
388bf215546Sopenharmony_ci}
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_cistatic int
391bf215546Sopenharmony_cinvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info_out *info)
392bf215546Sopenharmony_ci{
393bf215546Sopenharmony_ci   gp->hdr[0] = 0x20061 | (4 << 10);
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ci   switch (info->prop.gp.outputPrim) {
398bf215546Sopenharmony_ci   case PIPE_PRIM_POINTS:
399bf215546Sopenharmony_ci      gp->hdr[3] = 0x01000000;
400bf215546Sopenharmony_ci      gp->hdr[0] |= 0xf0000000;
401bf215546Sopenharmony_ci      break;
402bf215546Sopenharmony_ci   case PIPE_PRIM_LINE_STRIP:
403bf215546Sopenharmony_ci      gp->hdr[3] = 0x06000000;
404bf215546Sopenharmony_ci      gp->hdr[0] |= 0x10000000;
405bf215546Sopenharmony_ci      break;
406bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLE_STRIP:
407bf215546Sopenharmony_ci      gp->hdr[3] = 0x07000000;
408bf215546Sopenharmony_ci      gp->hdr[0] |= 0x10000000;
409bf215546Sopenharmony_ci      break;
410bf215546Sopenharmony_ci   default:
411bf215546Sopenharmony_ci      assert(0);
412bf215546Sopenharmony_ci      break;
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   gp->hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024);
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   return nvc0_vtgp_gen_header(gp, info);
418bf215546Sopenharmony_ci}
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci#define NVC0_INTERP_FLAT          (1 << 0)
421bf215546Sopenharmony_ci#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
422bf215546Sopenharmony_ci#define NVC0_INTERP_LINEAR        (3 << 0)
423bf215546Sopenharmony_ci#define NVC0_INTERP_CENTROID      (1 << 2)
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_cistatic uint8_t
426bf215546Sopenharmony_cinvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
427bf215546Sopenharmony_ci{
428bf215546Sopenharmony_ci   if (var->linear)
429bf215546Sopenharmony_ci      return NVC0_INTERP_LINEAR;
430bf215546Sopenharmony_ci   if (var->flat)
431bf215546Sopenharmony_ci      return NVC0_INTERP_FLAT;
432bf215546Sopenharmony_ci   return NVC0_INTERP_PERSPECTIVE;
433bf215546Sopenharmony_ci}
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_cistatic int
436bf215546Sopenharmony_cinvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info_out *info)
437bf215546Sopenharmony_ci{
438bf215546Sopenharmony_ci   unsigned i, c, a, m;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   /* just 00062 on Kepler */
441bf215546Sopenharmony_ci   fp->hdr[0] = 0x20062 | (5 << 10);
442bf215546Sopenharmony_ci   fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   if (info->prop.fp.usesDiscard)
445bf215546Sopenharmony_ci      fp->hdr[0] |= 0x8000;
446bf215546Sopenharmony_ci   if (!info->prop.fp.separateFragData)
447bf215546Sopenharmony_ci      fp->hdr[0] |= 0x4000;
448bf215546Sopenharmony_ci   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
449bf215546Sopenharmony_ci      fp->hdr[19] |= 0x1;
450bf215546Sopenharmony_ci   if (info->prop.fp.writesDepth) {
451bf215546Sopenharmony_ci      fp->hdr[19] |= 0x2;
452bf215546Sopenharmony_ci      fp->flags[0] = 0x11; /* deactivate ZCULL */
453bf215546Sopenharmony_ci   }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   for (i = 0; i < info->numInputs; ++i) {
456bf215546Sopenharmony_ci      m = nvc0_hdr_interp_mode(&info->in[i]);
457bf215546Sopenharmony_ci      if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
458bf215546Sopenharmony_ci         fp->fp.colors |= 1 << info->in[i].si;
459bf215546Sopenharmony_ci         if (info->in[i].sc)
460bf215546Sopenharmony_ci            fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
461bf215546Sopenharmony_ci      }
462bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c) {
463bf215546Sopenharmony_ci         if (!(info->in[i].mask & (1 << c)))
464bf215546Sopenharmony_ci            continue;
465bf215546Sopenharmony_ci         a = info->in[i].slot[c];
466bf215546Sopenharmony_ci         if (info->in[i].slot[0] >= (0x060 / 4) &&
467bf215546Sopenharmony_ci             info->in[i].slot[0] <= (0x07c / 4)) {
468bf215546Sopenharmony_ci            fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4));
469bf215546Sopenharmony_ci         } else
470bf215546Sopenharmony_ci         if (info->in[i].slot[0] >= (0x2c0 / 4) &&
471bf215546Sopenharmony_ci             info->in[i].slot[0] <= (0x2fc / 4)) {
472bf215546Sopenharmony_ci            fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
473bf215546Sopenharmony_ci         } else {
474bf215546Sopenharmony_ci            if (info->in[i].slot[c] < (0x040 / 4) ||
475bf215546Sopenharmony_ci                info->in[i].slot[c] > (0x380 / 4))
476bf215546Sopenharmony_ci               continue;
477bf215546Sopenharmony_ci            a *= 2;
478bf215546Sopenharmony_ci            if (info->in[i].slot[0] >= (0x300 / 4))
479bf215546Sopenharmony_ci               a -= 32;
480bf215546Sopenharmony_ci            fp->hdr[4 + a / 32] |= m << (a % 32);
481bf215546Sopenharmony_ci         }
482bf215546Sopenharmony_ci      }
483bf215546Sopenharmony_ci   }
484bf215546Sopenharmony_ci   /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
485bf215546Sopenharmony_ci   if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
486bf215546Sopenharmony_ci      fp->hdr[5] |= 0x30000000;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i) {
489bf215546Sopenharmony_ci      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
490bf215546Sopenharmony_ci         fp->hdr[18] |= 0xf << (4 * info->out[i].si);
491bf215546Sopenharmony_ci   }
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci   /* There are no "regular" attachments, but the shader still needs to be
494bf215546Sopenharmony_ci    * executed. It seems like it wants to think that it has some color
495bf215546Sopenharmony_ci    * outputs in order to actually run.
496bf215546Sopenharmony_ci    */
497bf215546Sopenharmony_ci   if (info->prop.fp.numColourResults == 0 && !info->prop.fp.writesDepth)
498bf215546Sopenharmony_ci      fp->hdr[18] |= 0xf;
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   fp->fp.early_z = info->prop.fp.earlyFragTests;
501bf215546Sopenharmony_ci   fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
502bf215546Sopenharmony_ci   fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
503bf215546Sopenharmony_ci   fp->fp.post_depth_coverage = info->prop.fp.postDepthCoverage;
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   /* Mark position xy and layer as read */
506bf215546Sopenharmony_ci   if (fp->fp.reads_framebuffer)
507bf215546Sopenharmony_ci      fp->hdr[5] |= 0x32000000;
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci   return 0;
510bf215546Sopenharmony_ci}
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_cistatic struct nvc0_transform_feedback_state *
513bf215546Sopenharmony_cinvc0_program_create_tfb_state(const struct nv50_ir_prog_info_out *info,
514bf215546Sopenharmony_ci                              const struct pipe_stream_output_info *pso)
515bf215546Sopenharmony_ci{
516bf215546Sopenharmony_ci   struct nvc0_transform_feedback_state *tfb;
517bf215546Sopenharmony_ci   unsigned b, i, c;
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci   tfb = MALLOC_STRUCT(nvc0_transform_feedback_state);
520bf215546Sopenharmony_ci   if (!tfb)
521bf215546Sopenharmony_ci      return NULL;
522bf215546Sopenharmony_ci   for (b = 0; b < 4; ++b) {
523bf215546Sopenharmony_ci      tfb->stride[b] = pso->stride[b] * 4;
524bf215546Sopenharmony_ci      tfb->varying_count[b] = 0;
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci   memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   for (i = 0; i < pso->num_outputs; ++i) {
529bf215546Sopenharmony_ci      unsigned s = pso->output[i].start_component;
530bf215546Sopenharmony_ci      unsigned p = pso->output[i].dst_offset;
531bf215546Sopenharmony_ci      const unsigned r = pso->output[i].register_index;
532bf215546Sopenharmony_ci      b = pso->output[i].output_buffer;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci      if (r >= info->numOutputs)
535bf215546Sopenharmony_ci         continue;
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci      for (c = 0; c < pso->output[i].num_components; ++c)
538bf215546Sopenharmony_ci         tfb->varying_index[b][p++] = info->out[r].slot[s + c];
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci      tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
541bf215546Sopenharmony_ci      tfb->stream[b] = pso->output[i].stream;
542bf215546Sopenharmony_ci   }
543bf215546Sopenharmony_ci   for (b = 0; b < 4; ++b) // zero unused indices (looks nicer)
544bf215546Sopenharmony_ci      for (c = tfb->varying_count[b]; c & 3; ++c)
545bf215546Sopenharmony_ci         tfb->varying_index[b][c] = 0;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci   return tfb;
548bf215546Sopenharmony_ci}
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci#ifndef NDEBUG
551bf215546Sopenharmony_cistatic void
552bf215546Sopenharmony_cinvc0_program_dump(struct nvc0_program *prog)
553bf215546Sopenharmony_ci{
554bf215546Sopenharmony_ci   unsigned pos;
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci   if (prog->type != PIPE_SHADER_COMPUTE) {
557bf215546Sopenharmony_ci      _debug_printf("dumping HDR for type %i\n", prog->type);
558bf215546Sopenharmony_ci      for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos)
559bf215546Sopenharmony_ci         _debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
560bf215546Sopenharmony_ci                      pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci   _debug_printf("shader binary code (0x%x bytes):", prog->code_size);
563bf215546Sopenharmony_ci   for (pos = 0; pos < prog->code_size / 4; ++pos) {
564bf215546Sopenharmony_ci      if ((pos % 8) == 0)
565bf215546Sopenharmony_ci         _debug_printf("\n");
566bf215546Sopenharmony_ci      _debug_printf("%08x ", prog->code[pos]);
567bf215546Sopenharmony_ci   }
568bf215546Sopenharmony_ci   _debug_printf("\n");
569bf215546Sopenharmony_ci}
570bf215546Sopenharmony_ci#endif
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_cibool
573bf215546Sopenharmony_cinvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
574bf215546Sopenharmony_ci                       struct disk_cache *disk_shader_cache,
575bf215546Sopenharmony_ci                       struct util_debug_callback *debug)
576bf215546Sopenharmony_ci{
577bf215546Sopenharmony_ci   struct blob blob;
578bf215546Sopenharmony_ci   size_t cache_size;
579bf215546Sopenharmony_ci   struct nv50_ir_prog_info *info;
580bf215546Sopenharmony_ci   struct nv50_ir_prog_info_out info_out = {};
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   int ret = 0;
583bf215546Sopenharmony_ci   cache_key key;
584bf215546Sopenharmony_ci   bool shader_loaded = false;
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   info = CALLOC_STRUCT(nv50_ir_prog_info);
587bf215546Sopenharmony_ci   if (!info)
588bf215546Sopenharmony_ci      return false;
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   info->type = prog->type;
591bf215546Sopenharmony_ci   info->target = chipset;
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci   info->bin.sourceRep = prog->pipe.type;
594bf215546Sopenharmony_ci   switch (prog->pipe.type) {
595bf215546Sopenharmony_ci   case PIPE_SHADER_IR_TGSI:
596bf215546Sopenharmony_ci      info->bin.source = (void *)prog->pipe.tokens;
597bf215546Sopenharmony_ci      break;
598bf215546Sopenharmony_ci   case PIPE_SHADER_IR_NIR:
599bf215546Sopenharmony_ci      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
600bf215546Sopenharmony_ci      break;
601bf215546Sopenharmony_ci   default:
602bf215546Sopenharmony_ci      assert(!"unsupported IR!");
603bf215546Sopenharmony_ci      free(info);
604bf215546Sopenharmony_ci      return false;
605bf215546Sopenharmony_ci   }
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci#ifndef NDEBUG
608bf215546Sopenharmony_ci   info->target = debug_get_num_option("NV50_PROG_CHIPSET", chipset);
609bf215546Sopenharmony_ci   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
610bf215546Sopenharmony_ci   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
611bf215546Sopenharmony_ci   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
612bf215546Sopenharmony_ci#else
613bf215546Sopenharmony_ci   info->optLevel = 3;
614bf215546Sopenharmony_ci#endif
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   info->bin.smemSize = prog->cp.smem_size;
617bf215546Sopenharmony_ci   info->io.genUserClip = prog->vp.num_ucps;
618bf215546Sopenharmony_ci   info->io.auxCBSlot = 15;
619bf215546Sopenharmony_ci   info->io.msInfoCBSlot = 15;
620bf215546Sopenharmony_ci   info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
621bf215546Sopenharmony_ci   info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
622bf215546Sopenharmony_ci   info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
623bf215546Sopenharmony_ci   info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
624bf215546Sopenharmony_ci   info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
625bf215546Sopenharmony_ci   if (info->target >= NVISA_GK104_CHIPSET) {
626bf215546Sopenharmony_ci      info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
627bf215546Sopenharmony_ci      info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
628bf215546Sopenharmony_ci      info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
629bf215546Sopenharmony_ci   }
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_COMPUTE) {
632bf215546Sopenharmony_ci      if (info->target >= NVISA_GK104_CHIPSET) {
633bf215546Sopenharmony_ci         info->io.auxCBSlot = 7;
634bf215546Sopenharmony_ci         info->io.msInfoCBSlot = 7;
635bf215546Sopenharmony_ci         info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
636bf215546Sopenharmony_ci      }
637bf215546Sopenharmony_ci      info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
638bf215546Sopenharmony_ci   } else {
639bf215546Sopenharmony_ci      info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
640bf215546Sopenharmony_ci   }
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   info->assignSlots = nvc0_program_assign_varying_slots;
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci   blob_init(&blob);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   if (disk_shader_cache) {
647bf215546Sopenharmony_ci      if (nv50_ir_prog_info_serialize(&blob, info)) {
648bf215546Sopenharmony_ci         void *cached_data = NULL;
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci         disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
651bf215546Sopenharmony_ci         cached_data = disk_cache_get(disk_shader_cache, key, &cache_size);
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci         if (cached_data && cache_size >= blob.size) { // blob.size is the size of serialized "info"
654bf215546Sopenharmony_ci            /* Blob contains only "info". In disk cache, "info_out" comes right after it */
655bf215546Sopenharmony_ci            size_t offset = blob.size;
656bf215546Sopenharmony_ci            if (nv50_ir_prog_info_out_deserialize(cached_data, cache_size, offset, &info_out))
657bf215546Sopenharmony_ci               shader_loaded = true;
658bf215546Sopenharmony_ci            else
659bf215546Sopenharmony_ci               debug_printf("WARNING: Couldn't deserialize shaders");
660bf215546Sopenharmony_ci         }
661bf215546Sopenharmony_ci         free(cached_data);
662bf215546Sopenharmony_ci      } else {
663bf215546Sopenharmony_ci         debug_printf("WARNING: Couldn't serialize input shaders");
664bf215546Sopenharmony_ci      }
665bf215546Sopenharmony_ci   }
666bf215546Sopenharmony_ci   if (!shader_loaded) {
667bf215546Sopenharmony_ci      cache_size = 0;
668bf215546Sopenharmony_ci      ret = nv50_ir_generate_code(info, &info_out);
669bf215546Sopenharmony_ci      if (ret) {
670bf215546Sopenharmony_ci         NOUVEAU_ERR("shader translation failed: %i\n", ret);
671bf215546Sopenharmony_ci         goto out;
672bf215546Sopenharmony_ci      }
673bf215546Sopenharmony_ci      if (disk_shader_cache) {
674bf215546Sopenharmony_ci         if (nv50_ir_prog_info_out_serialize(&blob, &info_out)) {
675bf215546Sopenharmony_ci            disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
676bf215546Sopenharmony_ci            cache_size = blob.size;
677bf215546Sopenharmony_ci         } else {
678bf215546Sopenharmony_ci            debug_printf("WARNING: Couldn't serialize shaders");
679bf215546Sopenharmony_ci         }
680bf215546Sopenharmony_ci      }
681bf215546Sopenharmony_ci   }
682bf215546Sopenharmony_ci   blob_finish(&blob);
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci   prog->code = info_out.bin.code;
685bf215546Sopenharmony_ci   prog->code_size = info_out.bin.codeSize;
686bf215546Sopenharmony_ci   prog->relocs = info_out.bin.relocData;
687bf215546Sopenharmony_ci   prog->fixups = info_out.bin.fixupData;
688bf215546Sopenharmony_ci   if (info_out.target >= NVISA_GV100_CHIPSET)
689bf215546Sopenharmony_ci      prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 256); //XXX: why?
690bf215546Sopenharmony_ci   else
691bf215546Sopenharmony_ci      prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1));
692bf215546Sopenharmony_ci   prog->cp.smem_size = info_out.bin.smemSize;
693bf215546Sopenharmony_ci   prog->num_barriers = info_out.numBarriers;
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci   prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS;
696bf215546Sopenharmony_ci   prog->vp.need_draw_parameters = info_out.prop.vp.usesDrawParameters;
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci   if (info_out.io.edgeFlagOut < PIPE_MAX_ATTRIBS)
699bf215546Sopenharmony_ci      info_out.out[info_out.io.edgeFlagOut].mask = 0; /* for headergen */
700bf215546Sopenharmony_ci   prog->vp.edgeflag = info_out.io.edgeFlagIn;
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   switch (prog->type) {
703bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
704bf215546Sopenharmony_ci      ret = nvc0_vp_gen_header(prog, &info_out);
705bf215546Sopenharmony_ci      break;
706bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
707bf215546Sopenharmony_ci      ret = nvc0_tcp_gen_header(prog, &info_out);
708bf215546Sopenharmony_ci      break;
709bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
710bf215546Sopenharmony_ci      ret = nvc0_tep_gen_header(prog, &info_out);
711bf215546Sopenharmony_ci      break;
712bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
713bf215546Sopenharmony_ci      ret = nvc0_gp_gen_header(prog, &info_out);
714bf215546Sopenharmony_ci      break;
715bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
716bf215546Sopenharmony_ci      ret = nvc0_fp_gen_header(prog, &info_out);
717bf215546Sopenharmony_ci      break;
718bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:
719bf215546Sopenharmony_ci      break;
720bf215546Sopenharmony_ci   default:
721bf215546Sopenharmony_ci      ret = -1;
722bf215546Sopenharmony_ci      NOUVEAU_ERR("unknown program type: %u\n", prog->type);
723bf215546Sopenharmony_ci      break;
724bf215546Sopenharmony_ci   }
725bf215546Sopenharmony_ci   if (ret)
726bf215546Sopenharmony_ci      goto out;
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci   if (info_out.bin.tlsSpace) {
729bf215546Sopenharmony_ci      assert(info_out.bin.tlsSpace < (1 << 24));
730bf215546Sopenharmony_ci      prog->hdr[0] |= 1 << 26;
731bf215546Sopenharmony_ci      prog->hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */
732bf215546Sopenharmony_ci      prog->need_tls = true;
733bf215546Sopenharmony_ci   }
734bf215546Sopenharmony_ci   /* TODO: factor 2 only needed where joinat/precont is used,
735bf215546Sopenharmony_ci    *       and we only have to count non-uniform branches
736bf215546Sopenharmony_ci    */
737bf215546Sopenharmony_ci   /*
738bf215546Sopenharmony_ci   if ((info->maxCFDepth * 2) > 16) {
739bf215546Sopenharmony_ci      prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
740bf215546Sopenharmony_ci      prog->need_tls = true;
741bf215546Sopenharmony_ci   }
742bf215546Sopenharmony_ci   */
743bf215546Sopenharmony_ci   if (info_out.io.globalAccess)
744bf215546Sopenharmony_ci      prog->hdr[0] |= 1 << 26;
745bf215546Sopenharmony_ci   if (info_out.io.globalAccess & 0x2)
746bf215546Sopenharmony_ci      prog->hdr[0] |= 1 << 16;
747bf215546Sopenharmony_ci   if (info_out.io.fp64)
748bf215546Sopenharmony_ci      prog->hdr[0] |= 1 << 27;
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   if (prog->pipe.stream_output.num_outputs)
751bf215546Sopenharmony_ci      prog->tfb = nvc0_program_create_tfb_state(&info_out,
752bf215546Sopenharmony_ci                                                &prog->pipe.stream_output);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   util_debug_message(debug, SHADER_INFO,
755bf215546Sopenharmony_ci                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d, cached: %zd",
756bf215546Sopenharmony_ci                      prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize,
757bf215546Sopenharmony_ci                      prog->num_gprs, info_out.bin.instructions,
758bf215546Sopenharmony_ci                      info_out.bin.codeSize, cache_size);
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci#ifndef NDEBUG
761bf215546Sopenharmony_ci   if (debug_get_option("NV50_PROG_CHIPSET", NULL) && info->dbgFlags)
762bf215546Sopenharmony_ci      nvc0_program_dump(prog);
763bf215546Sopenharmony_ci#endif
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ciout:
766bf215546Sopenharmony_ci   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
767bf215546Sopenharmony_ci      ralloc_free((void *)info->bin.source);
768bf215546Sopenharmony_ci   FREE(info);
769bf215546Sopenharmony_ci   return !ret;
770bf215546Sopenharmony_ci}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_cistatic inline int
773bf215546Sopenharmony_cinvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
774bf215546Sopenharmony_ci{
775bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
776bf215546Sopenharmony_ci   const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
777bf215546Sopenharmony_ci   int ret;
778bf215546Sopenharmony_ci   uint32_t size = prog->code_size;
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   if (!is_cp) {
781bf215546Sopenharmony_ci      if (screen->eng3d->oclass < TU102_3D_CLASS)
782bf215546Sopenharmony_ci         size += GF100_SHADER_HEADER_SIZE;
783bf215546Sopenharmony_ci      else
784bf215546Sopenharmony_ci         size += TU102_SHADER_HEADER_SIZE;
785bf215546Sopenharmony_ci   }
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   /* On Fermi, SP_START_ID must be aligned to 0x40.
788bf215546Sopenharmony_ci    * On Kepler, the first instruction must be aligned to 0x80 because
789bf215546Sopenharmony_ci    * latency information is expected only at certain positions.
790bf215546Sopenharmony_ci    */
791bf215546Sopenharmony_ci   if (screen->base.class_3d >= NVE4_3D_CLASS)
792bf215546Sopenharmony_ci      size = size + (is_cp ? 0x40 : 0x70);
793bf215546Sopenharmony_ci   size = align(size, 0x40);
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
796bf215546Sopenharmony_ci   if (ret)
797bf215546Sopenharmony_ci      return ret;
798bf215546Sopenharmony_ci   prog->code_base = prog->mem->start;
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   if (!is_cp) {
801bf215546Sopenharmony_ci      if (screen->base.class_3d >= NVE4_3D_CLASS &&
802bf215546Sopenharmony_ci          screen->base.class_3d < TU102_3D_CLASS) {
803bf215546Sopenharmony_ci         switch (prog->mem->start & 0xff) {
804bf215546Sopenharmony_ci         case 0x40: prog->code_base += 0x70; break;
805bf215546Sopenharmony_ci         case 0x80: prog->code_base += 0x30; break;
806bf215546Sopenharmony_ci         case 0xc0: prog->code_base += 0x70; break;
807bf215546Sopenharmony_ci         default:
808bf215546Sopenharmony_ci            prog->code_base += 0x30;
809bf215546Sopenharmony_ci            assert((prog->mem->start & 0xff) == 0x00);
810bf215546Sopenharmony_ci            break;
811bf215546Sopenharmony_ci         }
812bf215546Sopenharmony_ci      }
813bf215546Sopenharmony_ci   } else {
814bf215546Sopenharmony_ci      if (screen->base.class_3d >= NVE4_3D_CLASS) {
815bf215546Sopenharmony_ci         if (prog->mem->start & 0x40)
816bf215546Sopenharmony_ci            prog->code_base += 0x40;
817bf215546Sopenharmony_ci         assert((prog->code_base & 0x7f) == 0x00);
818bf215546Sopenharmony_ci      }
819bf215546Sopenharmony_ci   }
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci   return 0;
822bf215546Sopenharmony_ci}
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_cistatic inline void
825bf215546Sopenharmony_cinvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
826bf215546Sopenharmony_ci{
827bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
828bf215546Sopenharmony_ci   const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
829bf215546Sopenharmony_ci   uint32_t code_pos = prog->code_base;
830bf215546Sopenharmony_ci   uint32_t size_sph = 0;
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   if (!is_cp) {
833bf215546Sopenharmony_ci      if (screen->eng3d->oclass < TU102_3D_CLASS)
834bf215546Sopenharmony_ci         size_sph = GF100_SHADER_HEADER_SIZE;
835bf215546Sopenharmony_ci      else
836bf215546Sopenharmony_ci         size_sph = TU102_SHADER_HEADER_SIZE;
837bf215546Sopenharmony_ci   }
838bf215546Sopenharmony_ci   code_pos += size_sph;
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci   if (prog->relocs)
841bf215546Sopenharmony_ci      nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
842bf215546Sopenharmony_ci                            screen->lib_code->start, 0);
843bf215546Sopenharmony_ci   if (prog->fixups) {
844bf215546Sopenharmony_ci      nv50_ir_apply_fixups(prog->fixups, prog->code,
845bf215546Sopenharmony_ci                           prog->fp.force_persample_interp,
846bf215546Sopenharmony_ci                           prog->fp.flatshade,
847bf215546Sopenharmony_ci                           0 /* alphatest */,
848bf215546Sopenharmony_ci                           prog->fp.msaa);
849bf215546Sopenharmony_ci      for (int i = 0; i < 2; i++) {
850bf215546Sopenharmony_ci         unsigned mask = prog->fp.color_interp[i] >> 4;
851bf215546Sopenharmony_ci         unsigned interp = prog->fp.color_interp[i] & 3;
852bf215546Sopenharmony_ci         if (!mask)
853bf215546Sopenharmony_ci            continue;
854bf215546Sopenharmony_ci         prog->hdr[14] &= ~(0xff << (8 * i));
855bf215546Sopenharmony_ci         if (prog->fp.flatshade)
856bf215546Sopenharmony_ci            interp = NVC0_INTERP_FLAT;
857bf215546Sopenharmony_ci         for (int c = 0; c < 4; c++)
858bf215546Sopenharmony_ci            if (mask & (1 << c))
859bf215546Sopenharmony_ci               prog->hdr[14] |= interp << (2 * (4 * i + c));
860bf215546Sopenharmony_ci      }
861bf215546Sopenharmony_ci   }
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci   if (!is_cp)
864bf215546Sopenharmony_ci      nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
865bf215546Sopenharmony_ci                           NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
868bf215546Sopenharmony_ci                        NV_VRAM_DOMAIN(&screen->base), prog->code_size,
869bf215546Sopenharmony_ci                        prog->code);
870bf215546Sopenharmony_ci}
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_cibool
873bf215546Sopenharmony_cinvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
874bf215546Sopenharmony_ci{
875bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
876bf215546Sopenharmony_ci   const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
877bf215546Sopenharmony_ci   int ret;
878bf215546Sopenharmony_ci   uint32_t size = prog->code_size;
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   if (!is_cp) {
881bf215546Sopenharmony_ci      if (screen->eng3d->oclass < TU102_3D_CLASS)
882bf215546Sopenharmony_ci         size += GF100_SHADER_HEADER_SIZE;
883bf215546Sopenharmony_ci      else
884bf215546Sopenharmony_ci         size += TU102_SHADER_HEADER_SIZE;
885bf215546Sopenharmony_ci   }
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   ret = nvc0_program_alloc_code(nvc0, prog);
888bf215546Sopenharmony_ci   if (ret) {
889bf215546Sopenharmony_ci      struct nouveau_heap *heap = screen->text_heap;
890bf215546Sopenharmony_ci      struct nvc0_program *progs[] = { /* Sorted accordingly to SP_START_ID */
891bf215546Sopenharmony_ci         nvc0->compprog, nvc0->vertprog, nvc0->tctlprog,
892bf215546Sopenharmony_ci         nvc0->tevlprog, nvc0->gmtyprog, nvc0->fragprog
893bf215546Sopenharmony_ci      };
894bf215546Sopenharmony_ci
895bf215546Sopenharmony_ci      /* Note that the code library, which is allocated before anything else,
896bf215546Sopenharmony_ci       * does not have a priv pointer. We can stop once we hit it.
897bf215546Sopenharmony_ci       */
898bf215546Sopenharmony_ci      while (heap->next && heap->next->priv) {
899bf215546Sopenharmony_ci         struct nvc0_program *evict = heap->next->priv;
900bf215546Sopenharmony_ci         nouveau_heap_free(&evict->mem);
901bf215546Sopenharmony_ci      }
902bf215546Sopenharmony_ci      debug_printf("WARNING: out of code space, evicting all shaders.\n");
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci      /* Make sure to synchronize before deleting the code segment. */
905bf215546Sopenharmony_ci      IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci      if ((screen->text->size << 1) <= (1 << 23)) {
908bf215546Sopenharmony_ci         ret = nvc0_screen_resize_text_area(screen, screen->text->size << 1);
909bf215546Sopenharmony_ci         if (ret) {
910bf215546Sopenharmony_ci            NOUVEAU_ERR("Error allocating TEXT area: %d\n", ret);
911bf215546Sopenharmony_ci            return false;
912bf215546Sopenharmony_ci         }
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci         /* Re-upload the builtin function into the new code segment. */
915bf215546Sopenharmony_ci         nvc0_program_library_upload(nvc0);
916bf215546Sopenharmony_ci      }
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci      ret = nvc0_program_alloc_code(nvc0, prog);
919bf215546Sopenharmony_ci      if (ret) {
920bf215546Sopenharmony_ci         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
921bf215546Sopenharmony_ci         return false;
922bf215546Sopenharmony_ci      }
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci      /* All currently bound shaders have to be reuploaded. */
925bf215546Sopenharmony_ci      for (int i = 0; i < ARRAY_SIZE(progs); i++) {
926bf215546Sopenharmony_ci         if (!progs[i] || progs[i] == prog)
927bf215546Sopenharmony_ci            continue;
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci         ret = nvc0_program_alloc_code(nvc0, progs[i]);
930bf215546Sopenharmony_ci         if (ret) {
931bf215546Sopenharmony_ci            NOUVEAU_ERR("failed to re-upload a shader after code eviction.\n");
932bf215546Sopenharmony_ci            return false;
933bf215546Sopenharmony_ci         }
934bf215546Sopenharmony_ci         nvc0_program_upload_code(nvc0, progs[i]);
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_ci         if (progs[i]->type == PIPE_SHADER_COMPUTE) {
937bf215546Sopenharmony_ci            /* Caches have to be invalidated but the CP_START_ID will be
938bf215546Sopenharmony_ci             * updated in the launch_grid functions. */
939bf215546Sopenharmony_ci            BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1);
940bf215546Sopenharmony_ci            PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE);
941bf215546Sopenharmony_ci         } else {
942bf215546Sopenharmony_ci            nvc0_program_sp_start_id(nvc0, i, progs[i]);
943bf215546Sopenharmony_ci         }
944bf215546Sopenharmony_ci      }
945bf215546Sopenharmony_ci   }
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci   nvc0_program_upload_code(nvc0, prog);
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci#ifndef NDEBUG
950bf215546Sopenharmony_ci   if (debug_get_bool_option("NV50_PROG_DEBUG", false))
951bf215546Sopenharmony_ci      nvc0_program_dump(prog);
952bf215546Sopenharmony_ci#endif
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_ci   BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
955bf215546Sopenharmony_ci   PUSH_DATA (nvc0->base.pushbuf, 0x1011);
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_ci   return true;
958bf215546Sopenharmony_ci}
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci/* Upload code for builtin functions like integer division emulation. */
961bf215546Sopenharmony_civoid
962bf215546Sopenharmony_cinvc0_program_library_upload(struct nvc0_context *nvc0)
963bf215546Sopenharmony_ci{
964bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
965bf215546Sopenharmony_ci   int ret;
966bf215546Sopenharmony_ci   uint32_t size;
967bf215546Sopenharmony_ci   const uint32_t *code;
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci   if (screen->lib_code)
970bf215546Sopenharmony_ci      return;
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
973bf215546Sopenharmony_ci   if (!size)
974bf215546Sopenharmony_ci      return;
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci   ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL,
977bf215546Sopenharmony_ci                            &screen->lib_code);
978bf215546Sopenharmony_ci   if (ret)
979bf215546Sopenharmony_ci      return;
980bf215546Sopenharmony_ci
981bf215546Sopenharmony_ci   nvc0->base.push_data(&nvc0->base,
982bf215546Sopenharmony_ci                        screen->text, screen->lib_code->start, NV_VRAM_DOMAIN(&screen->base),
983bf215546Sopenharmony_ci                        size, code);
984bf215546Sopenharmony_ci   /* no need for a memory barrier, will be emitted with first program */
985bf215546Sopenharmony_ci}
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_civoid
988bf215546Sopenharmony_cinvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
989bf215546Sopenharmony_ci{
990bf215546Sopenharmony_ci   const struct pipe_shader_state pipe = prog->pipe;
991bf215546Sopenharmony_ci   const ubyte type = prog->type;
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci   if (prog->mem)
994bf215546Sopenharmony_ci      nouveau_heap_free(&prog->mem);
995bf215546Sopenharmony_ci   FREE(prog->code); /* may be 0 for hardcoded shaders */
996bf215546Sopenharmony_ci   FREE(prog->relocs);
997bf215546Sopenharmony_ci   FREE(prog->fixups);
998bf215546Sopenharmony_ci   if (prog->tfb) {
999bf215546Sopenharmony_ci      if (nvc0->state.tfb == prog->tfb)
1000bf215546Sopenharmony_ci         nvc0->state.tfb = NULL;
1001bf215546Sopenharmony_ci      FREE(prog->tfb);
1002bf215546Sopenharmony_ci   }
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci   memset(prog, 0, sizeof(*prog));
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci   prog->pipe = pipe;
1007bf215546Sopenharmony_ci   prog->type = type;
1008bf215546Sopenharmony_ci}
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_civoid
1011bf215546Sopenharmony_cinvc0_program_init_tcp_empty(struct nvc0_context *nvc0)
1012bf215546Sopenharmony_ci{
1013bf215546Sopenharmony_ci   struct ureg_program *ureg;
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci   ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
1016bf215546Sopenharmony_ci   if (!ureg)
1017bf215546Sopenharmony_ci      return;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci   ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1);
1020bf215546Sopenharmony_ci   ureg_END(ureg);
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci   nvc0->tcp_empty = ureg_create_shader_and_destroy(ureg, &nvc0->base.pipe);
1023bf215546Sopenharmony_ci}
1024