1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2010 Christoph Bumiller
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci#include "pipe/p_defines.h"
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "nv50/nv50_context.h"
28bf215546Sopenharmony_ci#include "nv50/nv50_program.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "nv50_ir_driver.h"
31bf215546Sopenharmony_ci
/**
 * Count the bits set in the low nibble of \p val.
 *
 * Only bits 0..3 are examined; any higher bits are ignored.
 *
 * \return number of set bits, in the range 0..4
 */
static inline unsigned
bitcount4(const uint32_t val)
{
   const uint32_t nibble = val & 0xf;

   /* Sum the four bits individually; bit 3 needs no mask after the shift. */
   return (nibble & 1) + ((nibble >> 1) & 1) + ((nibble >> 2) & 1) +
          (nibble >> 3);
}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic int
41bf215546Sopenharmony_cinv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
44bf215546Sopenharmony_ci   unsigned i, n, c;
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci   n = 0;
47bf215546Sopenharmony_ci   for (i = 0; i < info->numInputs; ++i) {
48bf215546Sopenharmony_ci      prog->in[i].id = i;
49bf215546Sopenharmony_ci      prog->in[i].sn = info->in[i].sn;
50bf215546Sopenharmony_ci      prog->in[i].si = info->in[i].si;
51bf215546Sopenharmony_ci      prog->in[i].hw = n;
52bf215546Sopenharmony_ci      prog->in[i].mask = info->in[i].mask;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci      prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
57bf215546Sopenharmony_ci         if (info->in[i].mask & (1 << c))
58bf215546Sopenharmony_ci            info->in[i].slot[c] = n++;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci      if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
61bf215546Sopenharmony_ci         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
62bf215546Sopenharmony_ci   }
63bf215546Sopenharmony_ci   prog->in_nr = info->numInputs;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   for (i = 0; i < info->numSysVals; ++i) {
66bf215546Sopenharmony_ci      switch (info->sv[i].sn) {
67bf215546Sopenharmony_ci      case TGSI_SEMANTIC_INSTANCEID:
68bf215546Sopenharmony_ci         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
69bf215546Sopenharmony_ci         continue;
70bf215546Sopenharmony_ci      case TGSI_SEMANTIC_VERTEXID:
71bf215546Sopenharmony_ci         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
72bf215546Sopenharmony_ci         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
73bf215546Sopenharmony_ci         continue;
74bf215546Sopenharmony_ci      case TGSI_SEMANTIC_PRIMID:
75bf215546Sopenharmony_ci         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
76bf215546Sopenharmony_ci         break;
77bf215546Sopenharmony_ci      default:
78bf215546Sopenharmony_ci         break;
79bf215546Sopenharmony_ci      }
80bf215546Sopenharmony_ci   }
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   /*
83bf215546Sopenharmony_ci    * Corner case: VP has no inputs, but we will still need to submit data to
84bf215546Sopenharmony_ci    * draw it. HW will shout at us and won't draw anything if we don't enable
85bf215546Sopenharmony_ci    * any input, so let's just pretend it's the first one.
86bf215546Sopenharmony_ci    */
87bf215546Sopenharmony_ci   if (prog->vp.attrs[0] == 0 &&
88bf215546Sopenharmony_ci       prog->vp.attrs[1] == 0 &&
89bf215546Sopenharmony_ci       prog->vp.attrs[2] == 0)
90bf215546Sopenharmony_ci      prog->vp.attrs[0] |= 0xf;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   /* VertexID before InstanceID */
93bf215546Sopenharmony_ci   if (info->io.vertexId < info->numSysVals)
94bf215546Sopenharmony_ci      info->sv[info->io.vertexId].slot[0] = n++;
95bf215546Sopenharmony_ci   if (info->io.instanceId < info->numSysVals)
96bf215546Sopenharmony_ci      info->sv[info->io.instanceId].slot[0] = n++;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   n = 0;
99bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i) {
100bf215546Sopenharmony_ci      switch (info->out[i].sn) {
101bf215546Sopenharmony_ci      case TGSI_SEMANTIC_PSIZE:
102bf215546Sopenharmony_ci         prog->vp.psiz = i;
103bf215546Sopenharmony_ci         break;
104bf215546Sopenharmony_ci      case TGSI_SEMANTIC_CLIPDIST:
105bf215546Sopenharmony_ci         prog->vp.clpd[info->out[i].si] = n;
106bf215546Sopenharmony_ci         break;
107bf215546Sopenharmony_ci      case TGSI_SEMANTIC_EDGEFLAG:
108bf215546Sopenharmony_ci         prog->vp.edgeflag = i;
109bf215546Sopenharmony_ci         break;
110bf215546Sopenharmony_ci      case TGSI_SEMANTIC_BCOLOR:
111bf215546Sopenharmony_ci         prog->vp.bfc[info->out[i].si] = i;
112bf215546Sopenharmony_ci         break;
113bf215546Sopenharmony_ci      case TGSI_SEMANTIC_LAYER:
114bf215546Sopenharmony_ci         prog->gp.has_layer = true;
115bf215546Sopenharmony_ci         prog->gp.layerid = n;
116bf215546Sopenharmony_ci         break;
117bf215546Sopenharmony_ci      case TGSI_SEMANTIC_VIEWPORT_INDEX:
118bf215546Sopenharmony_ci         prog->gp.has_viewport = true;
119bf215546Sopenharmony_ci         prog->gp.viewportid = n;
120bf215546Sopenharmony_ci         break;
121bf215546Sopenharmony_ci      default:
122bf215546Sopenharmony_ci         break;
123bf215546Sopenharmony_ci      }
124bf215546Sopenharmony_ci      prog->out[i].id = i;
125bf215546Sopenharmony_ci      prog->out[i].sn = info->out[i].sn;
126bf215546Sopenharmony_ci      prog->out[i].si = info->out[i].si;
127bf215546Sopenharmony_ci      prog->out[i].hw = n;
128bf215546Sopenharmony_ci      prog->out[i].mask = info->out[i].mask;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
131bf215546Sopenharmony_ci         if (info->out[i].mask & (1 << c))
132bf215546Sopenharmony_ci            info->out[i].slot[c] = n++;
133bf215546Sopenharmony_ci   }
134bf215546Sopenharmony_ci   prog->out_nr = info->numOutputs;
135bf215546Sopenharmony_ci   prog->max_out = n;
136bf215546Sopenharmony_ci   if (!prog->max_out)
137bf215546Sopenharmony_ci      prog->max_out = 1;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   if (prog->vp.psiz < info->numOutputs)
140bf215546Sopenharmony_ci      prog->vp.psiz = prog->out[prog->vp.psiz].hw;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   return 0;
143bf215546Sopenharmony_ci}
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_cistatic int
146bf215546Sopenharmony_cinv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info)
147bf215546Sopenharmony_ci{
148bf215546Sopenharmony_ci   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
149bf215546Sopenharmony_ci   unsigned i, n, m, c;
150bf215546Sopenharmony_ci   unsigned nvary;
151bf215546Sopenharmony_ci   unsigned nflat;
152bf215546Sopenharmony_ci   unsigned nintp = 0;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   /* count recorded non-flat inputs */
155bf215546Sopenharmony_ci   for (m = 0, i = 0; i < info->numInputs; ++i) {
156bf215546Sopenharmony_ci      switch (info->in[i].sn) {
157bf215546Sopenharmony_ci      case TGSI_SEMANTIC_POSITION:
158bf215546Sopenharmony_ci         continue;
159bf215546Sopenharmony_ci      default:
160bf215546Sopenharmony_ci         m += info->in[i].flat ? 0 : 1;
161bf215546Sopenharmony_ci         break;
162bf215546Sopenharmony_ci      }
163bf215546Sopenharmony_ci   }
164bf215546Sopenharmony_ci   /* careful: id may be != i in info->in[prog->in[i].id] */
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   /* Fill prog->in[] so that non-flat inputs are first and
167bf215546Sopenharmony_ci    * kick out special inputs that don't use the RESULT_MAP.
168bf215546Sopenharmony_ci    */
169bf215546Sopenharmony_ci   for (n = 0, i = 0; i < info->numInputs; ++i) {
170bf215546Sopenharmony_ci      if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
171bf215546Sopenharmony_ci         prog->fp.interp |= info->in[i].mask << 24;
172bf215546Sopenharmony_ci         for (c = 0; c < 4; ++c)
173bf215546Sopenharmony_ci            if (info->in[i].mask & (1 << c))
174bf215546Sopenharmony_ci               info->in[i].slot[c] = nintp++;
175bf215546Sopenharmony_ci      } else {
176bf215546Sopenharmony_ci         unsigned j = info->in[i].flat ? m++ : n++;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci         if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
179bf215546Sopenharmony_ci            prog->vp.bfc[info->in[i].si] = j;
180bf215546Sopenharmony_ci         else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
181bf215546Sopenharmony_ci            prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci         prog->in[j].id = i;
184bf215546Sopenharmony_ci         prog->in[j].mask = info->in[i].mask;
185bf215546Sopenharmony_ci         prog->in[j].sn = info->in[i].sn;
186bf215546Sopenharmony_ci         prog->in[j].si = info->in[i].si;
187bf215546Sopenharmony_ci         prog->in[j].linear = info->in[i].linear;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci         prog->in_nr++;
190bf215546Sopenharmony_ci      }
191bf215546Sopenharmony_ci   }
192bf215546Sopenharmony_ci   if (!(prog->fp.interp & (8 << 24))) {
193bf215546Sopenharmony_ci      ++nintp;
194bf215546Sopenharmony_ci      prog->fp.interp |= 8 << 24;
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   for (i = 0; i < prog->in_nr; ++i) {
198bf215546Sopenharmony_ci      int j = prog->in[i].id;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci      prog->in[i].hw = nintp;
201bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
202bf215546Sopenharmony_ci         if (prog->in[i].mask & (1 << c))
203bf215546Sopenharmony_ci            info->in[j].slot[c] = nintp++;
204bf215546Sopenharmony_ci   }
205bf215546Sopenharmony_ci   /* (n == m) if m never increased, i.e. no flat inputs */
206bf215546Sopenharmony_ci   nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
207bf215546Sopenharmony_ci   nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
208bf215546Sopenharmony_ci   nvary = nintp - nflat;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
211bf215546Sopenharmony_ci   prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   /* put front/back colors right after HPOS */
214bf215546Sopenharmony_ci   prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
215bf215546Sopenharmony_ci   for (i = 0; i < 2; ++i)
216bf215546Sopenharmony_ci      if (prog->vp.bfc[i] < 0xff)
217bf215546Sopenharmony_ci         prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   /* FP outputs */
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   if (info->prop.fp.numColourResults > 1)
222bf215546Sopenharmony_ci      prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   for (i = 0; i < info->numOutputs; ++i) {
225bf215546Sopenharmony_ci      prog->out[i].id = i;
226bf215546Sopenharmony_ci      prog->out[i].sn = info->out[i].sn;
227bf215546Sopenharmony_ci      prog->out[i].si = info->out[i].si;
228bf215546Sopenharmony_ci      prog->out[i].mask = info->out[i].mask;
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      if (i == info->io.fragDepth || i == info->io.sampleMask)
231bf215546Sopenharmony_ci         continue;
232bf215546Sopenharmony_ci      prog->out[i].hw = info->out[i].si * 4;
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci      for (c = 0; c < 4; ++c)
235bf215546Sopenharmony_ci         info->out[i].slot[c] = prog->out[i].hw + c;
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci      prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
238bf215546Sopenharmony_ci   }
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
241bf215546Sopenharmony_ci      info->out[info->io.sampleMask].slot[0] = prog->max_out++;
242bf215546Sopenharmony_ci      prog->fp.has_samplemask = 1;
243bf215546Sopenharmony_ci   }
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
246bf215546Sopenharmony_ci      info->out[info->io.fragDepth].slot[2] = prog->max_out++;
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   if (!prog->max_out)
249bf215546Sopenharmony_ci      prog->max_out = 4;
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   return 0;
252bf215546Sopenharmony_ci}
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_cistatic int
255bf215546Sopenharmony_cinv50_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
256bf215546Sopenharmony_ci{
257bf215546Sopenharmony_ci   switch (info->type) {
258bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
259bf215546Sopenharmony_ci      return nv50_vertprog_assign_slots(info);
260bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
261bf215546Sopenharmony_ci      return nv50_vertprog_assign_slots(info);
262bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
263bf215546Sopenharmony_ci      return nv50_fragprog_assign_slots(info);
264bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:
265bf215546Sopenharmony_ci      return 0;
266bf215546Sopenharmony_ci   default:
267bf215546Sopenharmony_ci      return -1;
268bf215546Sopenharmony_ci   }
269bf215546Sopenharmony_ci}
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_cistatic struct nv50_stream_output_state *
272bf215546Sopenharmony_cinv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info,
273bf215546Sopenharmony_ci                                  const struct pipe_stream_output_info *pso)
274bf215546Sopenharmony_ci{
275bf215546Sopenharmony_ci   struct nv50_stream_output_state *so;
276bf215546Sopenharmony_ci   unsigned b, i, c;
277bf215546Sopenharmony_ci   unsigned base[4];
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   so = MALLOC_STRUCT(nv50_stream_output_state);
280bf215546Sopenharmony_ci   if (!so)
281bf215546Sopenharmony_ci      return NULL;
282bf215546Sopenharmony_ci   memset(so->map, 0xff, sizeof(so->map));
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci   for (b = 0; b < 4; ++b)
285bf215546Sopenharmony_ci      so->num_attribs[b] = 0;
286bf215546Sopenharmony_ci   for (i = 0; i < pso->num_outputs; ++i) {
287bf215546Sopenharmony_ci      unsigned end =  pso->output[i].dst_offset + pso->output[i].num_components;
288bf215546Sopenharmony_ci      b = pso->output[i].output_buffer;
289bf215546Sopenharmony_ci      assert(b < 4);
290bf215546Sopenharmony_ci      so->num_attribs[b] = MAX2(so->num_attribs[b], end);
291bf215546Sopenharmony_ci   }
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   so->stride[0] = pso->stride[0] * 4;
296bf215546Sopenharmony_ci   base[0] = 0;
297bf215546Sopenharmony_ci   for (b = 1; b < 4; ++b) {
298bf215546Sopenharmony_ci      assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
299bf215546Sopenharmony_ci      so->stride[b] = so->num_attribs[b] * 4;
300bf215546Sopenharmony_ci      if (so->num_attribs[b])
301bf215546Sopenharmony_ci         so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
302bf215546Sopenharmony_ci      base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
303bf215546Sopenharmony_ci   }
304bf215546Sopenharmony_ci   if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
305bf215546Sopenharmony_ci      assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
306bf215546Sopenharmony_ci      so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   so->map_size = base[3] + so->num_attribs[3];
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   for (i = 0; i < pso->num_outputs; ++i) {
312bf215546Sopenharmony_ci      const unsigned s = pso->output[i].start_component;
313bf215546Sopenharmony_ci      const unsigned p = pso->output[i].dst_offset;
314bf215546Sopenharmony_ci      const unsigned r = pso->output[i].register_index;
315bf215546Sopenharmony_ci      b = pso->output[i].output_buffer;
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci      if (r >= info->numOutputs)
318bf215546Sopenharmony_ci         continue;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci      for (c = 0; c < pso->output[i].num_components; ++c)
321bf215546Sopenharmony_ci         so->map[base[b] + p + c] = info->out[r].slot[s + c];
322bf215546Sopenharmony_ci   }
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   return so;
325bf215546Sopenharmony_ci}
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_cibool
328bf215546Sopenharmony_cinv50_program_translate(struct nv50_program *prog, uint16_t chipset,
329bf215546Sopenharmony_ci                       struct util_debug_callback *debug)
330bf215546Sopenharmony_ci{
331bf215546Sopenharmony_ci   struct nv50_ir_prog_info *info;
332bf215546Sopenharmony_ci   struct nv50_ir_prog_info_out info_out = {};
333bf215546Sopenharmony_ci   int i, ret;
334bf215546Sopenharmony_ci   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   info = CALLOC_STRUCT(nv50_ir_prog_info);
337bf215546Sopenharmony_ci   if (!info)
338bf215546Sopenharmony_ci      return false;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   info->type = prog->type;
341bf215546Sopenharmony_ci   info->target = chipset;
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci   info->bin.sourceRep = prog->pipe.type;
344bf215546Sopenharmony_ci   switch (prog->pipe.type) {
345bf215546Sopenharmony_ci   case PIPE_SHADER_IR_TGSI:
346bf215546Sopenharmony_ci      info->bin.source = (void *)prog->pipe.tokens;
347bf215546Sopenharmony_ci      break;
348bf215546Sopenharmony_ci   case PIPE_SHADER_IR_NIR:
349bf215546Sopenharmony_ci      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
350bf215546Sopenharmony_ci      break;
351bf215546Sopenharmony_ci   default:
352bf215546Sopenharmony_ci      assert(!"unsupported IR!");
353bf215546Sopenharmony_ci      free(info);
354bf215546Sopenharmony_ci      return false;
355bf215546Sopenharmony_ci   }
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   info->bin.smemSize = prog->cp.smem_size;
358bf215546Sopenharmony_ci   info->io.auxCBSlot = 15;
359bf215546Sopenharmony_ci   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
360bf215546Sopenharmony_ci   info->io.genUserClip = prog->vp.clpd_nr;
361bf215546Sopenharmony_ci   if (prog->fp.alphatest)
362bf215546Sopenharmony_ci      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
365bf215546Sopenharmony_ci   info->io.bufInfoBase = NV50_CB_AUX_BUF_INFO(0);
366bf215546Sopenharmony_ci   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
367bf215546Sopenharmony_ci   info->io.msInfoCBSlot = 15;
368bf215546Sopenharmony_ci   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   info->io.membarOffset = NV50_CB_AUX_MEMBAR_OFFSET;
371bf215546Sopenharmony_ci   info->io.gmemMembar = 15;
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   info->assignSlots = nv50_program_assign_varying_slots;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   prog->vp.bfc[0] = 0xff;
376bf215546Sopenharmony_ci   prog->vp.bfc[1] = 0xff;
377bf215546Sopenharmony_ci   prog->vp.edgeflag = 0xff;
378bf215546Sopenharmony_ci   prog->vp.clpd[0] = map_undef;
379bf215546Sopenharmony_ci   prog->vp.clpd[1] = map_undef;
380bf215546Sopenharmony_ci   prog->vp.psiz = map_undef;
381bf215546Sopenharmony_ci   prog->gp.has_layer = 0;
382bf215546Sopenharmony_ci   prog->gp.has_viewport = 0;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_COMPUTE)
385bf215546Sopenharmony_ci      info->prop.cp.inputOffset = 0x14;
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   info_out.driverPriv = prog;
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci#ifndef NDEBUG
390bf215546Sopenharmony_ci   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
391bf215546Sopenharmony_ci   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
392bf215546Sopenharmony_ci   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
393bf215546Sopenharmony_ci#else
394bf215546Sopenharmony_ci   info->optLevel = 3;
395bf215546Sopenharmony_ci#endif
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ci   ret = nv50_ir_generate_code(info, &info_out);
398bf215546Sopenharmony_ci   if (ret) {
399bf215546Sopenharmony_ci      NOUVEAU_ERR("shader translation failed: %i\n", ret);
400bf215546Sopenharmony_ci      goto out;
401bf215546Sopenharmony_ci   }
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci   prog->code = info_out.bin.code;
404bf215546Sopenharmony_ci   prog->code_size = info_out.bin.codeSize;
405bf215546Sopenharmony_ci   prog->fixups = info_out.bin.relocData;
406bf215546Sopenharmony_ci   prog->interps = info_out.bin.fixupData;
407bf215546Sopenharmony_ci   prog->max_gpr = MAX2(4, (info_out.bin.maxGPR >> 1) + 1);
408bf215546Sopenharmony_ci   prog->tls_space = info_out.bin.tlsSpace;
409bf215546Sopenharmony_ci   prog->cp.smem_size = info_out.bin.smemSize;
410bf215546Sopenharmony_ci   prog->mul_zero_wins = info->io.mul_zero_wins;
411bf215546Sopenharmony_ci   prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS;
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   prog->vp.clip_enable = (1 << info_out.io.clipDistances) - 1;
414bf215546Sopenharmony_ci   prog->vp.cull_enable =
415bf215546Sopenharmony_ci      ((1 << info_out.io.cullDistances) - 1) << info_out.io.clipDistances;
416bf215546Sopenharmony_ci   prog->vp.clip_mode = 0;
417bf215546Sopenharmony_ci   for (i = 0; i < info_out.io.cullDistances; ++i)
418bf215546Sopenharmony_ci      prog->vp.clip_mode |= 1 << ((info_out.io.clipDistances + i) * 4);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_FRAGMENT) {
421bf215546Sopenharmony_ci      if (info_out.prop.fp.writesDepth) {
422bf215546Sopenharmony_ci         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
423bf215546Sopenharmony_ci         prog->fp.flags[1] = 0x11;
424bf215546Sopenharmony_ci      }
425bf215546Sopenharmony_ci      if (info_out.prop.fp.usesDiscard)
426bf215546Sopenharmony_ci         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
427bf215546Sopenharmony_ci   } else
428bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_GEOMETRY) {
429bf215546Sopenharmony_ci      switch (info_out.prop.gp.outputPrim) {
430bf215546Sopenharmony_ci      case PIPE_PRIM_LINE_STRIP:
431bf215546Sopenharmony_ci         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
432bf215546Sopenharmony_ci         break;
433bf215546Sopenharmony_ci      case PIPE_PRIM_TRIANGLE_STRIP:
434bf215546Sopenharmony_ci         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
435bf215546Sopenharmony_ci         break;
436bf215546Sopenharmony_ci      case PIPE_PRIM_POINTS:
437bf215546Sopenharmony_ci      default:
438bf215546Sopenharmony_ci         assert(info_out.prop.gp.outputPrim == PIPE_PRIM_POINTS);
439bf215546Sopenharmony_ci         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
440bf215546Sopenharmony_ci         break;
441bf215546Sopenharmony_ci      }
442bf215546Sopenharmony_ci      prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024);
443bf215546Sopenharmony_ci   } else
444bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_COMPUTE) {
445bf215546Sopenharmony_ci      for (i = 0; i < NV50_MAX_GLOBALS; i++) {
446bf215546Sopenharmony_ci         prog->cp.gmem[i] = (struct nv50_gmem_state){
447bf215546Sopenharmony_ci            .valid = info_out.prop.cp.gmem[i].valid,
448bf215546Sopenharmony_ci            .image = info_out.prop.cp.gmem[i].image,
449bf215546Sopenharmony_ci            .slot  = info_out.prop.cp.gmem[i].slot
450bf215546Sopenharmony_ci         };
451bf215546Sopenharmony_ci      }
452bf215546Sopenharmony_ci   }
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   if (prog->pipe.stream_output.num_outputs)
455bf215546Sopenharmony_ci      prog->so = nv50_program_create_strmout_state(&info_out,
456bf215546Sopenharmony_ci                                                   &prog->pipe.stream_output);
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_ci   util_debug_message(debug, SHADER_INFO,
459bf215546Sopenharmony_ci                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, loops: %d, bytes: %d",
460bf215546Sopenharmony_ci                      prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize,
461bf215546Sopenharmony_ci                      prog->max_gpr, info_out.bin.instructions, info_out.loops,
462bf215546Sopenharmony_ci                      info_out.bin.codeSize);
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ciout:
465bf215546Sopenharmony_ci   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
466bf215546Sopenharmony_ci      ralloc_free((void *)info->bin.source);
467bf215546Sopenharmony_ci   FREE(info);
468bf215546Sopenharmony_ci   return !ret;
469bf215546Sopenharmony_ci}
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_cibool
472bf215546Sopenharmony_cinv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
473bf215546Sopenharmony_ci{
474bf215546Sopenharmony_ci   struct nouveau_heap *heap;
475bf215546Sopenharmony_ci   int ret;
476bf215546Sopenharmony_ci   uint32_t size = align(prog->code_size, 0x40);
477bf215546Sopenharmony_ci   uint8_t prog_type;
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   switch (prog->type) {
480bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
481bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
482bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
483bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:  heap = nv50->screen->fp_code_heap; break;
484bf215546Sopenharmony_ci   default:
485bf215546Sopenharmony_ci      assert(!"invalid program type");
486bf215546Sopenharmony_ci      return false;
487bf215546Sopenharmony_ci   }
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
490bf215546Sopenharmony_ci   if (ret) {
491bf215546Sopenharmony_ci      /* Out of space: evict everything to compactify the code segment, hoping
492bf215546Sopenharmony_ci       * the working set is much smaller and drifts slowly. Improve me !
493bf215546Sopenharmony_ci       */
494bf215546Sopenharmony_ci      while (heap->next) {
495bf215546Sopenharmony_ci         struct nv50_program *evict = heap->next->priv;
496bf215546Sopenharmony_ci         if (evict)
497bf215546Sopenharmony_ci            nouveau_heap_free(&evict->mem);
498bf215546Sopenharmony_ci      }
499bf215546Sopenharmony_ci      debug_printf("WARNING: out of code space, evicting all shaders.\n");
500bf215546Sopenharmony_ci      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
501bf215546Sopenharmony_ci      if (ret) {
502bf215546Sopenharmony_ci         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
503bf215546Sopenharmony_ci         return false;
504bf215546Sopenharmony_ci      }
505bf215546Sopenharmony_ci   }
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci   if (prog->type == PIPE_SHADER_COMPUTE) {
508bf215546Sopenharmony_ci      /* CP code must be uploaded in FP code segment. */
509bf215546Sopenharmony_ci      prog_type = 1;
510bf215546Sopenharmony_ci   } else {
511bf215546Sopenharmony_ci      prog->code_base = prog->mem->start;
512bf215546Sopenharmony_ci      prog_type = prog->type;
513bf215546Sopenharmony_ci   }
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
516bf215546Sopenharmony_ci   if (ret < 0) {
517bf215546Sopenharmony_ci      nouveau_heap_free(&prog->mem);
518bf215546Sopenharmony_ci      return false;
519bf215546Sopenharmony_ci   }
520bf215546Sopenharmony_ci   if (ret > 0)
521bf215546Sopenharmony_ci      nv50->state.new_tls_space = true;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   if (prog->fixups)
524bf215546Sopenharmony_ci      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
525bf215546Sopenharmony_ci   if (prog->interps)
526bf215546Sopenharmony_ci      nv50_ir_apply_fixups(prog->interps, prog->code,
527bf215546Sopenharmony_ci                           prog->fp.force_persample_interp,
528bf215546Sopenharmony_ci                           false /* flatshade */,
529bf215546Sopenharmony_ci                           prog->fp.alphatest - 1,
530bf215546Sopenharmony_ci                           false /* msaa */);
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
533bf215546Sopenharmony_ci                       (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
534bf215546Sopenharmony_ci                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
537bf215546Sopenharmony_ci   PUSH_DATA (nv50->base.pushbuf, 0);
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci   return true;
540bf215546Sopenharmony_ci}
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_civoid
543bf215546Sopenharmony_cinv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
544bf215546Sopenharmony_ci{
545bf215546Sopenharmony_ci   const struct pipe_shader_state pipe = p->pipe;
546bf215546Sopenharmony_ci   const ubyte type = p->type;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci   if (p->mem)
549bf215546Sopenharmony_ci      nouveau_heap_free(&p->mem);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   FREE(p->code);
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   FREE(p->fixups);
554bf215546Sopenharmony_ci   FREE(p->interps);
555bf215546Sopenharmony_ci   FREE(p->so);
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   memset(p, 0, sizeof(*p));
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   p->pipe = pipe;
560bf215546Sopenharmony_ci   p->type = type;
561bf215546Sopenharmony_ci}
562