1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2013 Nouveau Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci *
22bf215546Sopenharmony_ci * Authors: Christoph Bumiller, Samuel Pitoiset
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "nvc0/nvc0_context.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "nvc0/nvc0_compute.xml.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ciint
30bf215546Sopenharmony_cinvc0_screen_compute_setup(struct nvc0_screen *screen,
31bf215546Sopenharmony_ci                          struct nouveau_pushbuf *push)
32bf215546Sopenharmony_ci{
33bf215546Sopenharmony_ci   struct nouveau_object *chan = screen->base.channel;
34bf215546Sopenharmony_ci   struct nouveau_device *dev = screen->base.device;
35bf215546Sopenharmony_ci   uint32_t obj_class;
36bf215546Sopenharmony_ci   int ret;
37bf215546Sopenharmony_ci   int i;
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci   switch (dev->chipset & ~0xf) {
40bf215546Sopenharmony_ci   case 0xc0:
41bf215546Sopenharmony_ci   case 0xd0:
42bf215546Sopenharmony_ci      /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
43bf215546Sopenharmony_ci       * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
44bf215546Sopenharmony_ci      obj_class = NVC0_COMPUTE_CLASS;
45bf215546Sopenharmony_ci      break;
46bf215546Sopenharmony_ci   default:
47bf215546Sopenharmony_ci      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
48bf215546Sopenharmony_ci      return -1;
49bf215546Sopenharmony_ci   }
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
52bf215546Sopenharmony_ci                            &screen->compute);
53bf215546Sopenharmony_ci   if (ret) {
54bf215546Sopenharmony_ci      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
55bf215546Sopenharmony_ci      return ret;
56bf215546Sopenharmony_ci   }
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
59bf215546Sopenharmony_ci   PUSH_DATA (push, screen->compute->oclass);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   /* hardware limit */
62bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
63bf215546Sopenharmony_ci   PUSH_DATA (push, screen->mp_count);
64bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
65bf215546Sopenharmony_ci   PUSH_DATA (push, 0xf);
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
68bf215546Sopenharmony_ci   PUSH_DATA (push, 0x8000);
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   /* global memory setup */
71bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
72bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
73bf215546Sopenharmony_ci   BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
74bf215546Sopenharmony_ci   for (i = 0; i <= 0xff; i++)
75bf215546Sopenharmony_ci      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
76bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
77bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   /* local memory and cstack setup */
80bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
81bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls->offset);
82bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->offset);
83bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
84bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls->size);
85bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->size);
86bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
87bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
88bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
89bf215546Sopenharmony_ci   PUSH_DATA (push, 0xff << 24);
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   /* shared memory setup */
92bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
93bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
94bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
95bf215546Sopenharmony_ci   PUSH_DATA (push, 0xfe << 24);
96bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
97bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   /* code segment setup */
100bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
101bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->text->offset);
102bf215546Sopenharmony_ci   PUSH_DATA (push, screen->text->offset);
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci   /* textures */
105bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
106bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset);
107bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset);
108bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   /* samplers */
111bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
112bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset + 65536);
113bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset + 65536);
114bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   /* MS sample coordinate offsets */
117bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
118bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
119bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
120bf215546Sopenharmony_ci   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
121bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
122bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
123bf215546Sopenharmony_ci   PUSH_DATA (push, 0); /* 0 */
124bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
125bf215546Sopenharmony_ci   PUSH_DATA (push, 1); /* 1 */
126bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
127bf215546Sopenharmony_ci   PUSH_DATA (push, 0); /* 2 */
128bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
129bf215546Sopenharmony_ci   PUSH_DATA (push, 1); /* 3 */
130bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
131bf215546Sopenharmony_ci   PUSH_DATA (push, 2); /* 4 */
132bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
133bf215546Sopenharmony_ci   PUSH_DATA (push, 3); /* 5 */
134bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
135bf215546Sopenharmony_ci   PUSH_DATA (push, 2); /* 6 */
136bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
137bf215546Sopenharmony_ci   PUSH_DATA (push, 3); /* 7 */
138bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   return 0;
141bf215546Sopenharmony_ci}
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_cistatic void
144bf215546Sopenharmony_cinvc0_compute_validate_samplers(struct nvc0_context *nvc0)
145bf215546Sopenharmony_ci{
146bf215546Sopenharmony_ci   bool need_flush = nvc0_validate_tsc(nvc0, 5);
147bf215546Sopenharmony_ci   if (need_flush) {
148bf215546Sopenharmony_ci      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
149bf215546Sopenharmony_ci      PUSH_DATA (nvc0->base.pushbuf, 0);
150bf215546Sopenharmony_ci   }
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   /* Invalidate all 3D samplers because they are aliased. */
153bf215546Sopenharmony_ci   for (int s = 0; s < 5; s++)
154bf215546Sopenharmony_ci      nvc0->samplers_dirty[s] = ~0;
155bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
156bf215546Sopenharmony_ci}
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_cistatic void
159bf215546Sopenharmony_cinvc0_compute_validate_textures(struct nvc0_context *nvc0)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci   bool need_flush = nvc0_validate_tic(nvc0, 5);
162bf215546Sopenharmony_ci   if (need_flush) {
163bf215546Sopenharmony_ci      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
164bf215546Sopenharmony_ci      PUSH_DATA (nvc0->base.pushbuf, 0);
165bf215546Sopenharmony_ci   }
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   /* Invalidate all 3D textures because they are aliased. */
168bf215546Sopenharmony_ci   for (int s = 0; s < 5; s++) {
169bf215546Sopenharmony_ci      for (int i = 0; i < nvc0->num_textures[s]; i++)
170bf215546Sopenharmony_ci         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
171bf215546Sopenharmony_ci      nvc0->textures_dirty[s] = ~0;
172bf215546Sopenharmony_ci   }
173bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic inline void
177bf215546Sopenharmony_cinvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   int s;
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
182bf215546Sopenharmony_ci   for (s = 0; s < 5; s++) {
183bf215546Sopenharmony_ci      nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
184bf215546Sopenharmony_ci      nvc0->state.uniform_buffer_bound[s] = false;
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
187bf215546Sopenharmony_ci}
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_cistatic void
190bf215546Sopenharmony_cinvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
191bf215546Sopenharmony_ci{
192bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
193bf215546Sopenharmony_ci   const int s = 5;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   while (nvc0->constbuf_dirty[s]) {
196bf215546Sopenharmony_ci      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
197bf215546Sopenharmony_ci      nvc0->constbuf_dirty[s] &= ~(1 << i);
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci      if (nvc0->constbuf[s][i].user) {
200bf215546Sopenharmony_ci         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
201bf215546Sopenharmony_ci         const unsigned base = NVC0_CB_USR_INFO(s);
202bf215546Sopenharmony_ci         const unsigned size = nvc0->constbuf[s][0].size;
203bf215546Sopenharmony_ci         assert(i == 0); /* we really only want OpenGL uniforms here */
204bf215546Sopenharmony_ci         assert(nvc0->constbuf[s][0].u.data);
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci         if (!nvc0->state.uniform_buffer_bound[s]) {
207bf215546Sopenharmony_ci            nvc0->state.uniform_buffer_bound[s] = true;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
210bf215546Sopenharmony_ci            PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
211bf215546Sopenharmony_ci            PUSH_DATAh(push, bo->offset + base);
212bf215546Sopenharmony_ci            PUSH_DATA (push, bo->offset + base);
213bf215546Sopenharmony_ci            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
214bf215546Sopenharmony_ci            PUSH_DATA (push, (0 << 8) | 1);
215bf215546Sopenharmony_ci         }
216bf215546Sopenharmony_ci         nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
217bf215546Sopenharmony_ci                         base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
218bf215546Sopenharmony_ci                         nvc0->constbuf[s][0].u.data);
219bf215546Sopenharmony_ci      } else {
220bf215546Sopenharmony_ci         struct nv04_resource *res =
221bf215546Sopenharmony_ci            nv04_resource(nvc0->constbuf[s][i].u.buf);
222bf215546Sopenharmony_ci         if (res) {
223bf215546Sopenharmony_ci            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
224bf215546Sopenharmony_ci            PUSH_DATA (push, nvc0->constbuf[s][i].size);
225bf215546Sopenharmony_ci            PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
226bf215546Sopenharmony_ci            PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
227bf215546Sopenharmony_ci            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
228bf215546Sopenharmony_ci            PUSH_DATA (push, (i << 8) | 1);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ci            res->cb_bindings[s] |= 1 << i;
233bf215546Sopenharmony_ci         } else {
234bf215546Sopenharmony_ci            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
235bf215546Sopenharmony_ci            PUSH_DATA (push, (i << 8) | 0);
236bf215546Sopenharmony_ci         }
237bf215546Sopenharmony_ci         if (i == 0)
238bf215546Sopenharmony_ci            nvc0->state.uniform_buffer_bound[s] = false;
239bf215546Sopenharmony_ci      }
240bf215546Sopenharmony_ci   }
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   nvc0_compute_invalidate_constbufs(nvc0);
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
245bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_cistatic void
249bf215546Sopenharmony_cinvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
252bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
255bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
256bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
257bf215546Sopenharmony_ci   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
258bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
259bf215546Sopenharmony_ci   PUSH_DATA (push, (15 << 8) | 1);
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
262bf215546Sopenharmony_ci}
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_cistatic void
265bf215546Sopenharmony_cinvc0_compute_validate_buffers(struct nvc0_context *nvc0)
266bf215546Sopenharmony_ci{
267bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
268bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
269bf215546Sopenharmony_ci   const int s = 5;
270bf215546Sopenharmony_ci   int i;
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
273bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
274bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
275bf215546Sopenharmony_ci   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
276bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
277bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
280bf215546Sopenharmony_ci      if (nvc0->buffers[s][i].buffer) {
281bf215546Sopenharmony_ci         struct nv04_resource *res =
282bf215546Sopenharmony_ci            nv04_resource(nvc0->buffers[s][i].buffer);
283bf215546Sopenharmony_ci         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
284bf215546Sopenharmony_ci         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
285bf215546Sopenharmony_ci         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
286bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
287bf215546Sopenharmony_ci         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
288bf215546Sopenharmony_ci         util_range_add(&res->base, &res->valid_buffer_range,
289bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_offset,
290bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_offset +
291bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_size);
292bf215546Sopenharmony_ci      } else {
293bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
294bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
295bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
296bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
297bf215546Sopenharmony_ci      }
298bf215546Sopenharmony_ci   }
299bf215546Sopenharmony_ci}
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_civoid
302bf215546Sopenharmony_cinvc0_compute_validate_globals(struct nvc0_context *nvc0)
303bf215546Sopenharmony_ci{
304bf215546Sopenharmony_ci   unsigned i;
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci   for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
307bf215546Sopenharmony_ci        ++i) {
308bf215546Sopenharmony_ci      struct pipe_resource *res = *util_dynarray_element(
309bf215546Sopenharmony_ci         &nvc0->global_residents, struct pipe_resource *, i);
310bf215546Sopenharmony_ci      if (res)
311bf215546Sopenharmony_ci         nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
312bf215546Sopenharmony_ci                           nv04_resource(res), NOUVEAU_BO_RDWR);
313bf215546Sopenharmony_ci   }
314bf215546Sopenharmony_ci}
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_cistatic inline void
317bf215546Sopenharmony_cinvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
318bf215546Sopenharmony_ci{
319bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
320bf215546Sopenharmony_ci   int i;
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
323bf215546Sopenharmony_ci      if (s == 5)
324bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
325bf215546Sopenharmony_ci      else
326bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
327bf215546Sopenharmony_ci      PUSH_DATA(push, 0);
328bf215546Sopenharmony_ci      PUSH_DATA(push, 0);
329bf215546Sopenharmony_ci      PUSH_DATA(push, 0);
330bf215546Sopenharmony_ci      PUSH_DATA(push, 0);
331bf215546Sopenharmony_ci      PUSH_DATA(push, 0x14000);
332bf215546Sopenharmony_ci      PUSH_DATA(push, 0);
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci}
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_cistatic void
337bf215546Sopenharmony_cinvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
338bf215546Sopenharmony_ci{
339bf215546Sopenharmony_ci   /* TODO: Invalidating both 3D and CP surfaces before validating surfaces for
340bf215546Sopenharmony_ci    * compute is probably not really necessary, but we didn't find any better
341bf215546Sopenharmony_ci    * solutions for now. This fixes some invalidation issues when compute and
342bf215546Sopenharmony_ci    * fragment shaders are used inside the same context. Anyway, we definitely
343bf215546Sopenharmony_ci    * have invalidation issues between 3D and CP for other resources like SSBO
344bf215546Sopenharmony_ci    * and atomic counters. */
345bf215546Sopenharmony_ci   nvc0_compute_invalidate_surfaces(nvc0, 4);
346bf215546Sopenharmony_ci   nvc0_compute_invalidate_surfaces(nvc0, 5);
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   nvc0_validate_suf(nvc0, 5);
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
351bf215546Sopenharmony_ci   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
352bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
353bf215546Sopenharmony_ci   nvc0->images_dirty[4] |= nvc0->images_valid[4];
354bf215546Sopenharmony_ci}
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_cistatic struct nvc0_state_validate
357bf215546Sopenharmony_civalidate_list_cp[] = {
358bf215546Sopenharmony_ci   { nvc0_compprog_validate,              NVC0_NEW_CP_PROGRAM     },
359bf215546Sopenharmony_ci   { nvc0_compute_validate_constbufs,     NVC0_NEW_CP_CONSTBUF    },
360bf215546Sopenharmony_ci   { nvc0_compute_validate_driverconst,   NVC0_NEW_CP_DRIVERCONST },
361bf215546Sopenharmony_ci   { nvc0_compute_validate_buffers,       NVC0_NEW_CP_BUFFERS     },
362bf215546Sopenharmony_ci   { nvc0_compute_validate_textures,      NVC0_NEW_CP_TEXTURES    },
363bf215546Sopenharmony_ci   { nvc0_compute_validate_samplers,      NVC0_NEW_CP_SAMPLERS    },
364bf215546Sopenharmony_ci   { nvc0_compute_validate_globals,       NVC0_NEW_CP_GLOBALS     },
365bf215546Sopenharmony_ci   { nvc0_compute_validate_surfaces,      NVC0_NEW_CP_SURFACES    },
366bf215546Sopenharmony_ci};
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_cistatic bool
369bf215546Sopenharmony_cinvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
370bf215546Sopenharmony_ci{
371bf215546Sopenharmony_ci   bool ret;
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
374bf215546Sopenharmony_ci                             ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
375bf215546Sopenharmony_ci                             nvc0->bufctx_cp);
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci   if (unlikely(nvc0->state.flushed))
378bf215546Sopenharmony_ci      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
379bf215546Sopenharmony_ci   return ret;
380bf215546Sopenharmony_ci}
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_cistatic void
383bf215546Sopenharmony_cinvc0_compute_upload_input(struct nvc0_context *nvc0,
384bf215546Sopenharmony_ci                          const struct pipe_grid_info *info)
385bf215546Sopenharmony_ci{
386bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
387bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
388bf215546Sopenharmony_ci   struct nvc0_program *cp = nvc0->compprog;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   if (cp->parm_size) {
391bf215546Sopenharmony_ci      struct nouveau_bo *bo = screen->uniform_bo;
392bf215546Sopenharmony_ci      const unsigned base = NVC0_CB_USR_INFO(5);
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
395bf215546Sopenharmony_ci      PUSH_DATA (push, align(cp->parm_size, 0x100));
396bf215546Sopenharmony_ci      PUSH_DATAh(push, bo->offset + base);
397bf215546Sopenharmony_ci      PUSH_DATA (push, bo->offset + base);
398bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
399bf215546Sopenharmony_ci      PUSH_DATA (push, (0 << 8) | 1);
400bf215546Sopenharmony_ci      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
401bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
402bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
403bf215546Sopenharmony_ci      PUSH_DATAp(push, info->input, cp->parm_size / 4);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci      nvc0_compute_invalidate_constbufs(nvc0);
406bf215546Sopenharmony_ci   }
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
409bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
410bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
411bf215546Sopenharmony_ci   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
414bf215546Sopenharmony_ci   /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
415bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
416bf215546Sopenharmony_ci   PUSH_DATA (push, info->work_dim);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
419bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
420bf215546Sopenharmony_ci}
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_civoid
423bf215546Sopenharmony_cinvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
424bf215546Sopenharmony_ci{
425bf215546Sopenharmony_ci   struct nvc0_context *nvc0 = nvc0_context(pipe);
426bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
427bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
428bf215546Sopenharmony_ci   struct nvc0_program *cp = nvc0->compprog;
429bf215546Sopenharmony_ci   int ret;
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci   ret = !nvc0_state_validate_cp(nvc0, ~0);
432bf215546Sopenharmony_ci   if (ret) {
433bf215546Sopenharmony_ci      NOUVEAU_ERR("Failed to launch grid !\n");
434bf215546Sopenharmony_ci      return;
435bf215546Sopenharmony_ci   }
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   nvc0_compute_upload_input(nvc0, info);
438bf215546Sopenharmony_ci
439bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
440bf215546Sopenharmony_ci   PUSH_DATA (push, cp->code_base);
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
443bf215546Sopenharmony_ci   PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));
444bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
445bf215546Sopenharmony_ci   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
448bf215546Sopenharmony_ci   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
449bf215546Sopenharmony_ci   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
450bf215546Sopenharmony_ci   PUSH_DATA (push, cp->num_barriers);
451bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
452bf215546Sopenharmony_ci   PUSH_DATA (push, cp->num_gprs);
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   /* launch preliminary setup */
455bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
456bf215546Sopenharmony_ci   PUSH_DATA (push, 0x1);
457bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
458bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
459bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
460bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   /* block setup */
463bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
464bf215546Sopenharmony_ci   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
465bf215546Sopenharmony_ci   PUSH_DATA (push, info->block[2]);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   nouveau_pushbuf_space(push, 32, 2, 1);
468bf215546Sopenharmony_ci   PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci   if (unlikely(info->indirect)) {
471bf215546Sopenharmony_ci      struct nv04_resource *res = nv04_resource(info->indirect);
472bf215546Sopenharmony_ci      uint32_t offset = res->offset + info->indirect_offset;
473bf215546Sopenharmony_ci      unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
476bf215546Sopenharmony_ci      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
477bf215546Sopenharmony_ci      nouveau_pushbuf_data(push, res->bo, offset,
478bf215546Sopenharmony_ci                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
479bf215546Sopenharmony_ci   } else {
480bf215546Sopenharmony_ci      /* grid setup */
481bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
482bf215546Sopenharmony_ci      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
483bf215546Sopenharmony_ci      PUSH_DATA (push, info->grid[2]);
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci      /* kernel launching */
486bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
487bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
488bf215546Sopenharmony_ci      BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
489bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
490bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
491bf215546Sopenharmony_ci      PUSH_DATA (push, 0x1000);
492bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
493bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
494bf215546Sopenharmony_ci      BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
495bf215546Sopenharmony_ci      PUSH_DATA (push, 0x1);
496bf215546Sopenharmony_ci   }
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   /* TODO: Not sure if this is really necessary. */
499bf215546Sopenharmony_ci   nvc0_compute_invalidate_surfaces(nvc0, 5);
500bf215546Sopenharmony_ci   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
501bf215546Sopenharmony_ci   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
502bf215546Sopenharmony_ci   nvc0->images_dirty[5] |= nvc0->images_valid[5];
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci   nvc0_update_compute_invocations_counter(nvc0, info);
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_cistatic void
508bf215546Sopenharmony_cinvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0,
509bf215546Sopenharmony_ci                                         const struct pipe_grid_info *info) {
510bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
511bf215546Sopenharmony_ci   struct nv04_resource *res = nv04_resource(info->indirect);
512bf215546Sopenharmony_ci   uint32_t offset = res->offset + info->indirect_offset;
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   nouveau_pushbuf_space(push, 16, 0, 8);
515bf215546Sopenharmony_ci   PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
516bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7);
517bf215546Sopenharmony_ci   PUSH_DATA(push, 6);
518bf215546Sopenharmony_ci   PUSH_DATA(push, info->block[0]);
519bf215546Sopenharmony_ci   PUSH_DATA(push, info->block[1]);
520bf215546Sopenharmony_ci   PUSH_DATA(push, info->block[2]);
521bf215546Sopenharmony_ci   nouveau_pushbuf_data(push, res->bo, offset,
522bf215546Sopenharmony_ci                        NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
523bf215546Sopenharmony_ci}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_civoid
526bf215546Sopenharmony_cinvc0_update_compute_invocations_counter(struct nvc0_context *nvc0,
527bf215546Sopenharmony_ci                                        const struct pipe_grid_info *info) {
528bf215546Sopenharmony_ci   if (unlikely(info->indirect)) {
529bf215546Sopenharmony_ci      nvc0_compute_update_indirect_invocations(nvc0, info);
530bf215546Sopenharmony_ci   } else {
531bf215546Sopenharmony_ci      uint64_t invocations = info->block[0] * info->block[1] * info->block[2];
532bf215546Sopenharmony_ci      invocations *= info->grid[0] * info->grid[1] * info->grid[2];
533bf215546Sopenharmony_ci      nvc0->compute_invocations += invocations;
534bf215546Sopenharmony_ci   }
535bf215546Sopenharmony_ci}
536