1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2012 Francisco Jerez
3bf215546Sopenharmony_ci * Copyright 2015 Samuel Pitoiset
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining
6bf215546Sopenharmony_ci * a copy of this software and associated documentation files (the
7bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
8bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
9bf215546Sopenharmony_ci * distribute, sublicense, and/or sell copies of the Software, and to
10bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
11bf215546Sopenharmony_ci * the following conditions:
12bf215546Sopenharmony_ci *
13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
14bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial
15bf215546Sopenharmony_ci * portions of the Software.
16bf215546Sopenharmony_ci *
17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18bf215546Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20bf215546Sopenharmony_ci * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21bf215546Sopenharmony_ci * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22bf215546Sopenharmony_ci * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23bf215546Sopenharmony_ci * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24bf215546Sopenharmony_ci *
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/format/u_format.h"
28bf215546Sopenharmony_ci#include "nv50/nv50_context.h"
29bf215546Sopenharmony_ci#include "nv50/nv50_compute.xml.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "nv50_ir_driver.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ciint
34bf215546Sopenharmony_cinv50_screen_compute_setup(struct nv50_screen *screen,
35bf215546Sopenharmony_ci                          struct nouveau_pushbuf *push)
36bf215546Sopenharmony_ci{
37bf215546Sopenharmony_ci   struct nouveau_device *dev = screen->base.device;
38bf215546Sopenharmony_ci   struct nouveau_object *chan = screen->base.channel;
39bf215546Sopenharmony_ci   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
40bf215546Sopenharmony_ci   unsigned obj_class;
41bf215546Sopenharmony_ci   int i, ret;
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   switch (dev->chipset & 0xf0) {
44bf215546Sopenharmony_ci   case 0x50:
45bf215546Sopenharmony_ci   case 0x80:
46bf215546Sopenharmony_ci   case 0x90:
47bf215546Sopenharmony_ci      obj_class = NV50_COMPUTE_CLASS;
48bf215546Sopenharmony_ci      break;
49bf215546Sopenharmony_ci   case 0xa0:
50bf215546Sopenharmony_ci      switch (dev->chipset) {
51bf215546Sopenharmony_ci      case 0xa3:
52bf215546Sopenharmony_ci      case 0xa5:
53bf215546Sopenharmony_ci      case 0xa8:
54bf215546Sopenharmony_ci         obj_class = NVA3_COMPUTE_CLASS;
55bf215546Sopenharmony_ci         break;
56bf215546Sopenharmony_ci      default:
57bf215546Sopenharmony_ci         obj_class = NV50_COMPUTE_CLASS;
58bf215546Sopenharmony_ci         break;
59bf215546Sopenharmony_ci      }
60bf215546Sopenharmony_ci      break;
61bf215546Sopenharmony_ci   default:
62bf215546Sopenharmony_ci      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
63bf215546Sopenharmony_ci      return -1;
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
67bf215546Sopenharmony_ci                            &screen->compute);
68bf215546Sopenharmony_ci   if (ret)
69bf215546Sopenharmony_ci      return ret;
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
72bf215546Sopenharmony_ci   PUSH_DATA (push, screen->compute->handle);
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
75bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
76bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
77bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
78bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
79bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->stack_bo->offset);
80bf215546Sopenharmony_ci   PUSH_DATA (push, screen->stack_bo->offset);
81bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
82bf215546Sopenharmony_ci   PUSH_DATA (push, 4);
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(UNK0290), 1);
85bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
86bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
87bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
88bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
89bf215546Sopenharmony_ci   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
90bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(UNK0384), 1);
91bf215546Sopenharmony_ci   PUSH_DATA (push, 0x100);
92bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
93bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   for (i = 0; i < 15; i++) {
96bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
97bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
98bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
99bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
100bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
101bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
102bf215546Sopenharmony_ci      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
103bf215546Sopenharmony_ci   }
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
106bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
107bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
108bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
109bf215546Sopenharmony_ci   PUSH_DATA (push, ~0);
110bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
111bf215546Sopenharmony_ci   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
114bf215546Sopenharmony_ci   PUSH_DATA (push, 7);
115bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
116bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
117bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
118bf215546Sopenharmony_ci   PUSH_DATA (push, 7);
119bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
120bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
121bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
122bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
125bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
126bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
127bf215546Sopenharmony_ci   PUSH_DATA (push, 0x54);
128bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
129bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
132bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
133bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
134bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset);
135bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset);
136bf215546Sopenharmony_ci   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
139bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
140bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
141bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset + 65536);
142bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset + 65536);
143bf215546Sopenharmony_ci   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
146bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
149bf215546Sopenharmony_ci   PUSH_DATA (push, fifo->vram);
150bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
151bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
152bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls_bo->offset + 65536);
153bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
154bf215546Sopenharmony_ci   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
157bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
158bf215546Sopenharmony_ci   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
159bf215546Sopenharmony_ci   PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000);
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2);
162bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->fence.bo->offset + 16);
163bf215546Sopenharmony_ci   PUSH_DATA (push, screen->fence.bo->offset + 16);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   return 0;
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_cistatic void
169bf215546Sopenharmony_cinv50_compute_validate_samplers(struct nv50_context *nv50)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci   bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE);
172bf215546Sopenharmony_ci   if (need_flush) {
173bf215546Sopenharmony_ci      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
174bf215546Sopenharmony_ci      PUSH_DATA (nv50->base.pushbuf, 0);
175bf215546Sopenharmony_ci   }
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci   /* Invalidate all 3D samplers because they are aliased. */
178bf215546Sopenharmony_ci   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_cistatic void
182bf215546Sopenharmony_cinv50_compute_validate_textures(struct nv50_context *nv50)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE);
185bf215546Sopenharmony_ci   if (need_flush) {
186bf215546Sopenharmony_ci      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1);
187bf215546Sopenharmony_ci      PUSH_DATA (nv50->base.pushbuf, 0);
188bf215546Sopenharmony_ci   }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* Invalidate all 3D textures because they are aliased. */
191bf215546Sopenharmony_ci   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
192bf215546Sopenharmony_ci   nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_cistatic inline void
196bf215546Sopenharmony_cinv50_compute_invalidate_constbufs(struct nv50_context *nv50)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci   int s;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
201bf215546Sopenharmony_ci   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) {
202bf215546Sopenharmony_ci      nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s];
203bf215546Sopenharmony_ci      nv50->state.uniform_buffer_bound[s] = false;
204bf215546Sopenharmony_ci   }
205bf215546Sopenharmony_ci   nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
206bf215546Sopenharmony_ci}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_cistatic void
209bf215546Sopenharmony_cinv50_compute_validate_constbufs(struct nv50_context *nv50)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nv50->base.pushbuf;
212bf215546Sopenharmony_ci   const int s = NV50_SHADER_STAGE_COMPUTE;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   while (nv50->constbuf_dirty[s]) {
215bf215546Sopenharmony_ci      int i = ffs(nv50->constbuf_dirty[s]) - 1;
216bf215546Sopenharmony_ci      nv50->constbuf_dirty[s] &= ~(1 << i);
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci      if (nv50->constbuf[s][i].user) {
219bf215546Sopenharmony_ci         const unsigned b = NV50_CB_PVP + s;
220bf215546Sopenharmony_ci         unsigned start = 0;
221bf215546Sopenharmony_ci         unsigned words = nv50->constbuf[s][0].size / 4;
222bf215546Sopenharmony_ci         if (i) {
223bf215546Sopenharmony_ci            NOUVEAU_ERR("user constbufs only supported in slot 0\n");
224bf215546Sopenharmony_ci            continue;
225bf215546Sopenharmony_ci         }
226bf215546Sopenharmony_ci         if (!nv50->state.uniform_buffer_bound[s]) {
227bf215546Sopenharmony_ci            nv50->state.uniform_buffer_bound[s] = true;
228bf215546Sopenharmony_ci            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
229bf215546Sopenharmony_ci            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
230bf215546Sopenharmony_ci         }
231bf215546Sopenharmony_ci         while (words) {
232bf215546Sopenharmony_ci            unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci            PUSH_SPACE(push, nr + 3);
235bf215546Sopenharmony_ci            BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
236bf215546Sopenharmony_ci            PUSH_DATA (push, (start << 8) | b);
237bf215546Sopenharmony_ci            BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr);
238bf215546Sopenharmony_ci            PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci            start += nr;
241bf215546Sopenharmony_ci            words -= nr;
242bf215546Sopenharmony_ci         }
243bf215546Sopenharmony_ci      } else {
244bf215546Sopenharmony_ci         struct nv04_resource *res =
245bf215546Sopenharmony_ci            nv04_resource(nv50->constbuf[s][i].u.buf);
246bf215546Sopenharmony_ci         if (res) {
247bf215546Sopenharmony_ci            /* TODO: allocate persistent bindings */
248bf215546Sopenharmony_ci            const unsigned b = s * 16 + i;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci            assert(nouveau_resource_mapped_by_gpu(&res->base));
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci            BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
253bf215546Sopenharmony_ci            PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
254bf215546Sopenharmony_ci            PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
255bf215546Sopenharmony_ci            PUSH_DATA (push, (b << 16) |
256bf215546Sopenharmony_ci                       (nv50->constbuf[s][i].size & 0xffff));
257bf215546Sopenharmony_ci            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
258bf215546Sopenharmony_ci            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci            BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci            nv50->cb_dirty = 1; /* Force cache flush for UBO. */
263bf215546Sopenharmony_ci            res->cb_bindings[s] |= 1 << i;
264bf215546Sopenharmony_ci         } else {
265bf215546Sopenharmony_ci            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
266bf215546Sopenharmony_ci            PUSH_DATA (push, (i << 8) | 0);
267bf215546Sopenharmony_ci         }
268bf215546Sopenharmony_ci         if (i == 0)
269bf215546Sopenharmony_ci            nv50->state.uniform_buffer_bound[s] = false;
270bf215546Sopenharmony_ci      }
271bf215546Sopenharmony_ci   }
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   // TODO: Check if having orthogonal slots means the two don't trample over
274bf215546Sopenharmony_ci   // each other.
275bf215546Sopenharmony_ci   nv50_compute_invalidate_constbufs(nv50);
276bf215546Sopenharmony_ci}
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_cistatic void
279bf215546Sopenharmony_cinv50_get_surface_dims(const struct pipe_image_view *view,
280bf215546Sopenharmony_ci                      int *width, int *height, int *depth)
281bf215546Sopenharmony_ci{
282bf215546Sopenharmony_ci   struct nv04_resource *res = nv04_resource(view->resource);
283bf215546Sopenharmony_ci   int level;
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   *width = *height = *depth = 1;
286bf215546Sopenharmony_ci   if (res->base.target == PIPE_BUFFER) {
287bf215546Sopenharmony_ci      *width = view->u.buf.size / util_format_get_blocksize(view->format);
288bf215546Sopenharmony_ci      return;
289bf215546Sopenharmony_ci   }
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   level = view->u.tex.level;
292bf215546Sopenharmony_ci   *width = u_minify(view->resource->width0, level);
293bf215546Sopenharmony_ci   *height = u_minify(view->resource->height0, level);
294bf215546Sopenharmony_ci   *depth = u_minify(view->resource->depth0, level);
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   switch (res->base.target) {
297bf215546Sopenharmony_ci   case PIPE_TEXTURE_1D_ARRAY:
298bf215546Sopenharmony_ci   case PIPE_TEXTURE_2D_ARRAY:
299bf215546Sopenharmony_ci   case PIPE_TEXTURE_CUBE:
300bf215546Sopenharmony_ci   case PIPE_TEXTURE_CUBE_ARRAY:
301bf215546Sopenharmony_ci      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
302bf215546Sopenharmony_ci      break;
303bf215546Sopenharmony_ci   case PIPE_TEXTURE_1D:
304bf215546Sopenharmony_ci   case PIPE_TEXTURE_2D:
305bf215546Sopenharmony_ci   case PIPE_TEXTURE_RECT:
306bf215546Sopenharmony_ci   case PIPE_TEXTURE_3D:
307bf215546Sopenharmony_ci      break;
308bf215546Sopenharmony_ci   default:
309bf215546Sopenharmony_ci      assert(!"unexpected texture target");
310bf215546Sopenharmony_ci      break;
311bf215546Sopenharmony_ci   }
312bf215546Sopenharmony_ci}
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_cistatic void
315bf215546Sopenharmony_cinv50_mark_image_range_valid(const struct pipe_image_view *view)
316bf215546Sopenharmony_ci{
317bf215546Sopenharmony_ci   struct nv04_resource *res = (struct nv04_resource *)view->resource;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   assert(view->resource->target == PIPE_BUFFER);
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   util_range_add(&res->base, &res->valid_buffer_range,
322bf215546Sopenharmony_ci                  view->u.buf.offset,
323bf215546Sopenharmony_ci                  view->u.buf.offset + view->u.buf.size);
324bf215546Sopenharmony_ci}
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_cistatic inline void
327bf215546Sopenharmony_cinv50_set_surface_info(struct nouveau_pushbuf *push,
328bf215546Sopenharmony_ci                      const struct pipe_image_view *view,
329bf215546Sopenharmony_ci                      int width, int height, int depth)
330bf215546Sopenharmony_ci{
331bf215546Sopenharmony_ci   struct nv04_resource *res;
332bf215546Sopenharmony_ci   uint32_t *const info = push->cur;
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci   push->cur += 12;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   /* Make sure to always initialize the surface information area because it's
337bf215546Sopenharmony_ci    * used to check if the given image is bound or not. */
338bf215546Sopenharmony_ci   memset(info, 0, 12 * sizeof(*info));
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   if (!view || !view->resource)
341bf215546Sopenharmony_ci      return;
342bf215546Sopenharmony_ci   res = nv04_resource(view->resource);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   /* Stick the image dimensions for the imageSize() builtin. */
345bf215546Sopenharmony_ci   info[0] = width;
346bf215546Sopenharmony_ci   info[1] = height;
347bf215546Sopenharmony_ci   info[2] = depth;
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
350bf215546Sopenharmony_ci    * offset and to check if the format doesn't mismatch. */
351bf215546Sopenharmony_ci   info[3] = util_format_get_blocksize(view->format);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   if (res->base.target != PIPE_BUFFER) {
354bf215546Sopenharmony_ci      struct nv50_miptree *mt = nv50_miptree(&res->base);
355bf215546Sopenharmony_ci      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
356bf215546Sopenharmony_ci      unsigned nby = align(util_format_get_nblocksy(view->format, height),
357bf215546Sopenharmony_ci                           NV50_TILE_SIZE_Y(lvl->tile_mode));
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      if (mt->layout_3d) {
360bf215546Sopenharmony_ci         info[4] = nby;
361bf215546Sopenharmony_ci         info[11] = view->u.tex.first_layer;
362bf215546Sopenharmony_ci      } else {
363bf215546Sopenharmony_ci         info[4] = mt->layer_stride / lvl->pitch;
364bf215546Sopenharmony_ci      }
365bf215546Sopenharmony_ci      info[6] = mt->ms_x;
366bf215546Sopenharmony_ci      info[7] = mt->ms_y;
367bf215546Sopenharmony_ci      info[8] = NV50_TILE_SHIFT_X(lvl->tile_mode);
368bf215546Sopenharmony_ci      info[9] = NV50_TILE_SHIFT_Y(lvl->tile_mode);
369bf215546Sopenharmony_ci      info[10] = NV50_TILE_SHIFT_Z(lvl->tile_mode);
370bf215546Sopenharmony_ci   }
371bf215546Sopenharmony_ci}
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_cistatic void
374bf215546Sopenharmony_cinv50_compute_validate_surfaces(struct nv50_context *nv50)
375bf215546Sopenharmony_ci{
376bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nv50->base.pushbuf;
377bf215546Sopenharmony_ci   int i;
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
380bf215546Sopenharmony_ci      struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
381bf215546Sopenharmony_ci      int width, height, depth;
382bf215546Sopenharmony_ci      uint64_t address = 0;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci      if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
387bf215546Sopenharmony_ci         struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
388bf215546Sopenharmony_ci         struct nv04_resource *res = nv04_resource(buffer->buffer);
389bf215546Sopenharmony_ci         PUSH_DATAh(push, res->address + buffer->buffer_offset);
390bf215546Sopenharmony_ci         PUSH_DATA (push, res->address + buffer->buffer_offset);
391bf215546Sopenharmony_ci         PUSH_DATA (push, 0); /* pitch? */
392bf215546Sopenharmony_ci         PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
393bf215546Sopenharmony_ci         PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
394bf215546Sopenharmony_ci         BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
395bf215546Sopenharmony_ci         util_range_add(&res->base, &res->valid_buffer_range,
396bf215546Sopenharmony_ci                        buffer->buffer_offset,
397bf215546Sopenharmony_ci                        buffer->buffer_offset +
398bf215546Sopenharmony_ci                        buffer->buffer_size);
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci         PUSH_SPACE(push, 1 + 3);
401bf215546Sopenharmony_ci         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
402bf215546Sopenharmony_ci         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
403bf215546Sopenharmony_ci         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
404bf215546Sopenharmony_ci         PUSH_DATA (push, buffer->buffer_size);
405bf215546Sopenharmony_ci      } else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
406bf215546Sopenharmony_ci         struct pipe_image_view *view = &nv50->images[gmem->slot];
407bf215546Sopenharmony_ci         struct nv04_resource *res = nv04_resource(view->resource);
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci         /* get surface dimensions based on the target. */
410bf215546Sopenharmony_ci         nv50_get_surface_dims(view, &width, &height, &depth);
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci         address = res->address;
413bf215546Sopenharmony_ci         if (res->base.target == PIPE_BUFFER) {
414bf215546Sopenharmony_ci            address += view->u.buf.offset;
415bf215546Sopenharmony_ci            assert(!(address & 0xff));
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
418bf215546Sopenharmony_ci               nv50_mark_image_range_valid(view);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci            PUSH_DATAh(push, address);
421bf215546Sopenharmony_ci            PUSH_DATA (push, address);
422bf215546Sopenharmony_ci            PUSH_DATA (push, 0); /* pitch? */
423bf215546Sopenharmony_ci            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
424bf215546Sopenharmony_ci            PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
425bf215546Sopenharmony_ci         } else {
426bf215546Sopenharmony_ci            struct nv50_miptree *mt = nv50_miptree(view->resource);
427bf215546Sopenharmony_ci            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
428bf215546Sopenharmony_ci            const unsigned z = view->u.tex.first_layer;
429bf215546Sopenharmony_ci            unsigned max_size;
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci            if (mt->layout_3d) {
432bf215546Sopenharmony_ci               address += nv50_mt_zslice_offset(mt, view->u.tex.level, 0);
433bf215546Sopenharmony_ci               max_size = mt->total_size;
434bf215546Sopenharmony_ci            } else {
435bf215546Sopenharmony_ci               address += mt->layer_stride * z;
436bf215546Sopenharmony_ci               max_size = mt->layer_stride * (view->u.tex.last_layer - view->u.tex.first_layer + 1);
437bf215546Sopenharmony_ci            }
438bf215546Sopenharmony_ci            address += lvl->offset;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci            PUSH_DATAh(push, address);
441bf215546Sopenharmony_ci            PUSH_DATA (push, address);
442bf215546Sopenharmony_ci            if (mt->layout_3d) {
443bf215546Sopenharmony_ci               // We have to adjust the size of the 3d surface to be
444bf215546Sopenharmony_ci               // accessible within 2d limits. The size of each z tile goes
445bf215546Sopenharmony_ci               // into the x direction, while the number of z tiles goes into
446bf215546Sopenharmony_ci               // the y direction.
447bf215546Sopenharmony_ci               const unsigned nby = util_format_get_nblocksy(view->format, height);
448bf215546Sopenharmony_ci               const unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
449bf215546Sopenharmony_ci               const unsigned tsz = NV50_TILE_SIZE_Z(lvl->tile_mode);
450bf215546Sopenharmony_ci               const unsigned pitch = lvl->pitch * tsz;
451bf215546Sopenharmony_ci               const unsigned maxy = align(nby, tsy) * align(depth, tsz) >> NV50_TILE_SHIFT_Z(lvl->tile_mode);
452bf215546Sopenharmony_ci               PUSH_DATA (push, pitch * tsy);
453bf215546Sopenharmony_ci               PUSH_DATA (push, (maxy - 1) << 16 | (pitch - 1));
454bf215546Sopenharmony_ci               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
455bf215546Sopenharmony_ci            } else if (nouveau_bo_memtype(res->bo)) {
456bf215546Sopenharmony_ci               PUSH_DATA (push, lvl->pitch * NV50_TILE_SIZE_Y(lvl->tile_mode));
457bf215546Sopenharmony_ci               PUSH_DATA (push, (max_size / lvl->pitch - 1) << 16 | (lvl->pitch - 1));
458bf215546Sopenharmony_ci               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
459bf215546Sopenharmony_ci            } else {
460bf215546Sopenharmony_ci               PUSH_DATA (push, lvl->pitch);
461bf215546Sopenharmony_ci               PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1);
462bf215546Sopenharmony_ci               PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
463bf215546Sopenharmony_ci            }
464bf215546Sopenharmony_ci         }
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci         BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci         PUSH_SPACE(push, 12 + 3);
469bf215546Sopenharmony_ci         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
470bf215546Sopenharmony_ci         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
471bf215546Sopenharmony_ci         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
472bf215546Sopenharmony_ci         nv50_set_surface_info(push, view, width, height, depth);
473bf215546Sopenharmony_ci      } else {
474bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
475bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
476bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
477bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
478bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
479bf215546Sopenharmony_ci      }
480bf215546Sopenharmony_ci   }
481bf215546Sopenharmony_ci}
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_cistatic void
484bf215546Sopenharmony_cinv50_compute_validate_globals(struct nv50_context *nv50)
485bf215546Sopenharmony_ci{
486bf215546Sopenharmony_ci   unsigned i;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
489bf215546Sopenharmony_ci        ++i) {
490bf215546Sopenharmony_ci      struct pipe_resource *res = *util_dynarray_element(
491bf215546Sopenharmony_ci         &nv50->global_residents, struct pipe_resource *, i);
492bf215546Sopenharmony_ci      if (res)
493bf215546Sopenharmony_ci         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
494bf215546Sopenharmony_ci                                  nv04_resource(res), NOUVEAU_BO_RDWR);
495bf215546Sopenharmony_ci   }
496bf215546Sopenharmony_ci}
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_cistatic struct nv50_state_validate
499bf215546Sopenharmony_civalidate_list_cp[] = {
500bf215546Sopenharmony_ci   { nv50_compprog_validate,              NV50_NEW_CP_PROGRAM     },
501bf215546Sopenharmony_ci   { nv50_compute_validate_constbufs,     NV50_NEW_CP_CONSTBUF    },
502bf215546Sopenharmony_ci   { nv50_compute_validate_surfaces,      NV50_NEW_CP_SURFACES |
503bf215546Sopenharmony_ci                                          NV50_NEW_CP_BUFFERS  |
504bf215546Sopenharmony_ci                                          NV50_NEW_CP_PROGRAM     },
505bf215546Sopenharmony_ci   { nv50_compute_validate_textures,      NV50_NEW_CP_TEXTURES    },
506bf215546Sopenharmony_ci   { nv50_compute_validate_samplers,      NV50_NEW_CP_SAMPLERS    },
507bf215546Sopenharmony_ci   { nv50_compute_validate_globals,       NV50_NEW_CP_GLOBALS     },
508bf215546Sopenharmony_ci};
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_cistatic bool
511bf215546Sopenharmony_cinv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
512bf215546Sopenharmony_ci{
513bf215546Sopenharmony_ci   bool ret;
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   /* TODO: validate textures, samplers, surfaces */
516bf215546Sopenharmony_ci   ret = nv50_state_validate(nv50, mask, validate_list_cp,
517bf215546Sopenharmony_ci                             ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
518bf215546Sopenharmony_ci                             nv50->bufctx_cp);
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci   if (unlikely(nv50->state.flushed))
521bf215546Sopenharmony_ci      nv50_bufctx_fence(nv50->bufctx_cp, true);
522bf215546Sopenharmony_ci   return ret;
523bf215546Sopenharmony_ci}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_cistatic void
526bf215546Sopenharmony_cinv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
527bf215546Sopenharmony_ci{
528bf215546Sopenharmony_ci   struct nv50_screen *screen = nv50->screen;
529bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nv50->base.pushbuf;
530bf215546Sopenharmony_ci   unsigned size = align(nv50->compprog->parm_size, 0x4);
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
533bf215546Sopenharmony_ci   PUSH_DATA (push, (1 + (size / 4)) << 8);
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   if (size) {
536bf215546Sopenharmony_ci      struct nouveau_mm_allocation *mm;
537bf215546Sopenharmony_ci      struct nouveau_bo *bo = NULL;
538bf215546Sopenharmony_ci      unsigned offset;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
541bf215546Sopenharmony_ci      assert(mm);
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci      nouveau_bo_map(bo, 0, nv50->base.client);
544bf215546Sopenharmony_ci      memcpy(bo->map + offset, input, size);
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
547bf215546Sopenharmony_ci      nouveau_pushbuf_bufctx(push, nv50->bufctx);
548bf215546Sopenharmony_ci      nouveau_pushbuf_validate(push);
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci      nouveau_pushbuf_space(push, 0, 0, 1);
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
553bf215546Sopenharmony_ci      nouveau_pushbuf_data(push, bo, offset, size);
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
556bf215546Sopenharmony_ci      nouveau_bo_ref(NULL, &bo);
557bf215546Sopenharmony_ci      nouveau_bufctx_reset(nv50->bufctx, 0);
558bf215546Sopenharmony_ci   }
559bf215546Sopenharmony_ci}
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_civoid
562bf215546Sopenharmony_cinv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
563bf215546Sopenharmony_ci{
564bf215546Sopenharmony_ci   struct nv50_context *nv50 = nv50_context(pipe);
565bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nv50->base.pushbuf;
566bf215546Sopenharmony_ci   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
567bf215546Sopenharmony_ci   struct nv50_program *cp = nv50->compprog;
568bf215546Sopenharmony_ci   bool ret;
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci   ret = !nv50_state_validate_cp(nv50, ~0);
571bf215546Sopenharmony_ci   if (ret) {
572bf215546Sopenharmony_ci      NOUVEAU_ERR("Failed to launch grid !\n");
573bf215546Sopenharmony_ci      return;
574bf215546Sopenharmony_ci   }
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci   nv50_compute_upload_input(nv50, info->input);
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
579bf215546Sopenharmony_ci   PUSH_DATA (push, cp->code_base);
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
582bf215546Sopenharmony_ci   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x14, 0x40));
583bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
584bf215546Sopenharmony_ci   PUSH_DATA (push, cp->max_gpr);
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   /* no indirect support - just read the parameters out */
587bf215546Sopenharmony_ci   uint32_t grid[3];
588bf215546Sopenharmony_ci   if (unlikely(info->indirect)) {
589bf215546Sopenharmony_ci      pipe_buffer_read(pipe, info->indirect, info->indirect_offset,
590bf215546Sopenharmony_ci                       sizeof(grid), grid);
591bf215546Sopenharmony_ci   } else {
592bf215546Sopenharmony_ci      memcpy(grid, info->grid, sizeof(grid));
593bf215546Sopenharmony_ci   }
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   /* grid/block setup */
596bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
597bf215546Sopenharmony_ci   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
598bf215546Sopenharmony_ci   PUSH_DATA (push, info->block[2]);
599bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
600bf215546Sopenharmony_ci   PUSH_DATA (push, 1 << 16 | block_size);
601bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
602bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
603bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
604bf215546Sopenharmony_ci   PUSH_DATA (push, grid[1] << 16 | grid[0]);
605bf215546Sopenharmony_ci   BEGIN_NV04(push, NV50_CP(GRIDID), 1);
606bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   for (int i = 0; i < grid[2]; i++) {
609bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
610bf215546Sopenharmony_ci      PUSH_DATA (push, grid[2] | i << 16);
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci      /* kernel launching */
613bf215546Sopenharmony_ci      BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
614bf215546Sopenharmony_ci      PUSH_DATA (push, 0);
615bf215546Sopenharmony_ci   }
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci   BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
618bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci   /* bind a compute shader clobbers fragment shader state */
621bf215546Sopenharmony_ci   nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] *
624bf215546Sopenharmony_ci      grid[0] * grid[1] * grid[2];
625bf215546Sopenharmony_ci}
626