1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2012 Nouveau Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci *
22bf215546Sopenharmony_ci * Authors: Christoph Bumiller
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "nvc0/nvc0_context.h"
26bf215546Sopenharmony_ci#include "nvc0/nve4_compute.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "nv50_ir_driver.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "drf.h"
31bf215546Sopenharmony_ci#include "qmd.h"
32bf215546Sopenharmony_ci#include "cla0c0qmd.h"
33bf215546Sopenharmony_ci#include "clc0c0qmd.h"
34bf215546Sopenharmony_ci#include "clc3c0qmd.h"
35bf215546Sopenharmony_ci
/* Per-QMD-layout shorthands: each wrapper pins the class and QMD version
 * arguments of NVVAL_MW_SET / NVDEF_MW_SET and forwards the destination
 * pointer plus every remaining argument unchanged.
 */
#define NVA0C0_QMDV00_06_VAL_SET(p, ...) \
   NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##__VA_ARGS__)
#define NVA0C0_QMDV00_06_DEF_SET(p, ...) \
   NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##__VA_ARGS__)
#define NVC0C0_QMDV02_01_VAL_SET(p, ...) \
   NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##__VA_ARGS__)
#define NVC0C0_QMDV02_01_DEF_SET(p, ...) \
   NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##__VA_ARGS__)
#define NVC3C0_QMDV02_02_VAL_SET(p, ...) \
   NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##__VA_ARGS__)
#define NVC3C0_QMDV02_02_DEF_SET(p, ...) \
   NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##__VA_ARGS__)
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ciint
44bf215546Sopenharmony_cinve4_screen_compute_setup(struct nvc0_screen *screen,
45bf215546Sopenharmony_ci                          struct nouveau_pushbuf *push)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   struct nouveau_device *dev = screen->base.device;
48bf215546Sopenharmony_ci   struct nouveau_object *chan = screen->base.channel;
49bf215546Sopenharmony_ci   int i;
50bf215546Sopenharmony_ci   int ret;
51bf215546Sopenharmony_ci   uint32_t obj_class;
52bf215546Sopenharmony_ci   uint64_t address;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   switch (dev->chipset & ~0xf) {
55bf215546Sopenharmony_ci   case 0x160:
56bf215546Sopenharmony_ci      obj_class = TU102_COMPUTE_CLASS;
57bf215546Sopenharmony_ci      break;
58bf215546Sopenharmony_ci   case 0x140:
59bf215546Sopenharmony_ci      obj_class = GV100_COMPUTE_CLASS;
60bf215546Sopenharmony_ci      break;
61bf215546Sopenharmony_ci   case 0x100:
62bf215546Sopenharmony_ci   case 0xf0:
63bf215546Sopenharmony_ci      obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
64bf215546Sopenharmony_ci      break;
65bf215546Sopenharmony_ci   case 0xe0:
66bf215546Sopenharmony_ci      obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
67bf215546Sopenharmony_ci      break;
68bf215546Sopenharmony_ci   case 0x110:
69bf215546Sopenharmony_ci      obj_class = GM107_COMPUTE_CLASS;
70bf215546Sopenharmony_ci      break;
71bf215546Sopenharmony_ci   case 0x120:
72bf215546Sopenharmony_ci      obj_class = GM200_COMPUTE_CLASS;
73bf215546Sopenharmony_ci      break;
74bf215546Sopenharmony_ci   case 0x130:
75bf215546Sopenharmony_ci      obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ?
76bf215546Sopenharmony_ci                      GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;
77bf215546Sopenharmony_ci      break;
78bf215546Sopenharmony_ci   default:
79bf215546Sopenharmony_ci      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
80bf215546Sopenharmony_ci      return -1;
81bf215546Sopenharmony_ci   }
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
84bf215546Sopenharmony_ci                            &screen->compute);
85bf215546Sopenharmony_ci   if (ret) {
86bf215546Sopenharmony_ci      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
87bf215546Sopenharmony_ci      return ret;
88bf215546Sopenharmony_ci   }
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
91bf215546Sopenharmony_ci   PUSH_DATA (push, screen->compute->oclass);
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2);
94bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls->offset);
95bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->offset);
96bf215546Sopenharmony_ci   /* No idea why there are 2. Divide size by 2 to be safe.
97bf215546Sopenharmony_ci    * Actually this might be per-MP TEMP size and looks like I'm only using
98bf215546Sopenharmony_ci    * 2 MPs instead of all 8.
99bf215546Sopenharmony_ci    */
100bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3);
101bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
102bf215546Sopenharmony_ci   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
103bf215546Sopenharmony_ci   PUSH_DATA (push, 0xff);
104bf215546Sopenharmony_ci   if (obj_class < GV100_COMPUTE_CLASS) {
105bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
106bf215546Sopenharmony_ci      PUSH_DATAh(push, screen->tls->size / screen->mp_count);
107bf215546Sopenharmony_ci      PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
108bf215546Sopenharmony_ci      PUSH_DATA (push, 0xff);
109bf215546Sopenharmony_ci   }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   /* Unified address space ? Who needs that ? Certainly not OpenCL.
112bf215546Sopenharmony_ci    *
113bf215546Sopenharmony_ci    * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
114bf215546Sopenharmony_ci    *  accessible. We cannot prevent that at the moment, so expect failure.
115bf215546Sopenharmony_ci    */
116bf215546Sopenharmony_ci   if (obj_class < GV100_COMPUTE_CLASS) {
117bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
118bf215546Sopenharmony_ci      PUSH_DATA (push, 0xff << 24);
119bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
120bf215546Sopenharmony_ci      PUSH_DATA (push, 0xfe << 24);
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
123bf215546Sopenharmony_ci      PUSH_DATAh(push, screen->text->offset);
124bf215546Sopenharmony_ci      PUSH_DATA (push, screen->text->offset);
125bf215546Sopenharmony_ci   } else {
126bf215546Sopenharmony_ci      BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
127bf215546Sopenharmony_ci      PUSH_DATAh(push, 0xfeULL << 24);
128bf215546Sopenharmony_ci      PUSH_DATA (push, 0xfeULL << 24);
129bf215546Sopenharmony_ci      BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
130bf215546Sopenharmony_ci      PUSH_DATAh(push, 0xffULL << 24);
131bf215546Sopenharmony_ci      PUSH_DATA (push, 0xffULL << 24);
132bf215546Sopenharmony_ci   }
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
135bf215546Sopenharmony_ci   PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   /* NOTE: these do not affect the state used by the 3D object */
138bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3);
139bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset);
140bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset);
141bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
142bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3);
143bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->txc->offset + 65536);
144bf215546Sopenharmony_ci   PUSH_DATA (push, screen->txc->offset + 65536);
145bf215546Sopenharmony_ci   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   if (obj_class >= NVF0_COMPUTE_CLASS) {
148bf215546Sopenharmony_ci      /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1)
149bf215546Sopenharmony_ci       * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently
150bf215546Sopenharmony_ci       * disabled because our firmware doesn't support these commands and the
151bf215546Sopenharmony_ci       * GPU hangs if they are used. */
152bf215546Sopenharmony_ci      BEGIN_NIC0(push, SUBC_CP(0x0248), 64);
153bf215546Sopenharmony_ci      for (i = 63; i >= 0; i--)
154bf215546Sopenharmony_ci         PUSH_DATA(push, 0x38000 | i);
155bf215546Sopenharmony_ci      IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
156bf215546Sopenharmony_ci   }
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
159bf215546Sopenharmony_ci   PUSH_DATA (push, 7); /* does not interfere with 3D */
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   /* Disabling this UNK command avoid a read fault when using texelFetch()
162bf215546Sopenharmony_ci    * from a compute shader for weird reasons.
163bf215546Sopenharmony_ci   if (obj_class == NVF0_COMPUTE_CLASS)
164bf215546Sopenharmony_ci      IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
165bf215546Sopenharmony_ci   */
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   /* MS sample coordinate offsets: these do not work with _ALT modes ! */
170bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
171bf215546Sopenharmony_ci   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
172bf215546Sopenharmony_ci   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
173bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
174bf215546Sopenharmony_ci   PUSH_DATA (push, 64);
175bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
176bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
177bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
178bf215546Sopenharmony_ci   PUSH_DATA (push, 0); /* 0 */
179bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
180bf215546Sopenharmony_ci   PUSH_DATA (push, 1); /* 1 */
181bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
182bf215546Sopenharmony_ci   PUSH_DATA (push, 0); /* 2 */
183bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
184bf215546Sopenharmony_ci   PUSH_DATA (push, 1); /* 3 */
185bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
186bf215546Sopenharmony_ci   PUSH_DATA (push, 2); /* 4 */
187bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
188bf215546Sopenharmony_ci   PUSH_DATA (push, 3); /* 5 */
189bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
190bf215546Sopenharmony_ci   PUSH_DATA (push, 2); /* 6 */
191bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
192bf215546Sopenharmony_ci   PUSH_DATA (push, 3); /* 7 */
193bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
196bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
197bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
198bf215546Sopenharmony_ci   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
199bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
200bf215546Sopenharmony_ci   PUSH_DATA (push, 28);
201bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
202bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8);
203bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
204bf215546Sopenharmony_ci   PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
205bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
206bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->offset);
207bf215546Sopenharmony_ci   PUSH_DATAh(push, screen->tls->offset);
208bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
209bf215546Sopenharmony_ci   PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
210bf215546Sopenharmony_ci   PUSH_DATA (push, 0); /* warp cfstack size */
211bf215546Sopenharmony_ci#endif
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
214bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   return 0;
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_cistatic void
220bf215546Sopenharmony_cigm107_compute_validate_surfaces(struct nvc0_context *nvc0,
221bf215546Sopenharmony_ci                                struct pipe_image_view *view, int slot)
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   struct nv04_resource *res = nv04_resource(view->resource);
224bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
225bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
226bf215546Sopenharmony_ci   struct nouveau_bo *txc = nvc0->screen->txc;
227bf215546Sopenharmony_ci   struct nv50_tic_entry *tic;
228bf215546Sopenharmony_ci   uint64_t address;
229bf215546Sopenharmony_ci   const int s = 5;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   tic = nv50_tic_entry(nvc0->images_tic[s][slot]);
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   res = nv04_resource(tic->pipe.texture);
234bf215546Sopenharmony_ci   nvc0_update_tic(nvc0, tic, res);
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   if (tic->id < 0) {
237bf215546Sopenharmony_ci      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci      /* upload the texture view */
240bf215546Sopenharmony_ci      PUSH_SPACE(push, 16);
241bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
242bf215546Sopenharmony_ci      PUSH_DATAh(push, txc->offset + (tic->id * 32));
243bf215546Sopenharmony_ci      PUSH_DATA (push, txc->offset + (tic->id * 32));
244bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
245bf215546Sopenharmony_ci      PUSH_DATA (push, 32);
246bf215546Sopenharmony_ci      PUSH_DATA (push, 1);
247bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
248bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
249bf215546Sopenharmony_ci      PUSH_DATAp(push, &tic->tic[0], 8);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci      BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), 1);
252bf215546Sopenharmony_ci      PUSH_DATA (push, (tic->id << 4) | 1);
253bf215546Sopenharmony_ci   } else
254bf215546Sopenharmony_ci   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
255bf215546Sopenharmony_ci      BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), 1);
256bf215546Sopenharmony_ci      PUSH_DATA (push, (tic->id << 4) | 1);
257bf215546Sopenharmony_ci   }
258bf215546Sopenharmony_ci   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
261bf215546Sopenharmony_ci   res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   /* upload the texture handle */
268bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
269bf215546Sopenharmony_ci   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));
270bf215546Sopenharmony_ci   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));
271bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
272bf215546Sopenharmony_ci   PUSH_DATA (push, 4);
273bf215546Sopenharmony_ci   PUSH_DATA (push, 0x1);
274bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 2);
275bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
276bf215546Sopenharmony_ci   PUSH_DATA (push, tic->id);
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
279bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
280bf215546Sopenharmony_ci}
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_cistatic void
283bf215546Sopenharmony_cinve4_compute_validate_surfaces(struct nvc0_context *nvc0)
284bf215546Sopenharmony_ci{
285bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
286bf215546Sopenharmony_ci   uint64_t address;
287bf215546Sopenharmony_ci   const int s = 5;
288bf215546Sopenharmony_ci   int i, j;
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   if (!nvc0->images_dirty[s])
291bf215546Sopenharmony_ci      return;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
296bf215546Sopenharmony_ci      struct pipe_image_view *view = &nvc0->images[s][i];
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
299bf215546Sopenharmony_ci      PUSH_DATAh(push, address + NVC0_CB_AUX_SU_INFO(i));
300bf215546Sopenharmony_ci      PUSH_DATA (push, address + NVC0_CB_AUX_SU_INFO(i));
301bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
302bf215546Sopenharmony_ci      PUSH_DATA (push, 16 * 4);
303bf215546Sopenharmony_ci      PUSH_DATA (push, 0x1);
304bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 16);
305bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci      if (view->resource) {
308bf215546Sopenharmony_ci         struct nv04_resource *res = nv04_resource(view->resource);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci         if (res->base.target == PIPE_BUFFER) {
311bf215546Sopenharmony_ci            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
312bf215546Sopenharmony_ci               nvc0_mark_image_range_valid(view);
313bf215546Sopenharmony_ci         }
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci         nve4_set_surface_info(push, view, nvc0);
316bf215546Sopenharmony_ci         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci         if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
319bf215546Sopenharmony_ci            gm107_compute_validate_surfaces(nvc0, view, i);
320bf215546Sopenharmony_ci      } else {
321bf215546Sopenharmony_ci         for (j = 0; j < 16; j++)
322bf215546Sopenharmony_ci            PUSH_DATA(push, 0);
323bf215546Sopenharmony_ci      }
324bf215546Sopenharmony_ci   }
325bf215546Sopenharmony_ci}
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci/* Thankfully, textures with samplers follow the normal rules. */
328bf215546Sopenharmony_cistatic void
329bf215546Sopenharmony_cinve4_compute_validate_samplers(struct nvc0_context *nvc0)
330bf215546Sopenharmony_ci{
331bf215546Sopenharmony_ci   bool need_flush = nve4_validate_tsc(nvc0, 5);
332bf215546Sopenharmony_ci   if (need_flush) {
333bf215546Sopenharmony_ci      BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
334bf215546Sopenharmony_ci      PUSH_DATA (nvc0->base.pushbuf, 0);
335bf215546Sopenharmony_ci   }
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   /* Invalidate all 3D samplers because they are aliased. */
338bf215546Sopenharmony_ci   for (int s = 0; s < 5; s++)
339bf215546Sopenharmony_ci      nvc0->samplers_dirty[s] = ~0;
340bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci/* (Code duplicated at bottom for various non-convincing reasons.
344bf215546Sopenharmony_ci *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
345bf215546Sopenharmony_ci *  entries to avoid a subchannel switch.
346bf215546Sopenharmony_ci *  Same for texture cache flushes.
347bf215546Sopenharmony_ci *  Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
348bf215546Sopenharmony_ci */
349bf215546Sopenharmony_cistatic void nve4_compute_validate_textures(struct nvc0_context *);
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_cistatic void
352bf215546Sopenharmony_cinve4_compute_set_tex_handles(struct nvc0_context *nvc0)
353bf215546Sopenharmony_ci{
354bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
355bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
356bf215546Sopenharmony_ci   uint64_t address;
357bf215546Sopenharmony_ci   const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
358bf215546Sopenharmony_ci   unsigned i, n;
359bf215546Sopenharmony_ci   uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   if (!dirty)
362bf215546Sopenharmony_ci      return;
363bf215546Sopenharmony_ci   i = ffs(dirty) - 1;
364bf215546Sopenharmony_ci   n = util_logbase2(dirty) + 1 - i;
365bf215546Sopenharmony_ci   assert(n);
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
370bf215546Sopenharmony_ci   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
371bf215546Sopenharmony_ci   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
372bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
373bf215546Sopenharmony_ci   PUSH_DATA (push, n * 4);
374bf215546Sopenharmony_ci   PUSH_DATA (push, 0x1);
375bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n);
376bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
377bf215546Sopenharmony_ci   PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
380bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   nvc0->textures_dirty[s] = 0;
383bf215546Sopenharmony_ci   nvc0->samplers_dirty[s] = 0;
384bf215546Sopenharmony_ci}
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_cistatic void
387bf215546Sopenharmony_cinve4_compute_validate_constbufs(struct nvc0_context *nvc0)
388bf215546Sopenharmony_ci{
389bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
390bf215546Sopenharmony_ci   const int s = 5;
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   while (nvc0->constbuf_dirty[s]) {
393bf215546Sopenharmony_ci      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
394bf215546Sopenharmony_ci      nvc0->constbuf_dirty[s] &= ~(1 << i);
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci      if (nvc0->constbuf[s][i].user) {
397bf215546Sopenharmony_ci         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
398bf215546Sopenharmony_ci         const unsigned base = NVC0_CB_USR_INFO(s);
399bf215546Sopenharmony_ci         const unsigned size = nvc0->constbuf[s][0].size;
400bf215546Sopenharmony_ci         assert(i == 0); /* we really only want OpenGL uniforms here */
401bf215546Sopenharmony_ci         assert(nvc0->constbuf[s][0].u.data);
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
404bf215546Sopenharmony_ci         PUSH_DATAh(push, bo->offset + base);
405bf215546Sopenharmony_ci         PUSH_DATA (push, bo->offset + base);
406bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
407bf215546Sopenharmony_ci         PUSH_DATA (push, size);
408bf215546Sopenharmony_ci         PUSH_DATA (push, 0x1);
409bf215546Sopenharmony_ci         BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4));
410bf215546Sopenharmony_ci         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
411bf215546Sopenharmony_ci         PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4);
412bf215546Sopenharmony_ci      }
413bf215546Sopenharmony_ci      else {
414bf215546Sopenharmony_ci         struct nv04_resource *res =
415bf215546Sopenharmony_ci            nv04_resource(nvc0->constbuf[s][i].u.buf);
416bf215546Sopenharmony_ci         if (res) {
417bf215546Sopenharmony_ci            uint64_t address
418bf215546Sopenharmony_ci               = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci            /* constbufs above 0 will are fetched via ubo info in the shader */
421bf215546Sopenharmony_ci            if (i > 0) {
422bf215546Sopenharmony_ci               BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
423bf215546Sopenharmony_ci               PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
424bf215546Sopenharmony_ci               PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
425bf215546Sopenharmony_ci               BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
426bf215546Sopenharmony_ci               PUSH_DATA (push, 4 * 4);
427bf215546Sopenharmony_ci               PUSH_DATA (push, 0x1);
428bf215546Sopenharmony_ci               BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
429bf215546Sopenharmony_ci               PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
432bf215546Sopenharmony_ci               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
433bf215546Sopenharmony_ci               PUSH_DATA (push, nvc0->constbuf[s][i].size);
434bf215546Sopenharmony_ci               PUSH_DATA (push, 0);
435bf215546Sopenharmony_ci            }
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
438bf215546Sopenharmony_ci            res->cb_bindings[s] |= 1 << i;
439bf215546Sopenharmony_ci         }
440bf215546Sopenharmony_ci      }
441bf215546Sopenharmony_ci   }
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
444bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
445bf215546Sopenharmony_ci}
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_cistatic void
448bf215546Sopenharmony_cinve4_compute_validate_buffers(struct nvc0_context *nvc0)
449bf215546Sopenharmony_ci{
450bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
451bf215546Sopenharmony_ci   uint64_t address;
452bf215546Sopenharmony_ci   const int s = 5;
453bf215546Sopenharmony_ci   int i;
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
458bf215546Sopenharmony_ci   PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(0));
459bf215546Sopenharmony_ci   PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(0));
460bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
461bf215546Sopenharmony_ci   PUSH_DATA (push, 4 * NVC0_MAX_BUFFERS * 4);
462bf215546Sopenharmony_ci   PUSH_DATA (push, 0x1);
463bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4 * NVC0_MAX_BUFFERS);
464bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
467bf215546Sopenharmony_ci      if (nvc0->buffers[s][i].buffer) {
468bf215546Sopenharmony_ci         struct nv04_resource *res =
469bf215546Sopenharmony_ci            nv04_resource(nvc0->buffers[s][i].buffer);
470bf215546Sopenharmony_ci         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
471bf215546Sopenharmony_ci         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
472bf215546Sopenharmony_ci         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
473bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
474bf215546Sopenharmony_ci         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
475bf215546Sopenharmony_ci         util_range_add(&res->base, &res->valid_buffer_range,
476bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_offset,
477bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_offset +
478bf215546Sopenharmony_ci                        nvc0->buffers[s][i].buffer_size);
479bf215546Sopenharmony_ci      } else {
480bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
481bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
482bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
483bf215546Sopenharmony_ci         PUSH_DATA (push, 0);
484bf215546Sopenharmony_ci      }
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci}
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_cistatic struct nvc0_state_validate
489bf215546Sopenharmony_civalidate_list_cp[] = {
490bf215546Sopenharmony_ci   { nvc0_compprog_validate,              NVC0_NEW_CP_PROGRAM     },
491bf215546Sopenharmony_ci   { nve4_compute_validate_textures,      NVC0_NEW_CP_TEXTURES    },
492bf215546Sopenharmony_ci   { nve4_compute_validate_samplers,      NVC0_NEW_CP_SAMPLERS    },
493bf215546Sopenharmony_ci   { nve4_compute_set_tex_handles,        NVC0_NEW_CP_TEXTURES |
494bf215546Sopenharmony_ci                                          NVC0_NEW_CP_SAMPLERS    },
495bf215546Sopenharmony_ci   { nve4_compute_validate_surfaces,      NVC0_NEW_CP_SURFACES    },
496bf215546Sopenharmony_ci   { nvc0_compute_validate_globals,       NVC0_NEW_CP_GLOBALS     },
497bf215546Sopenharmony_ci   { nve4_compute_validate_buffers,       NVC0_NEW_CP_BUFFERS     },
498bf215546Sopenharmony_ci   { nve4_compute_validate_constbufs,     NVC0_NEW_CP_CONSTBUF    },
499bf215546Sopenharmony_ci};
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_cistatic bool
502bf215546Sopenharmony_cinve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
503bf215546Sopenharmony_ci{
504bf215546Sopenharmony_ci   bool ret;
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
507bf215546Sopenharmony_ci                             ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
508bf215546Sopenharmony_ci                             nvc0->bufctx_cp);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci   if (unlikely(nvc0->state.flushed))
511bf215546Sopenharmony_ci      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
512bf215546Sopenharmony_ci   return ret;
513bf215546Sopenharmony_ci}
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_cistatic void
516bf215546Sopenharmony_cinve4_compute_upload_input(struct nvc0_context *nvc0,
517bf215546Sopenharmony_ci                          const struct pipe_grid_info *info)
518bf215546Sopenharmony_ci{
519bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
520bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
521bf215546Sopenharmony_ci   struct nvc0_program *cp = nvc0->compprog;
522bf215546Sopenharmony_ci   uint64_t address;
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   if (cp->parm_size) {
527bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
528bf215546Sopenharmony_ci      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
529bf215546Sopenharmony_ci      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
530bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
531bf215546Sopenharmony_ci      PUSH_DATA (push, cp->parm_size);
532bf215546Sopenharmony_ci      PUSH_DATA (push, 0x1);
533bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + DIV_ROUND_UP(cp->parm_size, 4));
534bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
535bf215546Sopenharmony_ci      PUSH_DATAb(push, info->input, cp->parm_size);
536bf215546Sopenharmony_ci   }
537bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
538bf215546Sopenharmony_ci   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));
539bf215546Sopenharmony_ci   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));
540bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
541bf215546Sopenharmony_ci   PUSH_DATA (push, 8 * 4);
542bf215546Sopenharmony_ci   PUSH_DATA (push, 0x1);
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   if (unlikely(info->indirect)) {
545bf215546Sopenharmony_ci      struct nv04_resource *res = nv04_resource(info->indirect);
546bf215546Sopenharmony_ci      uint32_t offset = res->offset + info->indirect_offset;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci      nouveau_pushbuf_space(push, 32, 0, 1);
549bf215546Sopenharmony_ci      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
552bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
553bf215546Sopenharmony_ci      PUSH_DATAp(push, info->block, 3);
554bf215546Sopenharmony_ci      nouveau_pushbuf_data(push, res->bo, offset,
555bf215546Sopenharmony_ci                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
556bf215546Sopenharmony_ci   } else {
557bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
558bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
559bf215546Sopenharmony_ci      PUSH_DATAp(push, info->block, 3);
560bf215546Sopenharmony_ci      PUSH_DATAp(push, info->grid, 3);
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
563bf215546Sopenharmony_ci   PUSH_DATA (push, info->work_dim);
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
566bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
567bf215546Sopenharmony_ci}
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_cistatic inline void
570bf215546Sopenharmony_cigp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
571bf215546Sopenharmony_ci                            struct nouveau_bo *bo, uint32_t base, uint32_t size)
572bf215546Sopenharmony_ci{
573bf215546Sopenharmony_ci   uint64_t address = bo->offset + base;
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_ci   assert(index < 8);
576bf215546Sopenharmony_ci   assert(!(base & 0xff));
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
579bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
580bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
581bf215546Sopenharmony_ci                                 DIV_ROUND_UP(size, 16));
582bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
583bf215546Sopenharmony_ci}
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_cistatic inline void
586bf215546Sopenharmony_cinve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo,
587bf215546Sopenharmony_ci                           uint32_t base, uint32_t size)
588bf215546Sopenharmony_ci{
589bf215546Sopenharmony_ci   uint64_t address = bo->offset + base;
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   assert(index < 8);
592bf215546Sopenharmony_ci   assert(!(base & 0xff));
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
595bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
596bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
597bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
598bf215546Sopenharmony_ci}
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_cistatic void
601bf215546Sopenharmony_cinve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
602bf215546Sopenharmony_ci{
603bf215546Sopenharmony_ci   // only user constant buffers 0-6 can be put in the descriptor, the rest are
604bf215546Sopenharmony_ci   // loaded through global memory
605bf215546Sopenharmony_ci   for (int i = 0; i <= 6; i++) {
606bf215546Sopenharmony_ci      if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf)
607bf215546Sopenharmony_ci         continue;
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci      struct nv04_resource *res =
610bf215546Sopenharmony_ci         nv04_resource(nvc0->constbuf[5][i].u.buf);
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci      uint32_t base = res->offset + nvc0->constbuf[5][i].offset;
613bf215546Sopenharmony_ci      uint32_t size = nvc0->constbuf[5][i].size;
614bf215546Sopenharmony_ci      if (gp100)
615bf215546Sopenharmony_ci         gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
616bf215546Sopenharmony_ci      else
617bf215546Sopenharmony_ci         nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
618bf215546Sopenharmony_ci   }
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci   // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because
621bf215546Sopenharmony_ci   // nve4_compute_upload_input() does it later
622bf215546Sopenharmony_ci}
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_cistatic void
625bf215546Sopenharmony_cinve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
626bf215546Sopenharmony_ci                               const struct pipe_grid_info *info)
627bf215546Sopenharmony_ci{
628bf215546Sopenharmony_ci   const struct nvc0_screen *screen = nvc0->screen;
629bf215546Sopenharmony_ci   const struct nvc0_program *cp = nvc0->compprog;
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
632bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
633bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);
634bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);
635bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);
636bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
637bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
638bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
639bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
644bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
645bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
646bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
647bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
648bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,
651bf215546Sopenharmony_ci                                 align(cp->cp.smem_size, 0x100));
652bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
653bf215546Sopenharmony_ci                                 (cp->hdr[1] & 0xfffff0) +
654bf215546Sopenharmony_ci                                 align(cp->cp.lmem_size, 0x10));
655bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
656bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci   if (cp->cp.smem_size > (32 << 10))
659bf215546Sopenharmony_ci      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
660bf215546Sopenharmony_ci                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
661bf215546Sopenharmony_ci   else
662bf215546Sopenharmony_ci   if (cp->cp.smem_size > (16 << 10))
663bf215546Sopenharmony_ci      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
664bf215546Sopenharmony_ci                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
665bf215546Sopenharmony_ci   else
666bf215546Sopenharmony_ci      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
667bf215546Sopenharmony_ci                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
670bf215546Sopenharmony_ci   NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci   // Only bind user uniforms and the driver constant buffer through the
673bf215546Sopenharmony_ci   // launch descriptor because UBOs are sticked to the driver cb to avoid the
674bf215546Sopenharmony_ci   // limitation of 8 CBs.
675bf215546Sopenharmony_ci   if (nvc0->constbuf[5][0].user || cp->parm_size) {
676bf215546Sopenharmony_ci      nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
677bf215546Sopenharmony_ci                                 NVC0_CB_USR_INFO(5), 1 << 16);
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci      // Later logic will attempt to bind a real buffer at position 0. That
680bf215546Sopenharmony_ci      // should not happen if we've bound a user buffer.
681bf215546Sopenharmony_ci      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
682bf215546Sopenharmony_ci   }
683bf215546Sopenharmony_ci   nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
684bf215546Sopenharmony_ci                              NVC0_CB_AUX_INFO(5), 1 << 11);
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci   nve4_compute_setup_buf_cb(nvc0, false, qmd);
687bf215546Sopenharmony_ci}
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_cistatic void
690bf215546Sopenharmony_cigp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
691bf215546Sopenharmony_ci                                const struct pipe_grid_info *info)
692bf215546Sopenharmony_ci{
693bf215546Sopenharmony_ci   const struct nvc0_screen *screen = nvc0->screen;
694bf215546Sopenharmony_ci   const struct nvc0_program *cp = nvc0->compprog;
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
697bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
698bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
699bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base);
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
704bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
705bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
706bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
707bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
708bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,
711bf215546Sopenharmony_ci                                 align(cp->cp.smem_size, 0x100));
712bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
713bf215546Sopenharmony_ci                                 (cp->hdr[1] & 0xfffff0) +
714bf215546Sopenharmony_ci                                 align(cp->cp.lmem_size, 0x10));
715bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
716bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
719bf215546Sopenharmony_ci   NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   // Only bind user uniforms and the driver constant buffer through the
722bf215546Sopenharmony_ci   // launch descriptor because UBOs are sticked to the driver cb to avoid the
723bf215546Sopenharmony_ci   // limitation of 8 CBs.
724bf215546Sopenharmony_ci   if (nvc0->constbuf[5][0].user || cp->parm_size) {
725bf215546Sopenharmony_ci      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
726bf215546Sopenharmony_ci                                  NVC0_CB_USR_INFO(5), 1 << 16);
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci      // Later logic will attempt to bind a real buffer at position 0. That
729bf215546Sopenharmony_ci      // should not happen if we've bound a user buffer.
730bf215546Sopenharmony_ci      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
731bf215546Sopenharmony_ci   }
732bf215546Sopenharmony_ci   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
733bf215546Sopenharmony_ci                               NVC0_CB_AUX_INFO(5), 1 << 11);
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   nve4_compute_setup_buf_cb(nvc0, true, qmd);
736bf215546Sopenharmony_ci}
737bf215546Sopenharmony_ci
/*
 * Encode a shared-memory size for the *_SM_CONFIG_SHARED_MEM_SIZE fields
 * written by gv100_compute_setup_launch_desc().
 *
 * `size` (bytes) is rounded up to the next of 8, 16, 32, 64 or 96 KiB;
 * anything above 64 KiB maps to 96 KiB.  The return value is that rounded
 * size divided by 4096, plus one (3, 5, 9, 17 or 25).
 */
static int
gv100_sm_config_smem_size(uint32_t size)
{
   uint32_t rounded;

   if (size > 64 * 1024) {
      rounded = 96 * 1024;
   } else if (size > 32 * 1024) {
      rounded = 64 * 1024;
   } else if (size > 16 * 1024) {
      rounded = 32 * 1024;
   } else if (size > 8 * 1024) {
      rounded = 16 * 1024;
   } else {
      rounded = 8 * 1024;
   }

   return (int)(rounded / 4096) + 1;
}
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_cistatic void
750bf215546Sopenharmony_cigv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
751bf215546Sopenharmony_ci                                const struct pipe_grid_info *info)
752bf215546Sopenharmony_ci{
753bf215546Sopenharmony_ci   struct nvc0_program *cp = nvc0->compprog;
754bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
755bf215546Sopenharmony_ci   uint64_t entry = screen->text->offset + cp->code_base;
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
758bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
759bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);
760bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
761bf215546Sopenharmony_ci                                  align(cp->cp.smem_size, 0x100));
762bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
763bf215546Sopenharmony_ci                                 (cp->hdr[1] & 0xfffff0) +
764bf215546Sopenharmony_ci                                 align(cp->cp.lmem_size, 0x10));
765bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
766bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
767bf215546Sopenharmony_ci                                  gv100_sm_config_smem_size(8 * 1024));
768bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
769bf215546Sopenharmony_ci                                  gv100_sm_config_smem_size(96 * 1024));
770bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
771bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
772bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
773bf215546Sopenharmony_ci                                  gv100_sm_config_smem_size(cp->cp.smem_size));
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
776bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
777bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
778bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
779bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
780bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
781bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
782bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci   // Only bind user uniforms and the driver constant buffer through the
785bf215546Sopenharmony_ci   // launch descriptor because UBOs are sticked to the driver cb to avoid the
786bf215546Sopenharmony_ci   // limitation of 8 CBs.
787bf215546Sopenharmony_ci   if (nvc0->constbuf[5][0].user || cp->parm_size) {
788bf215546Sopenharmony_ci      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
789bf215546Sopenharmony_ci                                  NVC0_CB_USR_INFO(5), 1 << 16);
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci      // Later logic will attempt to bind a real buffer at position 0. That
792bf215546Sopenharmony_ci      // should not happen if we've bound a user buffer.
793bf215546Sopenharmony_ci      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
794bf215546Sopenharmony_ci   }
795bf215546Sopenharmony_ci   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
796bf215546Sopenharmony_ci                               NVC0_CB_AUX_INFO(5), 1 << 11);
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   nve4_compute_setup_buf_cb(nvc0, true, qmd);
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
801bf215546Sopenharmony_ci   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
802bf215546Sopenharmony_ci}
803bf215546Sopenharmony_ci
/*
 * Get a zeroed, 256-byte launch descriptor from the scratch allocator.
 *
 * 512 bytes are requested so that a 256-byte region whose GPU address is a
 * multiple of 256 always fits.  On success *pgpuaddr holds the (aligned) GPU
 * address of the descriptor, *pbo is filled in by nouveau_scratch_get(), and
 * the matching CPU pointer is returned.  Returns NULL if the scratch
 * allocation fails.
 */
static inline void *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                               struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
   uint8_t *cpu = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);

   if (cpu == NULL) {
      return NULL;
   }

   const unsigned misalign = *pgpuaddr & 255;
   if (misalign != 0) {
      /* advance both views of the buffer to the next 256-byte boundary */
      const unsigned pad = 256 - misalign;

      cpu += pad;
      *pgpuaddr += pad;
   }

   memset(cpu, 0x00, 256);
   return cpu;
}
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_cistatic void
821bf215546Sopenharmony_cinve4_upload_indirect_desc(struct nouveau_pushbuf *push,
822bf215546Sopenharmony_ci                          struct nv04_resource *res,  uint64_t gpuaddr,
823bf215546Sopenharmony_ci                          uint32_t length, uint32_t bo_offset)
824bf215546Sopenharmony_ci{
825bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
826bf215546Sopenharmony_ci   PUSH_DATAh(push, gpuaddr);
827bf215546Sopenharmony_ci   PUSH_DATA (push, gpuaddr);
828bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
829bf215546Sopenharmony_ci   PUSH_DATA (push, length);
830bf215546Sopenharmony_ci   PUSH_DATA (push, 1);
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   nouveau_pushbuf_space(push, 32, 0, 1);
833bf215546Sopenharmony_ci   PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (length / 4));
836bf215546Sopenharmony_ci   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
837bf215546Sopenharmony_ci   nouveau_pushbuf_data(push, res->bo, bo_offset,
838bf215546Sopenharmony_ci                        NVC0_IB_ENTRY_1_NO_PREFETCH | length);
839bf215546Sopenharmony_ci}
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_civoid
842bf215546Sopenharmony_cinve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
843bf215546Sopenharmony_ci{
844bf215546Sopenharmony_ci   struct nvc0_context *nvc0 = nvc0_context(pipe);
845bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
846bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
847bf215546Sopenharmony_ci   void *desc;
848bf215546Sopenharmony_ci   uint64_t desc_gpuaddr;
849bf215546Sopenharmony_ci   struct nouveau_bo *desc_bo;
850bf215546Sopenharmony_ci   int ret;
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci   desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
853bf215546Sopenharmony_ci   if (!desc) {
854bf215546Sopenharmony_ci      ret = -1;
855bf215546Sopenharmony_ci      goto out;
856bf215546Sopenharmony_ci   }
857bf215546Sopenharmony_ci   BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
858bf215546Sopenharmony_ci                desc_bo);
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci   list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {
861bf215546Sopenharmony_ci      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
862bf215546Sopenharmony_ci                        resident->flags);
863bf215546Sopenharmony_ci   }
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
866bf215546Sopenharmony_ci      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
867bf215546Sopenharmony_ci                        resident->flags);
868bf215546Sopenharmony_ci   }
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci   ret = !nve4_state_validate_cp(nvc0, ~0);
871bf215546Sopenharmony_ci   if (ret)
872bf215546Sopenharmony_ci      goto out;
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_ci   if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
875bf215546Sopenharmony_ci      gv100_compute_setup_launch_desc(nvc0, desc, info);
876bf215546Sopenharmony_ci   else
877bf215546Sopenharmony_ci   if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
878bf215546Sopenharmony_ci      gp100_compute_setup_launch_desc(nvc0, desc, info);
879bf215546Sopenharmony_ci   else
880bf215546Sopenharmony_ci      nve4_compute_setup_launch_desc(nvc0, desc, info);
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci   nve4_compute_upload_input(nvc0, info);
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci#ifndef NDEBUG
885bf215546Sopenharmony_ci   if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
886bf215546Sopenharmony_ci      debug_printf("Queue Meta Data:\n");
887bf215546Sopenharmony_ci      if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
888bf215546Sopenharmony_ci         NVC3C0QmdDump_V02_02(desc);
889bf215546Sopenharmony_ci      else
890bf215546Sopenharmony_ci      if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
891bf215546Sopenharmony_ci         NVC0C0QmdDump_V02_01(desc);
892bf215546Sopenharmony_ci      else
893bf215546Sopenharmony_ci         NVA0C0QmdDump_V00_06(desc);
894bf215546Sopenharmony_ci   }
895bf215546Sopenharmony_ci#endif
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   if (unlikely(info->indirect)) {
898bf215546Sopenharmony_ci      struct nv04_resource *res = nv04_resource(info->indirect);
899bf215546Sopenharmony_ci      uint32_t offset = res->offset + info->indirect_offset;
900bf215546Sopenharmony_ci
901bf215546Sopenharmony_ci      /* upload the descriptor */
902bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
903bf215546Sopenharmony_ci      PUSH_DATAh(push, desc_gpuaddr);
904bf215546Sopenharmony_ci      PUSH_DATA (push, desc_gpuaddr);
905bf215546Sopenharmony_ci      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
906bf215546Sopenharmony_ci      PUSH_DATA (push, 256);
907bf215546Sopenharmony_ci      PUSH_DATA (push, 1);
908bf215546Sopenharmony_ci      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
909bf215546Sopenharmony_ci      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
910bf215546Sopenharmony_ci      PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci      if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) {
913bf215546Sopenharmony_ci         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 12, offset);
914bf215546Sopenharmony_ci      } else {
915bf215546Sopenharmony_ci         /* overwrite griddim_x and griddim_y as two 32-bits integers even
916bf215546Sopenharmony_ci          * if griddim_y must be a 16-bits integer */
917bf215546Sopenharmony_ci         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 8, offset);
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci         /* overwrite the 16 high bits of griddim_y with griddim_z because
920bf215546Sopenharmony_ci          * we need (z << 16) | x */
921bf215546Sopenharmony_ci         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 54, 4, offset + 8);
922bf215546Sopenharmony_ci      }
923bf215546Sopenharmony_ci   }
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci   /* upload descriptor and flush */
926bf215546Sopenharmony_ci   nouveau_pushbuf_space(push, 32, 1, 0);
927bf215546Sopenharmony_ci   PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
928bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
929bf215546Sopenharmony_ci   PUSH_DATA (push, desc_gpuaddr >> 8);
930bf215546Sopenharmony_ci   BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
931bf215546Sopenharmony_ci   PUSH_DATA (push, 0x3);
932bf215546Sopenharmony_ci   BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
933bf215546Sopenharmony_ci   PUSH_DATA (push, 0);
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci   nvc0_update_compute_invocations_counter(nvc0, info);
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ciout:
938bf215546Sopenharmony_ci   if (ret)
939bf215546Sopenharmony_ci      NOUVEAU_ERR("Failed to launch grid !\n");
940bf215546Sopenharmony_ci   nouveau_scratch_done(&nvc0->base);
941bf215546Sopenharmony_ci   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
942bf215546Sopenharmony_ci   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS);
943bf215546Sopenharmony_ci}
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci#define NVE4_TIC_ENTRY_INVALID 0x000fffff
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_cistatic void
949bf215546Sopenharmony_cinve4_compute_validate_textures(struct nvc0_context *nvc0)
950bf215546Sopenharmony_ci{
951bf215546Sopenharmony_ci   struct nouveau_bo *txc = nvc0->screen->txc;
952bf215546Sopenharmony_ci   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
953bf215546Sopenharmony_ci   const unsigned s = 5;
954bf215546Sopenharmony_ci   unsigned i;
955bf215546Sopenharmony_ci   uint32_t commands[2][32];
956bf215546Sopenharmony_ci   unsigned n[2] = { 0, 0 };
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci   for (i = 0; i < nvc0->num_textures[s]; ++i) {
959bf215546Sopenharmony_ci      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
960bf215546Sopenharmony_ci      struct nv04_resource *res;
961bf215546Sopenharmony_ci      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_ci      if (!tic) {
964bf215546Sopenharmony_ci         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
965bf215546Sopenharmony_ci         continue;
966bf215546Sopenharmony_ci      }
967bf215546Sopenharmony_ci      res = nv04_resource(tic->pipe.texture);
968bf215546Sopenharmony_ci      nvc0_update_tic(nvc0, tic, res);
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci      if (tic->id < 0) {
971bf215546Sopenharmony_ci         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci         PUSH_SPACE(push, 16);
974bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
975bf215546Sopenharmony_ci         PUSH_DATAh(push, txc->offset + (tic->id * 32));
976bf215546Sopenharmony_ci         PUSH_DATA (push, txc->offset + (tic->id * 32));
977bf215546Sopenharmony_ci         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
978bf215546Sopenharmony_ci         PUSH_DATA (push, 32);
979bf215546Sopenharmony_ci         PUSH_DATA (push, 1);
980bf215546Sopenharmony_ci         BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
981bf215546Sopenharmony_ci         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
982bf215546Sopenharmony_ci         PUSH_DATAp(push, &tic->tic[0], 8);
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci         commands[0][n[0]++] = (tic->id << 4) | 1;
985bf215546Sopenharmony_ci      } else
986bf215546Sopenharmony_ci      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
987bf215546Sopenharmony_ci         commands[1][n[1]++] = (tic->id << 4) | 1;
988bf215546Sopenharmony_ci      }
989bf215546Sopenharmony_ci      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
992bf215546Sopenharmony_ci      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
995bf215546Sopenharmony_ci      nvc0->tex_handles[s][i] |= tic->id;
996bf215546Sopenharmony_ci      if (dirty)
997bf215546Sopenharmony_ci         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
998bf215546Sopenharmony_ci   }
999bf215546Sopenharmony_ci   for (; i < nvc0->state.num_textures[s]; ++i) {
1000bf215546Sopenharmony_ci      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
1001bf215546Sopenharmony_ci      nvc0->textures_dirty[s] |= 1 << i;
1002bf215546Sopenharmony_ci   }
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci   if (n[0]) {
1005bf215546Sopenharmony_ci      BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
1006bf215546Sopenharmony_ci      PUSH_DATAp(push, commands[0], n[0]);
1007bf215546Sopenharmony_ci   }
1008bf215546Sopenharmony_ci   if (n[1]) {
1009bf215546Sopenharmony_ci      BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
1010bf215546Sopenharmony_ci      PUSH_DATAp(push, commands[1], n[1]);
1011bf215546Sopenharmony_ci   }
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci   nvc0->state.num_textures[s] = nvc0->num_textures[s];
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci   /* Invalidate all 3D textures because they are aliased. */
1016bf215546Sopenharmony_ci   for (int s = 0; s < 5; s++) {
1017bf215546Sopenharmony_ci      for (int i = 0; i < nvc0->num_textures[s]; i++)
1018bf215546Sopenharmony_ci         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
1019bf215546Sopenharmony_ci      nvc0->textures_dirty[s] = ~0;
1020bf215546Sopenharmony_ci   }
1021bf215546Sopenharmony_ci   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
1022bf215546Sopenharmony_ci}
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
1025bf215546Sopenharmony_cistatic void
1026bf215546Sopenharmony_cinve4_compute_trap_info(struct nvc0_context *nvc0)
1027bf215546Sopenharmony_ci{
1028bf215546Sopenharmony_ci   struct nvc0_screen *screen = nvc0->screen;
1029bf215546Sopenharmony_ci   struct nouveau_bo *bo = screen->parm;
1030bf215546Sopenharmony_ci   int ret, i;
1031bf215546Sopenharmony_ci   volatile struct nve4_mp_trap_info *info;
1032bf215546Sopenharmony_ci   uint8_t *map;
1033bf215546Sopenharmony_ci
1034bf215546Sopenharmony_ci   ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);
1035bf215546Sopenharmony_ci   if (ret)
1036bf215546Sopenharmony_ci      return;
1037bf215546Sopenharmony_ci   map = (uint8_t *)bo->map;
1038bf215546Sopenharmony_ci   info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);
1039bf215546Sopenharmony_ci
1040bf215546Sopenharmony_ci   if (info->lock) {
1041bf215546Sopenharmony_ci      debug_printf("trapstat = %08x\n", info->trapstat);
1042bf215546Sopenharmony_ci      debug_printf("warperr = %08x\n", info->warperr);
1043bf215546Sopenharmony_ci      debug_printf("PC = %x\n", info->pc);
1044bf215546Sopenharmony_ci      debug_printf("tid = %u %u %u\n",
1045bf215546Sopenharmony_ci                   info->tid[0], info->tid[1], info->tid[2]);
1046bf215546Sopenharmony_ci      debug_printf("ctaid = %u %u %u\n",
1047bf215546Sopenharmony_ci                   info->ctaid[0], info->ctaid[1], info->ctaid[2]);
1048bf215546Sopenharmony_ci      for (i = 0; i <= 63; ++i)
1049bf215546Sopenharmony_ci         debug_printf("$r%i = %08x\n", i, info->r[i]);
1050bf215546Sopenharmony_ci      for (i = 0; i <= 6; ++i)
1051bf215546Sopenharmony_ci         debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);
1052bf215546Sopenharmony_ci      debug_printf("$c = %x\n", info->flags >> 12);
1053bf215546Sopenharmony_ci   }
1054bf215546Sopenharmony_ci   info->lock = 0;
1055bf215546Sopenharmony_ci}
1056bf215546Sopenharmony_ci#endif
1057