1/* 2 * Copyright 2013 Nouveau Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Christoph Bumiller, Samuel Pitoiset 23 */ 24 25#include "nvc0/nvc0_context.h" 26 27#include "nvc0/nvc0_compute.xml.h" 28 29int 30nvc0_screen_compute_setup(struct nvc0_screen *screen, 31 struct nouveau_pushbuf *push) 32{ 33 struct nouveau_object *chan = screen->base.channel; 34 struct nouveau_device *dev = screen->base.device; 35 uint32_t obj_class; 36 int ret; 37 int i; 38 39 switch (dev->chipset & ~0xf) { 40 case 0xc0: 41 case 0xd0: 42 /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, 43 * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ 44 obj_class = NVC0_COMPUTE_CLASS; 45 break; 46 default: 47 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); 48 return -1; 49 } 50 51 ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, 52 &screen->compute); 53 if (ret) { 54 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); 55 return ret; 56 } 57 58 BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); 59 PUSH_DATA (push, screen->compute->oclass); 60 61 /* hardware limit */ 62 BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); 63 PUSH_DATA (push, screen->mp_count); 64 BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); 65 PUSH_DATA (push, 0xf); 66 67 BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); 68 PUSH_DATA (push, 0x8000); 69 70 /* global memory setup */ 71 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); 72 PUSH_DATA (push, 0); 73 BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); 74 for (i = 0; i <= 0xff; i++) 75 PUSH_DATA (push, (0xc << 28) | (i << 16) | i); 76 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); 77 PUSH_DATA (push, 1); 78 79 /* local memory and cstack setup */ 80 BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); 81 PUSH_DATAh(push, screen->tls->offset); 82 PUSH_DATA (push, screen->tls->offset); 83 BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); 84 PUSH_DATAh(push, screen->tls->size); 85 PUSH_DATA (push, screen->tls->size); 86 BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); 87 PUSH_DATA (push, 0); 88 BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); 89 PUSH_DATA (push, 0xff << 24); 90 91 /* shared memory setup */ 92 BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); 93 PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); 94 BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); 95 PUSH_DATA (push, 0xfe << 24); 96 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); 97 PUSH_DATA (push, 0); 98 99 /* code segment setup */ 100 BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); 101 PUSH_DATAh(push, screen->text->offset); 102 PUSH_DATA (push, screen->text->offset); 103 104 /* textures */ 105 BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); 106 PUSH_DATAh(push, screen->txc->offset); 107 PUSH_DATA (push, screen->txc->offset); 108 PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); 109 110 /* samplers */ 111 BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); 112 PUSH_DATAh(push, screen->txc->offset + 65536); 113 PUSH_DATA (push, screen->txc->offset + 65536); 114 PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); 115 116 /* MS sample coordinate offsets */ 117 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 118 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 119 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 120 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 121 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8); 122 PUSH_DATA (push, NVC0_CB_AUX_MS_INFO); 123 PUSH_DATA (push, 0); /* 0 */ 124 PUSH_DATA (push, 0); 125 PUSH_DATA (push, 1); /* 1 */ 126 PUSH_DATA (push, 0); 127 PUSH_DATA (push, 0); /* 2 */ 128 PUSH_DATA (push, 1); 129 PUSH_DATA (push, 1); /* 3 */ 130 PUSH_DATA (push, 1); 131 PUSH_DATA (push, 2); /* 4 */ 132 PUSH_DATA (push, 0); 133 PUSH_DATA (push, 3); /* 5 */ 134 PUSH_DATA (push, 0); 135 PUSH_DATA (push, 2); /* 6 */ 136 PUSH_DATA (push, 1); 137 PUSH_DATA (push, 3); /* 7 */ 138 PUSH_DATA (push, 1); 139 140 return 0; 141} 142 143static void 144nvc0_compute_validate_samplers(struct nvc0_context *nvc0) 145{ 146 bool need_flush = nvc0_validate_tsc(nvc0, 5); 147 if (need_flush) { 148 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); 149 PUSH_DATA (nvc0->base.pushbuf, 0); 150 } 151 152 /* Invalidate all 3D samplers because they are aliased. */ 153 for (int s = 0; s < 5; s++) 154 nvc0->samplers_dirty[s] = ~0; 155 nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; 156} 157 158static void 159nvc0_compute_validate_textures(struct nvc0_context *nvc0) 160{ 161 bool need_flush = nvc0_validate_tic(nvc0, 5); 162 if (need_flush) { 163 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); 164 PUSH_DATA (nvc0->base.pushbuf, 0); 165 } 166 167 /* Invalidate all 3D textures because they are aliased. */ 168 for (int s = 0; s < 5; s++) { 169 for (int i = 0; i < nvc0->num_textures[s]; i++) 170 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); 171 nvc0->textures_dirty[s] = ~0; 172 } 173 nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; 174} 175 176static inline void 177nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0) 178{ 179 int s; 180 181 /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ 182 for (s = 0; s < 5; s++) { 183 nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; 184 nvc0->state.uniform_buffer_bound[s] = false; 185 } 186 nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; 187} 188 189static void 190nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) 191{ 192 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 193 const int s = 5; 194 195 while (nvc0->constbuf_dirty[s]) { 196 int i = ffs(nvc0->constbuf_dirty[s]) - 1; 197 nvc0->constbuf_dirty[s] &= ~(1 << i); 198 199 if (nvc0->constbuf[s][i].user) { 200 struct nouveau_bo *bo = nvc0->screen->uniform_bo; 201 const unsigned base = NVC0_CB_USR_INFO(s); 202 const unsigned size = nvc0->constbuf[s][0].size; 203 assert(i == 0); /* we really only want OpenGL uniforms here */ 204 assert(nvc0->constbuf[s][0].u.data); 205 206 if (!nvc0->state.uniform_buffer_bound[s]) { 207 nvc0->state.uniform_buffer_bound[s] = true; 208 209 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 210 PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE); 211 PUSH_DATAh(push, bo->offset + base); 212 PUSH_DATA (push, bo->offset + base); 213 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 214 PUSH_DATA (push, (0 << 8) | 1); 215 } 216 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), 217 base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, 218 nvc0->constbuf[s][0].u.data); 219 } else { 220 struct nv04_resource *res = 221 nv04_resource(nvc0->constbuf[s][i].u.buf); 222 if (res) { 223 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 224 PUSH_DATA (push, nvc0->constbuf[s][i].size); 225 PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); 226 PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); 227 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 228 PUSH_DATA (push, (i << 8) | 1); 229 230 BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); 231 232 res->cb_bindings[s] |= 1 << i; 233 } else { 234 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 235 PUSH_DATA (push, (i << 8) | 0); 236 } 237 if (i == 0) 238 nvc0->state.uniform_buffer_bound[s] = false; 239 } 240 } 241 242 nvc0_compute_invalidate_constbufs(nvc0); 243 244 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 245 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); 246} 247 248static void 249nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) 250{ 251 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 252 struct nvc0_screen *screen = nvc0->screen; 253 254 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 255 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 256 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 257 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 258 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 259 PUSH_DATA (push, (15 << 8) | 1); 260 261 nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; 262} 263 264static void 265nvc0_compute_validate_buffers(struct nvc0_context *nvc0) 266{ 267 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 268 struct nvc0_screen *screen = nvc0->screen; 269 const int s = 5; 270 int i; 271 272 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 273 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 274 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 275 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 276 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); 277 PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); 278 279 for (i = 0; i < NVC0_MAX_BUFFERS; i++) { 280 if (nvc0->buffers[s][i].buffer) { 281 struct nv04_resource *res = 282 nv04_resource(nvc0->buffers[s][i].buffer); 283 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); 284 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); 285 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); 286 PUSH_DATA (push, 0); 287 BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); 288 util_range_add(&res->base, &res->valid_buffer_range, 289 nvc0->buffers[s][i].buffer_offset, 290 nvc0->buffers[s][i].buffer_offset + 291 nvc0->buffers[s][i].buffer_size); 292 } else { 293 PUSH_DATA (push, 0); 294 PUSH_DATA (push, 0); 295 PUSH_DATA (push, 0); 296 PUSH_DATA (push, 0); 297 } 298 } 299} 300 301void 302nvc0_compute_validate_globals(struct nvc0_context *nvc0) 303{ 304 unsigned i; 305 306 for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); 307 ++i) { 308 struct pipe_resource *res = *util_dynarray_element( 309 &nvc0->global_residents, struct pipe_resource *, i); 310 if (res) 311 nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL, 312 nv04_resource(res), NOUVEAU_BO_RDWR); 313 } 314} 315 316static inline void 317nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s) 318{ 319 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 320 int i; 321 322 for (i = 0; i < NVC0_MAX_IMAGES; ++i) { 323 if (s == 5) 324 BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6); 325 else 326 BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6); 327 PUSH_DATA(push, 0); 328 PUSH_DATA(push, 0); 329 PUSH_DATA(push, 0); 330 PUSH_DATA(push, 0); 331 PUSH_DATA(push, 0x14000); 332 PUSH_DATA(push, 0); 333 } 334} 335 336static void 337nvc0_compute_validate_surfaces(struct nvc0_context *nvc0) 338{ 339 /* TODO: Invalidating both 3D and CP surfaces before validating surfaces for 340 * compute is probably not really necessary, but we didn't find any better 341 * solutions for now. This fixes some invalidation issues when compute and 342 * fragment shaders are used inside the same context. Anyway, we definitely 343 * have invalidation issues between 3D and CP for other resources like SSBO 344 * and atomic counters. */ 345 nvc0_compute_invalidate_surfaces(nvc0, 4); 346 nvc0_compute_invalidate_surfaces(nvc0, 5); 347 348 nvc0_validate_suf(nvc0, 5); 349 350 /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */ 351 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF); 352 nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES; 353 nvc0->images_dirty[4] |= nvc0->images_valid[4]; 354} 355 356static struct nvc0_state_validate 357validate_list_cp[] = { 358 { nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM }, 359 { nvc0_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF }, 360 { nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST }, 361 { nvc0_compute_validate_buffers, NVC0_NEW_CP_BUFFERS }, 362 { nvc0_compute_validate_textures, NVC0_NEW_CP_TEXTURES }, 363 { nvc0_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS }, 364 { nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS }, 365 { nvc0_compute_validate_surfaces, NVC0_NEW_CP_SURFACES }, 366}; 367 368static bool 369nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask) 370{ 371 bool ret; 372 373 ret = nvc0_state_validate(nvc0, mask, validate_list_cp, 374 ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp, 375 nvc0->bufctx_cp); 376 377 if (unlikely(nvc0->state.flushed)) 378 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true); 379 return ret; 380} 381 382static void 383nvc0_compute_upload_input(struct nvc0_context *nvc0, 384 const struct pipe_grid_info *info) 385{ 386 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 387 struct nvc0_screen *screen = nvc0->screen; 388 struct nvc0_program *cp = nvc0->compprog; 389 390 if (cp->parm_size) { 391 struct nouveau_bo *bo = screen->uniform_bo; 392 const unsigned base = NVC0_CB_USR_INFO(5); 393 394 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 395 PUSH_DATA (push, align(cp->parm_size, 0x100)); 396 PUSH_DATAh(push, bo->offset + base); 397 PUSH_DATA (push, bo->offset + base); 398 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 399 PUSH_DATA (push, (0 << 8) | 1); 400 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ 401 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); 402 PUSH_DATA (push, 0); 403 PUSH_DATAp(push, info->input, cp->parm_size / 4); 404 405 nvc0_compute_invalidate_constbufs(nvc0); 406 } 407 408 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 409 PUSH_DATA (push, NVC0_CB_AUX_SIZE); 410 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 411 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 412 413 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1); 414 /* (7) as we only upload work_dim on nvc0, the rest uses special regs */ 415 PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7)); 416 PUSH_DATA (push, info->work_dim); 417 418 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 419 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); 420} 421 422void 423nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) 424{ 425 struct nvc0_context *nvc0 = nvc0_context(pipe); 426 struct nvc0_screen *screen = nvc0->screen; 427 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 428 struct nvc0_program *cp = nvc0->compprog; 429 int ret; 430 431 ret = !nvc0_state_validate_cp(nvc0, ~0); 432 if (ret) { 433 NOUVEAU_ERR("Failed to launch grid !\n"); 434 return; 435 } 436 437 nvc0_compute_upload_input(nvc0, info); 438 439 BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); 440 PUSH_DATA (push, cp->code_base); 441 442 BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); 443 PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10)); 444 PUSH_DATA (push, 0); 445 PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ 446 447 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); 448 PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); 449 PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); 450 PUSH_DATA (push, cp->num_barriers); 451 BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); 452 PUSH_DATA (push, cp->num_gprs); 453 454 /* launch preliminary setup */ 455 BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); 456 PUSH_DATA (push, 0x1); 457 BEGIN_NVC0(push, SUBC_CP(0x036c), 1); 458 PUSH_DATA (push, 0); 459 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 460 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); 461 462 /* block setup */ 463 BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); 464 PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); 465 PUSH_DATA (push, info->block[2]); 466 467 nouveau_pushbuf_space(push, 32, 2, 1); 468 PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); 469 470 if (unlikely(info->indirect)) { 471 struct nv04_resource *res = nv04_resource(info->indirect); 472 uint32_t offset = res->offset + info->indirect_offset; 473 unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; 474 475 PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 476 PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); 477 nouveau_pushbuf_data(push, res->bo, offset, 478 NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); 479 } else { 480 /* grid setup */ 481 BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); 482 PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); 483 PUSH_DATA (push, info->grid[2]); 484 485 /* kernel launching */ 486 BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); 487 PUSH_DATA (push, 0); 488 BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); 489 PUSH_DATA (push, 0); 490 BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); 491 PUSH_DATA (push, 0x1000); 492 BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); 493 PUSH_DATA (push, 0); 494 BEGIN_NVC0(push, SUBC_CP(0x0360), 1); 495 PUSH_DATA (push, 0x1); 496 } 497 498 /* TODO: Not sure if this is really necessary. */ 499 nvc0_compute_invalidate_surfaces(nvc0, 5); 500 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); 501 nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES; 502 nvc0->images_dirty[5] |= nvc0->images_valid[5]; 503 504 nvc0_update_compute_invocations_counter(nvc0, info); 505} 506 507static void 508nvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0, 509 const struct pipe_grid_info *info) { 510 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 511 struct nv04_resource *res = nv04_resource(info->indirect); 512 uint32_t offset = res->offset + info->indirect_offset; 513 514 nouveau_pushbuf_space(push, 16, 0, 8); 515 PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 516 BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7); 517 PUSH_DATA(push, 6); 518 PUSH_DATA(push, info->block[0]); 519 PUSH_DATA(push, info->block[1]); 520 PUSH_DATA(push, info->block[2]); 521 nouveau_pushbuf_data(push, res->bo, offset, 522 NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); 523} 524 525void 526nvc0_update_compute_invocations_counter(struct nvc0_context *nvc0, 527 const struct pipe_grid_info *info) { 528 if (unlikely(info->indirect)) { 529 nvc0_compute_update_indirect_invocations(nvc0, info); 530 } else { 531 uint64_t invocations = info->block[0] * info->block[1] * info->block[2]; 532 invocations *= info->grid[0] * info->grid[1] * info->grid[2]; 533 nvc0->compute_invocations += invocations; 534 } 535} 536