/*
 * Copyright 2012 Francisco Jerez
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/format/u_format.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_compute.xml.h"

#include "nv50_ir_driver.h"

int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

   BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

   BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_CP(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_CP(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

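   /* Allocate local memory and stack space for the maximum number of
    * resident warps (1 << 7 == 128) and disable clamping; the exact
    * semantics of the *_NO_CLAMP knobs are inferred from the rnndb
    * register names. */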
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

   BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->fence.bo->offset + 16);
   PUSH_DATA (push, screen->fence.bo->offset + 16);

   return 0;
}

static void
nv50_compute_validate_samplers(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}

static void
nv50_compute_validate_textures(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D textures because they are aliased. */
   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
   nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}

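/* Constant buffer bindings are aliased between the 3D and COMPUTE engines:
 * after touching them for COMPUTE, every valid 3D constbuf has to be marked
 * dirty again so that the 3D path rebinds it. */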
static inline void
nv50_compute_invalidate_constbufs(struct nv50_context *nv50)
{
   int s;

   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) {
      nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s];
      nv50->state.uniform_buffer_bound[s] = false;
   }
   nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}

static void
nv50_compute_validate_constbufs(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   const int s = NV50_SHADER_STAGE_COMPUTE;

   while (nv50->constbuf_dirty[s]) {
      int i = ffs(nv50->constbuf_dirty[s]) - 1;
      nv50->constbuf_dirty[s] &= ~(1 << i);

      if (nv50->constbuf[s][i].user) {
         const unsigned b = NV50_CB_PVP + s;
         unsigned start = 0;
         unsigned words = nv50->constbuf[s][0].size / 4;
         if (i) {
            NOUVEAU_ERR("user constbufs only supported in slot 0\n");
            continue;
         }
         if (!nv50->state.uniform_buffer_bound[s]) {
            nv50->state.uniform_buffer_bound[s] = true;
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
         }
         while (words) {
            unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

            PUSH_SPACE(push, nr + 3);
            BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
            PUSH_DATA (push, (start << 8) | b);
            BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr);
            PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

            start += nr;
            words -= nr;
         }
      } else {
         struct nv04_resource *res =
            nv04_resource(nv50->constbuf[s][i].u.buf);
         if (res) {
            /* TODO: allocate persistent bindings */
            const unsigned b = s * 16 + i;

            assert(nouveau_resource_mapped_by_gpu(&res->base));

            BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
            PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, (b << 16) |
                       (nv50->constbuf[s][i].size & 0xffff));
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);

            BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD);

            nv50->cb_dirty = 1; /* Force cache flush for UBO. */
            res->cb_bindings[s] |= 1 << i;
         } else {
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nv50->state.uniform_buffer_bound[s] = false;
      }
   }

   // TODO: Check if having orthogonal slots means the two don't trample over
   // each other.
   nv50_compute_invalidate_constbufs(nv50);
}

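/* Compute the dimensions reported by the imageSize() builtin: the texel
 * count for buffers, the minified level size for textures, and the selected
 * layer count in *depth for array and cube targets. */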
static void
nv50_get_surface_dims(const struct pipe_image_view *view,
                      int *width, int *height, int *depth)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   int level;

   *width = *height = *depth = 1;
   if (res->base.target == PIPE_BUFFER) {
      *width = view->u.buf.size / util_format_get_blocksize(view->format);
      return;
   }

   level = view->u.tex.level;
   *width = u_minify(view->resource->width0, level);
   *height = u_minify(view->resource->height0, level);
   *depth = u_minify(view->resource->depth0, level);

   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
      break;
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_3D:
      break;
   default:
      assert(!"unexpected texture target");
      break;
   }
}

static void
nv50_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct nv04_resource *res = (struct nv04_resource *)view->resource;

   assert(view->resource->target == PIPE_BUFFER);

   util_range_add(&res->base, &res->valid_buffer_range,
                  view->u.buf.offset,
                  view->u.buf.offset + view->u.buf.size);
}

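/* Emit the 12-dword surface information block for one image into the
 * auxiliary constant buffer; the shader reads it to tell whether an image
 * is bound, to answer imageSize() and to compute per-pixel addresses. */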
static inline void
nv50_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      int width, int height, int depth)
{
   struct nv04_resource *res;
   uint32_t *const info = push->cur;

   push->cur += 12;

   /* Make sure to always initialize the surface information area because
    * it's used to check if the given image is bound or not. */
   memset(info, 0, 12 * sizeof(*info));

   if (!view || !view->resource)
      return;
   res = nv04_resource(view->resource);

   /* Stick the image dimensions for the imageSize() builtin. */
   info[0] = width;
   info[1] = height;
   info[2] = depth;

   /* Stick the block size (i.e. the number of bytes per pixel) used to
    * calculate the pixel offset and to check for format mismatches. */
   info[3] = util_format_get_blocksize(view->format);

   if (res->base.target != PIPE_BUFFER) {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      unsigned nby = align(util_format_get_nblocksy(view->format, height),
                           NV50_TILE_SIZE_Y(lvl->tile_mode));

      if (mt->layout_3d) {
         info[4] = nby;
         info[11] = view->u.tex.first_layer;
      } else {
         info[4] = mt->layer_stride / lvl->pitch;
      }
      info[6] = mt->ms_x;
      info[7] = mt->ms_y;
      info[8] = NV50_TILE_SHIFT_X(lvl->tile_mode);
      info[9] = NV50_TILE_SHIFT_Y(lvl->tile_mode);
      info[10] = NV50_TILE_SHIFT_Z(lvl->tile_mode);
   }
}

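/* Bind the shader buffers and images of the current compute program to the
 * hardware GLOBAL slots and mirror their sizes/descriptors into the
 * auxiliary constant buffer; unused slots are zeroed. */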
static void
nv50_compute_validate_surfaces(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   int i;

   for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
      struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
      int width, height, depth;
      uint64_t address = 0;

      BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);

      if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
         struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
         struct nv04_resource *res = nv04_resource(buffer->buffer);
         PUSH_DATAh(push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, 0); /* pitch? */
         PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
         PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
         util_range_add(&res->base, &res->valid_buffer_range,
                        buffer->buffer_offset,
                        buffer->buffer_offset +
                        buffer->buffer_size);

         PUSH_SPACE(push, 1 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
         PUSH_DATA (push, buffer->buffer_size);
      } else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
         struct pipe_image_view *view = &nv50->images[gmem->slot];
         struct nv04_resource *res = nv04_resource(view->resource);

         /* Get surface dimensions based on the target. */
         nv50_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            address += view->u.buf.offset;
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nv50_mark_image_range_valid(view);

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, 0); /* pitch? */
            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
            PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            const unsigned z = view->u.tex.first_layer;
            unsigned max_size;

            if (mt->layout_3d) {
               address += nv50_mt_zslice_offset(mt, view->u.tex.level, 0);
               max_size = mt->total_size;
            } else {
               address += mt->layer_stride * z;
               max_size = mt->layer_stride * (view->u.tex.last_layer -
                                              view->u.tex.first_layer + 1);
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            if (mt->layout_3d) {
               // We have to adjust the size of the 3d surface to be
               // accessible within 2d limits. The size of each z tile goes
               // into the x direction, while the number of z tiles goes into
               // the y direction.
               const unsigned nby = util_format_get_nblocksy(view->format, height);
               const unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
               const unsigned tsz = NV50_TILE_SIZE_Z(lvl->tile_mode);
               const unsigned pitch = lvl->pitch * tsz;
               const unsigned maxy = align(nby, tsy) * align(depth, tsz) >>
                  NV50_TILE_SHIFT_Z(lvl->tile_mode);
               PUSH_DATA (push, pitch * tsy);
               PUSH_DATA (push, (maxy - 1) << 16 | (pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else if (nouveau_bo_memtype(res->bo)) {
               PUSH_DATA (push, lvl->pitch * NV50_TILE_SIZE_Y(lvl->tile_mode));
               PUSH_DATA (push, (max_size / lvl->pitch - 1) << 16 | (lvl->pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else {
               PUSH_DATA (push, lvl->pitch);
               PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1);
               PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
            }
         }

         BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);

         PUSH_SPACE(push, 12 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
         nv50_set_surface_info(push, view, width, height, depth);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}

static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
   unsigned i;

   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nv50->global_residents, struct pipe_resource *, i);
      if (res)
         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
                                  nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}

static struct nv50_state_validate
validate_list_cp[] = {
   { nv50_compprog_validate,          NV50_NEW_CP_PROGRAM  },
   { nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF },
   { nv50_compute_validate_surfaces,  NV50_NEW_CP_SURFACES |
                                      NV50_NEW_CP_BUFFERS |
                                      NV50_NEW_CP_PROGRAM  },
   { nv50_compute_validate_textures,  NV50_NEW_CP_TEXTURES },
   { nv50_compute_validate_samplers,  NV50_NEW_CP_SAMPLERS },
   { nv50_compute_validate_globals,   NV50_NEW_CP_GLOBALS  },
};

static bool
nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
   bool ret;

   /* TODO: validate textures, samplers, surfaces */
   ret = nv50_state_validate(nv50, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
                             nv50->bufctx_cp);

   if (unlikely(nv50->state.flushed))
      nv50_bufctx_fence(nv50->bufctx_cp, true);
   return ret;
}

static void
nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned size = align(nv50->compprog->parm_size, 0x4);

   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, (1 + (size / 4)) << 8);

   if (size) {
      struct nouveau_mm_allocation *mm;
      struct nouveau_bo *bo = NULL;
      unsigned offset;

      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
      assert(mm);

      nouveau_bo_map(bo, 0, nv50->base.client);
      memcpy(bo->map + offset, input, size);

      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART |
                          NOUVEAU_BO_RD);
      nouveau_pushbuf_bufctx(push, nv50->bufctx);
      nouveau_pushbuf_validate(push);

      nouveau_pushbuf_space(push, 0, 0, 1);

      BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
      nouveau_pushbuf_data(push, bo, offset, size);

      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
      nouveau_bo_ref(NULL, &bo);
      nouveau_bufctx_reset(nv50->bufctx, 0);
   }
}

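/* GRIDDIM only encodes the x and y dimensions of the grid, so a
 * three-dimensional launch is emulated by kicking the kernel off once per z
 * layer, with the z extent and the current layer index passed to the shader
 * through USER_PARAM(0). */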
void
nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
   struct nv50_program *cp = nv50->compprog;
   bool ret;

   ret = !nv50_state_validate_cp(nv50, ~0);
   if (ret) {
      NOUVEAU_ERR("failed to launch grid!\n");
      return;
   }

   nv50_compute_upload_input(nv50, info->input);

   BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

   BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x14, 0x40));
   BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, cp->max_gpr);

   /* no indirect support - just read the parameters out */
   uint32_t grid[3];
   if (unlikely(info->indirect)) {
      pipe_buffer_read(pipe, info->indirect, info->indirect_offset,
                       sizeof(grid), grid);
   } else {
      memcpy(grid, info->grid, sizeof(grid));
   }

   /* grid/block setup */
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
   PUSH_DATA (push, info->block[2]);
   BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
   PUSH_DATA (push, 1 << 16 | block_size);
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
   PUSH_DATA (push, grid[1] << 16 | grid[0]);
   BEGIN_NV04(push, NV50_CP(GRIDID), 1);
   PUSH_DATA (push, 1);

   for (int i = 0; i < grid[2]; i++) {
      BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
      PUSH_DATA (push, grid[2] | (i << 16));

      /* kernel launching */
      BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* binding a compute shader clobbers fragment shader state */
   nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;

   nv50->compute_invocations += block_size * grid[0] * grid[1] * grid[2];
}