1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2013 Nouveau Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 12bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci * 22bf215546Sopenharmony_ci * Authors: Christoph Bumiller, Samuel Pitoiset 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "nvc0/nvc0_context.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "nvc0/nvc0_compute.xml.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ciint 30bf215546Sopenharmony_cinvc0_screen_compute_setup(struct nvc0_screen *screen, 31bf215546Sopenharmony_ci struct nouveau_pushbuf *push) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci struct nouveau_object *chan = screen->base.channel; 34bf215546Sopenharmony_ci struct nouveau_device *dev = screen->base.device; 35bf215546Sopenharmony_ci uint32_t obj_class; 36bf215546Sopenharmony_ci int ret; 37bf215546Sopenharmony_ci int i; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci switch (dev->chipset & ~0xf) { 40bf215546Sopenharmony_ci case 0xc0: 41bf215546Sopenharmony_ci case 0xd0: 42bf215546Sopenharmony_ci /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, 43bf215546Sopenharmony_ci * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ 44bf215546Sopenharmony_ci obj_class = NVC0_COMPUTE_CLASS; 45bf215546Sopenharmony_ci break; 46bf215546Sopenharmony_ci default: 47bf215546Sopenharmony_ci NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); 48bf215546Sopenharmony_ci return -1; 49bf215546Sopenharmony_ci } 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, 52bf215546Sopenharmony_ci &screen->compute); 53bf215546Sopenharmony_ci if (ret) { 54bf215546Sopenharmony_ci NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); 55bf215546Sopenharmony_ci return ret; 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); 59bf215546Sopenharmony_ci PUSH_DATA (push, screen->compute->oclass); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci /* hardware limit */ 62bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); 63bf215546Sopenharmony_ci PUSH_DATA (push, screen->mp_count); 64bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); 65bf215546Sopenharmony_ci PUSH_DATA (push, 0xf); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); 68bf215546Sopenharmony_ci PUSH_DATA (push, 0x8000); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* global memory setup */ 71bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); 72bf215546Sopenharmony_ci PUSH_DATA (push, 0); 73bf215546Sopenharmony_ci BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); 74bf215546Sopenharmony_ci for (i = 0; i <= 0xff; i++) 75bf215546Sopenharmony_ci PUSH_DATA (push, (0xc << 28) | (i << 16) | i); 76bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); 77bf215546Sopenharmony_ci PUSH_DATA (push, 1); 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci /* local memory and cstack setup */ 80bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); 81bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->offset); 82bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->offset); 83bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); 84bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->size); 85bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->size); 86bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); 87bf215546Sopenharmony_ci PUSH_DATA (push, 0); 88bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); 89bf215546Sopenharmony_ci PUSH_DATA (push, 0xff << 24); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci /* shared memory setup */ 92bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); 93bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); 94bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); 95bf215546Sopenharmony_ci PUSH_DATA (push, 0xfe << 24); 96bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); 97bf215546Sopenharmony_ci PUSH_DATA (push, 0); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* code segment setup */ 100bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); 101bf215546Sopenharmony_ci PUSH_DATAh(push, screen->text->offset); 102bf215546Sopenharmony_ci PUSH_DATA (push, screen->text->offset); 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci /* textures */ 105bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); 106bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset); 107bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset); 108bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci /* samplers */ 111bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); 112bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset + 65536); 113bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset + 65536); 114bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* MS sample coordinate offsets */ 117bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 118bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_SIZE); 119bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 120bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 121bf215546Sopenharmony_ci BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8); 122bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_MS_INFO); 123bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* 0 */ 124bf215546Sopenharmony_ci PUSH_DATA (push, 0); 125bf215546Sopenharmony_ci PUSH_DATA (push, 1); /* 1 */ 126bf215546Sopenharmony_ci PUSH_DATA (push, 0); 127bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* 2 */ 128bf215546Sopenharmony_ci PUSH_DATA (push, 1); 129bf215546Sopenharmony_ci PUSH_DATA (push, 1); /* 3 */ 130bf215546Sopenharmony_ci PUSH_DATA (push, 1); 131bf215546Sopenharmony_ci PUSH_DATA (push, 2); /* 4 */ 132bf215546Sopenharmony_ci PUSH_DATA (push, 0); 133bf215546Sopenharmony_ci PUSH_DATA (push, 3); /* 5 */ 134bf215546Sopenharmony_ci PUSH_DATA (push, 0); 135bf215546Sopenharmony_ci PUSH_DATA (push, 2); /* 6 */ 136bf215546Sopenharmony_ci PUSH_DATA (push, 1); 137bf215546Sopenharmony_ci PUSH_DATA (push, 3); /* 7 */ 138bf215546Sopenharmony_ci PUSH_DATA (push, 1); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci return 0; 141bf215546Sopenharmony_ci} 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_cistatic void 144bf215546Sopenharmony_cinvc0_compute_validate_samplers(struct nvc0_context *nvc0) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci bool need_flush = nvc0_validate_tsc(nvc0, 5); 147bf215546Sopenharmony_ci if (need_flush) { 148bf215546Sopenharmony_ci BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); 149bf215546Sopenharmony_ci PUSH_DATA (nvc0->base.pushbuf, 0); 150bf215546Sopenharmony_ci } 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* Invalidate all 3D samplers because they are aliased. */ 153bf215546Sopenharmony_ci for (int s = 0; s < 5; s++) 154bf215546Sopenharmony_ci nvc0->samplers_dirty[s] = ~0; 155bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; 156bf215546Sopenharmony_ci} 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cistatic void 159bf215546Sopenharmony_cinvc0_compute_validate_textures(struct nvc0_context *nvc0) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci bool need_flush = nvc0_validate_tic(nvc0, 5); 162bf215546Sopenharmony_ci if (need_flush) { 163bf215546Sopenharmony_ci BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); 164bf215546Sopenharmony_ci PUSH_DATA (nvc0->base.pushbuf, 0); 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* Invalidate all 3D textures because they are aliased. */ 168bf215546Sopenharmony_ci for (int s = 0; s < 5; s++) { 169bf215546Sopenharmony_ci for (int i = 0; i < nvc0->num_textures[s]; i++) 170bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); 171bf215546Sopenharmony_ci nvc0->textures_dirty[s] = ~0; 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic inline void 177bf215546Sopenharmony_cinvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci int s; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ 182bf215546Sopenharmony_ci for (s = 0; s < 5; s++) { 183bf215546Sopenharmony_ci nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; 184bf215546Sopenharmony_ci nvc0->state.uniform_buffer_bound[s] = false; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic void 190bf215546Sopenharmony_cinvc0_compute_validate_constbufs(struct nvc0_context *nvc0) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 193bf215546Sopenharmony_ci const int s = 5; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci while (nvc0->constbuf_dirty[s]) { 196bf215546Sopenharmony_ci int i = ffs(nvc0->constbuf_dirty[s]) - 1; 197bf215546Sopenharmony_ci nvc0->constbuf_dirty[s] &= ~(1 << i); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci if (nvc0->constbuf[s][i].user) { 200bf215546Sopenharmony_ci struct nouveau_bo *bo = nvc0->screen->uniform_bo; 201bf215546Sopenharmony_ci const unsigned base = NVC0_CB_USR_INFO(s); 202bf215546Sopenharmony_ci const unsigned size = nvc0->constbuf[s][0].size; 203bf215546Sopenharmony_ci assert(i == 0); /* we really only want OpenGL uniforms here */ 204bf215546Sopenharmony_ci assert(nvc0->constbuf[s][0].u.data); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (!nvc0->state.uniform_buffer_bound[s]) { 207bf215546Sopenharmony_ci nvc0->state.uniform_buffer_bound[s] = true; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 210bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE); 211bf215546Sopenharmony_ci PUSH_DATAh(push, bo->offset + base); 212bf215546Sopenharmony_ci PUSH_DATA (push, bo->offset + base); 213bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 214bf215546Sopenharmony_ci PUSH_DATA (push, (0 << 8) | 1); 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), 217bf215546Sopenharmony_ci base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, 218bf215546Sopenharmony_ci nvc0->constbuf[s][0].u.data); 219bf215546Sopenharmony_ci } else { 220bf215546Sopenharmony_ci struct nv04_resource *res = 221bf215546Sopenharmony_ci nv04_resource(nvc0->constbuf[s][i].u.buf); 222bf215546Sopenharmony_ci if (res) { 223bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 224bf215546Sopenharmony_ci PUSH_DATA (push, nvc0->constbuf[s][i].size); 225bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); 226bf215546Sopenharmony_ci PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); 227bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 228bf215546Sopenharmony_ci PUSH_DATA (push, (i << 8) | 1); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci res->cb_bindings[s] |= 1 << i; 233bf215546Sopenharmony_ci } else { 234bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 235bf215546Sopenharmony_ci PUSH_DATA (push, (i << 8) | 0); 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci if (i == 0) 238bf215546Sopenharmony_ci nvc0->state.uniform_buffer_bound[s] = false; 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci nvc0_compute_invalidate_constbufs(nvc0); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 245bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_cistatic void 249bf215546Sopenharmony_cinvc0_compute_validate_driverconst(struct nvc0_context *nvc0) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 252bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 255bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_SIZE); 256bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 257bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 258bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 259bf215546Sopenharmony_ci PUSH_DATA (push, (15 << 8) | 1); 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; 262bf215546Sopenharmony_ci} 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_cistatic void 265bf215546Sopenharmony_cinvc0_compute_validate_buffers(struct nvc0_context *nvc0) 266bf215546Sopenharmony_ci{ 267bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 268bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 269bf215546Sopenharmony_ci const int s = 5; 270bf215546Sopenharmony_ci int i; 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 273bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_SIZE); 274bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 275bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); 276bf215546Sopenharmony_ci BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); 277bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci for (i = 0; i < NVC0_MAX_BUFFERS; i++) { 280bf215546Sopenharmony_ci if (nvc0->buffers[s][i].buffer) { 281bf215546Sopenharmony_ci struct nv04_resource *res = 282bf215546Sopenharmony_ci nv04_resource(nvc0->buffers[s][i].buffer); 283bf215546Sopenharmony_ci PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); 284bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); 285bf215546Sopenharmony_ci PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); 286bf215546Sopenharmony_ci PUSH_DATA (push, 0); 287bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); 288bf215546Sopenharmony_ci util_range_add(&res->base, &res->valid_buffer_range, 289bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_offset, 290bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_offset + 291bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_size); 292bf215546Sopenharmony_ci } else { 293bf215546Sopenharmony_ci PUSH_DATA (push, 0); 294bf215546Sopenharmony_ci PUSH_DATA (push, 0); 295bf215546Sopenharmony_ci PUSH_DATA (push, 0); 296bf215546Sopenharmony_ci PUSH_DATA (push, 0); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_civoid 302bf215546Sopenharmony_cinvc0_compute_validate_globals(struct nvc0_context *nvc0) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci unsigned i; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); 307bf215546Sopenharmony_ci ++i) { 308bf215546Sopenharmony_ci struct pipe_resource *res = *util_dynarray_element( 309bf215546Sopenharmony_ci &nvc0->global_residents, struct pipe_resource *, i); 310bf215546Sopenharmony_ci if (res) 311bf215546Sopenharmony_ci nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL, 312bf215546Sopenharmony_ci nv04_resource(res), NOUVEAU_BO_RDWR); 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci} 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_cistatic inline void 317bf215546Sopenharmony_cinvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s) 318bf215546Sopenharmony_ci{ 319bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 320bf215546Sopenharmony_ci int i; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci for (i = 0; i < NVC0_MAX_IMAGES; ++i) { 323bf215546Sopenharmony_ci if (s == 5) 324bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6); 325bf215546Sopenharmony_ci else 326bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6); 327bf215546Sopenharmony_ci PUSH_DATA(push, 0); 328bf215546Sopenharmony_ci PUSH_DATA(push, 0); 329bf215546Sopenharmony_ci PUSH_DATA(push, 0); 330bf215546Sopenharmony_ci PUSH_DATA(push, 0); 331bf215546Sopenharmony_ci PUSH_DATA(push, 0x14000); 332bf215546Sopenharmony_ci PUSH_DATA(push, 0); 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci} 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_cistatic void 337bf215546Sopenharmony_cinvc0_compute_validate_surfaces(struct nvc0_context *nvc0) 338bf215546Sopenharmony_ci{ 339bf215546Sopenharmony_ci /* TODO: Invalidating both 3D and CP surfaces before validating surfaces for 340bf215546Sopenharmony_ci * compute is probably not really necessary, but we didn't find any better 341bf215546Sopenharmony_ci * solutions for now. This fixes some invalidation issues when compute and 342bf215546Sopenharmony_ci * fragment shaders are used inside the same context. Anyway, we definitely 343bf215546Sopenharmony_ci * have invalidation issues between 3D and CP for other resources like SSBO 344bf215546Sopenharmony_ci * and atomic counters. */ 345bf215546Sopenharmony_ci nvc0_compute_invalidate_surfaces(nvc0, 4); 346bf215546Sopenharmony_ci nvc0_compute_invalidate_surfaces(nvc0, 5); 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci nvc0_validate_suf(nvc0, 5); 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */ 351bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF); 352bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES; 353bf215546Sopenharmony_ci nvc0->images_dirty[4] |= nvc0->images_valid[4]; 354bf215546Sopenharmony_ci} 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_cistatic struct nvc0_state_validate 357bf215546Sopenharmony_civalidate_list_cp[] = { 358bf215546Sopenharmony_ci { nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM }, 359bf215546Sopenharmony_ci { nvc0_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF }, 360bf215546Sopenharmony_ci { nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST }, 361bf215546Sopenharmony_ci { nvc0_compute_validate_buffers, NVC0_NEW_CP_BUFFERS }, 362bf215546Sopenharmony_ci { nvc0_compute_validate_textures, NVC0_NEW_CP_TEXTURES }, 363bf215546Sopenharmony_ci { nvc0_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS }, 364bf215546Sopenharmony_ci { nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS }, 365bf215546Sopenharmony_ci { nvc0_compute_validate_surfaces, NVC0_NEW_CP_SURFACES }, 366bf215546Sopenharmony_ci}; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_cistatic bool 369bf215546Sopenharmony_cinvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask) 370bf215546Sopenharmony_ci{ 371bf215546Sopenharmony_ci bool ret; 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci ret = nvc0_state_validate(nvc0, mask, validate_list_cp, 374bf215546Sopenharmony_ci ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp, 375bf215546Sopenharmony_ci nvc0->bufctx_cp); 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci if (unlikely(nvc0->state.flushed)) 378bf215546Sopenharmony_ci nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true); 379bf215546Sopenharmony_ci return ret; 380bf215546Sopenharmony_ci} 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_cistatic void 383bf215546Sopenharmony_cinvc0_compute_upload_input(struct nvc0_context *nvc0, 384bf215546Sopenharmony_ci const struct pipe_grid_info *info) 385bf215546Sopenharmony_ci{ 386bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 387bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 388bf215546Sopenharmony_ci struct nvc0_program *cp = nvc0->compprog; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci if (cp->parm_size) { 391bf215546Sopenharmony_ci struct nouveau_bo *bo = screen->uniform_bo; 392bf215546Sopenharmony_ci const unsigned base = NVC0_CB_USR_INFO(5); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 395bf215546Sopenharmony_ci PUSH_DATA (push, align(cp->parm_size, 0x100)); 396bf215546Sopenharmony_ci PUSH_DATAh(push, bo->offset + base); 397bf215546Sopenharmony_ci PUSH_DATA (push, bo->offset + base); 398bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); 399bf215546Sopenharmony_ci PUSH_DATA (push, (0 << 8) | 1); 400bf215546Sopenharmony_ci /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ 401bf215546Sopenharmony_ci BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); 402bf215546Sopenharmony_ci PUSH_DATA (push, 0); 403bf215546Sopenharmony_ci PUSH_DATAp(push, info->input, cp->parm_size / 4); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci nvc0_compute_invalidate_constbufs(nvc0); 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); 409bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_SIZE); 410bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 411bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1); 414bf215546Sopenharmony_ci /* (7) as we only upload work_dim on nvc0, the rest uses special regs */ 415bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7)); 416bf215546Sopenharmony_ci PUSH_DATA (push, info->work_dim); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 419bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); 420bf215546Sopenharmony_ci} 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_civoid 423bf215546Sopenharmony_cinvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) 424bf215546Sopenharmony_ci{ 425bf215546Sopenharmony_ci struct nvc0_context *nvc0 = nvc0_context(pipe); 426bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 427bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 428bf215546Sopenharmony_ci struct nvc0_program *cp = nvc0->compprog; 429bf215546Sopenharmony_ci int ret; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci ret = !nvc0_state_validate_cp(nvc0, ~0); 432bf215546Sopenharmony_ci if (ret) { 433bf215546Sopenharmony_ci NOUVEAU_ERR("Failed to launch grid !\n"); 434bf215546Sopenharmony_ci return; 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci nvc0_compute_upload_input(nvc0, info); 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); 440bf215546Sopenharmony_ci PUSH_DATA (push, cp->code_base); 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); 443bf215546Sopenharmony_ci PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10)); 444bf215546Sopenharmony_ci PUSH_DATA (push, 0); 445bf215546Sopenharmony_ci PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); 448bf215546Sopenharmony_ci PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); 449bf215546Sopenharmony_ci PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); 450bf215546Sopenharmony_ci PUSH_DATA (push, cp->num_barriers); 451bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); 452bf215546Sopenharmony_ci PUSH_DATA (push, cp->num_gprs); 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci /* launch preliminary setup */ 455bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); 456bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 457bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x036c), 1); 458bf215546Sopenharmony_ci PUSH_DATA (push, 0); 459bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); 460bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci /* block setup */ 463bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); 464bf215546Sopenharmony_ci PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); 465bf215546Sopenharmony_ci PUSH_DATA (push, info->block[2]); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 32, 2, 1); 468bf215546Sopenharmony_ci PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci if (unlikely(info->indirect)) { 471bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(info->indirect); 472bf215546Sopenharmony_ci uint32_t offset = res->offset + info->indirect_offset; 473bf215546Sopenharmony_ci unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 476bf215546Sopenharmony_ci PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); 477bf215546Sopenharmony_ci nouveau_pushbuf_data(push, res->bo, offset, 478bf215546Sopenharmony_ci NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); 479bf215546Sopenharmony_ci } else { 480bf215546Sopenharmony_ci /* grid setup */ 481bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); 482bf215546Sopenharmony_ci PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); 483bf215546Sopenharmony_ci PUSH_DATA (push, info->grid[2]); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* kernel launching */ 486bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); 487bf215546Sopenharmony_ci PUSH_DATA (push, 0); 488bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); 489bf215546Sopenharmony_ci PUSH_DATA (push, 0); 490bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); 491bf215546Sopenharmony_ci PUSH_DATA (push, 0x1000); 492bf215546Sopenharmony_ci BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); 493bf215546Sopenharmony_ci PUSH_DATA (push, 0); 494bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x0360), 1); 495bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci /* TODO: Not sure if this is really necessary. */ 499bf215546Sopenharmony_ci nvc0_compute_invalidate_surfaces(nvc0, 5); 500bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); 501bf215546Sopenharmony_ci nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES; 502bf215546Sopenharmony_ci nvc0->images_dirty[5] |= nvc0->images_valid[5]; 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci nvc0_update_compute_invocations_counter(nvc0, info); 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_cistatic void 508bf215546Sopenharmony_cinvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0, 509bf215546Sopenharmony_ci const struct pipe_grid_info *info) { 510bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 511bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(info->indirect); 512bf215546Sopenharmony_ci uint32_t offset = res->offset + info->indirect_offset; 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 16, 0, 8); 515bf215546Sopenharmony_ci PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 516bf215546Sopenharmony_ci BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7); 517bf215546Sopenharmony_ci PUSH_DATA(push, 6); 518bf215546Sopenharmony_ci PUSH_DATA(push, info->block[0]); 519bf215546Sopenharmony_ci PUSH_DATA(push, info->block[1]); 520bf215546Sopenharmony_ci PUSH_DATA(push, info->block[2]); 521bf215546Sopenharmony_ci nouveau_pushbuf_data(push, res->bo, offset, 522bf215546Sopenharmony_ci NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_civoid 526bf215546Sopenharmony_cinvc0_update_compute_invocations_counter(struct nvc0_context *nvc0, 527bf215546Sopenharmony_ci const struct pipe_grid_info *info) { 528bf215546Sopenharmony_ci if (unlikely(info->indirect)) { 529bf215546Sopenharmony_ci nvc0_compute_update_indirect_invocations(nvc0, info); 530bf215546Sopenharmony_ci } else { 531bf215546Sopenharmony_ci uint64_t invocations = info->block[0] * info->block[1] * info->block[2]; 532bf215546Sopenharmony_ci invocations *= info->grid[0] * info->grid[1] * info->grid[2]; 533bf215546Sopenharmony_ci nvc0->compute_invocations += invocations; 534bf215546Sopenharmony_ci } 535bf215546Sopenharmony_ci} 536