1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2012 Nouveau Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 12bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci * 22bf215546Sopenharmony_ci * Authors: Christoph Bumiller 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "nvc0/nvc0_context.h" 26bf215546Sopenharmony_ci#include "nvc0/nve4_compute.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nv50_ir_driver.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "drf.h" 31bf215546Sopenharmony_ci#include "qmd.h" 32bf215546Sopenharmony_ci#include "cla0c0qmd.h" 33bf215546Sopenharmony_ci#include "clc0c0qmd.h" 34bf215546Sopenharmony_ci#include "clc3c0qmd.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a) 37bf215546Sopenharmony_ci#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a) 38bf215546Sopenharmony_ci#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a) 39bf215546Sopenharmony_ci#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a) 40bf215546Sopenharmony_ci#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a) 41bf215546Sopenharmony_ci#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a) 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ciint 44bf215546Sopenharmony_cinve4_screen_compute_setup(struct nvc0_screen *screen, 45bf215546Sopenharmony_ci struct nouveau_pushbuf *push) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci struct nouveau_device *dev = screen->base.device; 48bf215546Sopenharmony_ci struct nouveau_object *chan = screen->base.channel; 49bf215546Sopenharmony_ci int i; 50bf215546Sopenharmony_ci int ret; 51bf215546Sopenharmony_ci uint32_t obj_class; 52bf215546Sopenharmony_ci uint64_t address; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci switch (dev->chipset & ~0xf) { 55bf215546Sopenharmony_ci case 0x160: 56bf215546Sopenharmony_ci obj_class = TU102_COMPUTE_CLASS; 57bf215546Sopenharmony_ci break; 58bf215546Sopenharmony_ci case 0x140: 59bf215546Sopenharmony_ci obj_class = GV100_COMPUTE_CLASS; 60bf215546Sopenharmony_ci break; 61bf215546Sopenharmony_ci case 0x100: 62bf215546Sopenharmony_ci case 0xf0: 63bf215546Sopenharmony_ci obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ 64bf215546Sopenharmony_ci break; 65bf215546Sopenharmony_ci case 0xe0: 66bf215546Sopenharmony_ci obj_class = NVE4_COMPUTE_CLASS; /* GK104 */ 67bf215546Sopenharmony_ci break; 68bf215546Sopenharmony_ci case 0x110: 69bf215546Sopenharmony_ci obj_class = GM107_COMPUTE_CLASS; 70bf215546Sopenharmony_ci break; 71bf215546Sopenharmony_ci case 0x120: 72bf215546Sopenharmony_ci obj_class = GM200_COMPUTE_CLASS; 73bf215546Sopenharmony_ci break; 74bf215546Sopenharmony_ci case 0x130: 75bf215546Sopenharmony_ci obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ? 76bf215546Sopenharmony_ci GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS; 77bf215546Sopenharmony_ci break; 78bf215546Sopenharmony_ci default: 79bf215546Sopenharmony_ci NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); 80bf215546Sopenharmony_ci return -1; 81bf215546Sopenharmony_ci } 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0, 84bf215546Sopenharmony_ci &screen->compute); 85bf215546Sopenharmony_ci if (ret) { 86bf215546Sopenharmony_ci NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); 87bf215546Sopenharmony_ci return ret; 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); 91bf215546Sopenharmony_ci PUSH_DATA (push, screen->compute->oclass); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2); 94bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->offset); 95bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->offset); 96bf215546Sopenharmony_ci /* No idea why there are 2. Divide size by 2 to be safe. 97bf215546Sopenharmony_ci * Actually this might be per-MP TEMP size and looks like I'm only using 98bf215546Sopenharmony_ci * 2 MPs instead of all 8. 99bf215546Sopenharmony_ci */ 100bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3); 101bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->size / screen->mp_count); 102bf215546Sopenharmony_ci PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); 103bf215546Sopenharmony_ci PUSH_DATA (push, 0xff); 104bf215546Sopenharmony_ci if (obj_class < GV100_COMPUTE_CLASS) { 105bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); 106bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->size / screen->mp_count); 107bf215546Sopenharmony_ci PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); 108bf215546Sopenharmony_ci PUSH_DATA (push, 0xff); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci /* Unified address space ? Who needs that ? Certainly not OpenCL. 112bf215546Sopenharmony_ci * 113bf215546Sopenharmony_ci * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be 114bf215546Sopenharmony_ci * accessible. We cannot prevent that at the moment, so expect failure. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_ci if (obj_class < GV100_COMPUTE_CLASS) { 117bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); 118bf215546Sopenharmony_ci PUSH_DATA (push, 0xff << 24); 119bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); 120bf215546Sopenharmony_ci PUSH_DATA (push, 0xfe << 24); 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); 123bf215546Sopenharmony_ci PUSH_DATAh(push, screen->text->offset); 124bf215546Sopenharmony_ci PUSH_DATA (push, screen->text->offset); 125bf215546Sopenharmony_ci } else { 126bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x2a0), 2); 127bf215546Sopenharmony_ci PUSH_DATAh(push, 0xfeULL << 24); 128bf215546Sopenharmony_ci PUSH_DATA (push, 0xfeULL << 24); 129bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x7b0), 2); 130bf215546Sopenharmony_ci PUSH_DATAh(push, 0xffULL << 24); 131bf215546Sopenharmony_ci PUSH_DATA (push, 0xffULL << 24); 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(0x0310), 1); 135bf215546Sopenharmony_ci PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci /* NOTE: these do not affect the state used by the 3D object */ 138bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3); 139bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset); 140bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset); 141bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); 142bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3); 143bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset + 65536); 144bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset + 65536); 145bf215546Sopenharmony_ci PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci if (obj_class >= NVF0_COMPUTE_CLASS) { 148bf215546Sopenharmony_ci /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1) 149bf215546Sopenharmony_ci * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently 150bf215546Sopenharmony_ci * disabled because our firmware doesn't support these commands and the 151bf215546Sopenharmony_ci * GPU hangs if they are used. */ 152bf215546Sopenharmony_ci BEGIN_NIC0(push, SUBC_CP(0x0248), 64); 153bf215546Sopenharmony_ci for (i = 63; i >= 0; i--) 154bf215546Sopenharmony_ci PUSH_DATA(push, 0x38000 | i); 155bf215546Sopenharmony_ci IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1); 159bf215546Sopenharmony_ci PUSH_DATA (push, 7); /* does not interfere with 3D */ 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci /* Disabling this UNK command avoid a read fault when using texelFetch() 162bf215546Sopenharmony_ci * from a compute shader for weird reasons. 163bf215546Sopenharmony_ci if (obj_class == NVF0_COMPUTE_CLASS) 164bf215546Sopenharmony_ci IMMED_NVC0(push, SUBC_CP(0x02c4), 1); 165bf215546Sopenharmony_ci */ 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* MS sample coordinate offsets: these do not work with _ALT modes ! */ 170bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 171bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO); 172bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO); 173bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 174bf215546Sopenharmony_ci PUSH_DATA (push, 64); 175bf215546Sopenharmony_ci PUSH_DATA (push, 1); 176bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17); 177bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 178bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* 0 */ 179bf215546Sopenharmony_ci PUSH_DATA (push, 0); 180bf215546Sopenharmony_ci PUSH_DATA (push, 1); /* 1 */ 181bf215546Sopenharmony_ci PUSH_DATA (push, 0); 182bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* 2 */ 183bf215546Sopenharmony_ci PUSH_DATA (push, 1); 184bf215546Sopenharmony_ci PUSH_DATA (push, 1); /* 3 */ 185bf215546Sopenharmony_ci PUSH_DATA (push, 1); 186bf215546Sopenharmony_ci PUSH_DATA (push, 2); /* 4 */ 187bf215546Sopenharmony_ci PUSH_DATA (push, 0); 188bf215546Sopenharmony_ci PUSH_DATA (push, 3); /* 5 */ 189bf215546Sopenharmony_ci PUSH_DATA (push, 0); 190bf215546Sopenharmony_ci PUSH_DATA (push, 2); /* 6 */ 191bf215546Sopenharmony_ci PUSH_DATA (push, 1); 192bf215546Sopenharmony_ci PUSH_DATA (push, 3); /* 7 */ 193bf215546Sopenharmony_ci PUSH_DATA (push, 1); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER 196bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 197bf215546Sopenharmony_ci PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); 198bf215546Sopenharmony_ci PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); 199bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 200bf215546Sopenharmony_ci PUSH_DATA (push, 28); 201bf215546Sopenharmony_ci PUSH_DATA (push, 1); 202bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8); 203bf215546Sopenharmony_ci PUSH_DATA (push, 1); 204bf215546Sopenharmony_ci PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); 205bf215546Sopenharmony_ci PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); 206bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->offset); 207bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls->offset); 208bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */ 209bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */ 210bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* warp cfstack size */ 211bf215546Sopenharmony_ci#endif 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); 214bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci return 0; 217bf215546Sopenharmony_ci} 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_cistatic void 220bf215546Sopenharmony_cigm107_compute_validate_surfaces(struct nvc0_context *nvc0, 221bf215546Sopenharmony_ci struct pipe_image_view *view, int slot) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(view->resource); 224bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 225bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 226bf215546Sopenharmony_ci struct nouveau_bo *txc = nvc0->screen->txc; 227bf215546Sopenharmony_ci struct nv50_tic_entry *tic; 228bf215546Sopenharmony_ci uint64_t address; 229bf215546Sopenharmony_ci const int s = 5; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci tic = nv50_tic_entry(nvc0->images_tic[s][slot]); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci res = nv04_resource(tic->pipe.texture); 234bf215546Sopenharmony_ci nvc0_update_tic(nvc0, tic, res); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci if (tic->id < 0) { 237bf215546Sopenharmony_ci tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci /* upload the texture view */ 240bf215546Sopenharmony_ci PUSH_SPACE(push, 16); 241bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 242bf215546Sopenharmony_ci PUSH_DATAh(push, txc->offset + (tic->id * 32)); 243bf215546Sopenharmony_ci PUSH_DATA (push, txc->offset + (tic->id * 32)); 244bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 245bf215546Sopenharmony_ci PUSH_DATA (push, 32); 246bf215546Sopenharmony_ci PUSH_DATA (push, 1); 247bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9); 248bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 249bf215546Sopenharmony_ci PUSH_DATAp(push, &tic->tic[0], 8); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), 1); 252bf215546Sopenharmony_ci PUSH_DATA (push, (tic->id << 4) | 1); 253bf215546Sopenharmony_ci } else 254bf215546Sopenharmony_ci if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { 255bf215546Sopenharmony_ci BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), 1); 256bf215546Sopenharmony_ci PUSH_DATA (push, (tic->id << 4) | 1); 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; 261bf215546Sopenharmony_ci res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci /* upload the texture handle */ 268bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 269bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(slot + 32)); 270bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(slot + 32)); 271bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 272bf215546Sopenharmony_ci PUSH_DATA (push, 4); 273bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 274bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 2); 275bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 276bf215546Sopenharmony_ci PUSH_DATA (push, tic->id); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); 279bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 280bf215546Sopenharmony_ci} 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_cistatic void 283bf215546Sopenharmony_cinve4_compute_validate_surfaces(struct nvc0_context *nvc0) 284bf215546Sopenharmony_ci{ 285bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 286bf215546Sopenharmony_ci uint64_t address; 287bf215546Sopenharmony_ci const int s = 5; 288bf215546Sopenharmony_ci int i, j; 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci if (!nvc0->images_dirty[s]) 291bf215546Sopenharmony_ci return; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci for (i = 0; i < NVC0_MAX_IMAGES; ++i) { 296bf215546Sopenharmony_ci struct pipe_image_view *view = &nvc0->images[s][i]; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 299bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_SU_INFO(i)); 300bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_SU_INFO(i)); 301bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 302bf215546Sopenharmony_ci PUSH_DATA (push, 16 * 4); 303bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 304bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 16); 305bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci if (view->resource) { 308bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(view->resource); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci if (res->base.target == PIPE_BUFFER) { 311bf215546Sopenharmony_ci if (view->access & PIPE_IMAGE_ACCESS_WRITE) 312bf215546Sopenharmony_ci nvc0_mark_image_range_valid(view); 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci nve4_set_surface_info(push, view, nvc0); 316bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) 319bf215546Sopenharmony_ci gm107_compute_validate_surfaces(nvc0, view, i); 320bf215546Sopenharmony_ci } else { 321bf215546Sopenharmony_ci for (j = 0; j < 16; j++) 322bf215546Sopenharmony_ci PUSH_DATA(push, 0); 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci } 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci/* Thankfully, textures with samplers follow the normal rules. */ 328bf215546Sopenharmony_cistatic void 329bf215546Sopenharmony_cinve4_compute_validate_samplers(struct nvc0_context *nvc0) 330bf215546Sopenharmony_ci{ 331bf215546Sopenharmony_ci bool need_flush = nve4_validate_tsc(nvc0, 5); 332bf215546Sopenharmony_ci if (need_flush) { 333bf215546Sopenharmony_ci BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1); 334bf215546Sopenharmony_ci PUSH_DATA (nvc0->base.pushbuf, 0); 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* Invalidate all 3D samplers because they are aliased. */ 338bf215546Sopenharmony_ci for (int s = 0; s < 5; s++) 339bf215546Sopenharmony_ci nvc0->samplers_dirty[s] = ~0; 340bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci/* (Code duplicated at bottom for various non-convincing reasons. 344bf215546Sopenharmony_ci * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC 345bf215546Sopenharmony_ci * entries to avoid a subchannel switch. 346bf215546Sopenharmony_ci * Same for texture cache flushes. 347bf215546Sopenharmony_ci * Also, the bufctx differs, and more IFs in the 3D version looks ugly.) 348bf215546Sopenharmony_ci */ 349bf215546Sopenharmony_cistatic void nve4_compute_validate_textures(struct nvc0_context *); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_cistatic void 352bf215546Sopenharmony_cinve4_compute_set_tex_handles(struct nvc0_context *nvc0) 353bf215546Sopenharmony_ci{ 354bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 355bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 356bf215546Sopenharmony_ci uint64_t address; 357bf215546Sopenharmony_ci const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE); 358bf215546Sopenharmony_ci unsigned i, n; 359bf215546Sopenharmony_ci uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci if (!dirty) 362bf215546Sopenharmony_ci return; 363bf215546Sopenharmony_ci i = ffs(dirty) - 1; 364bf215546Sopenharmony_ci n = util_logbase2(dirty) + 1 - i; 365bf215546Sopenharmony_ci assert(n); 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 370bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i)); 371bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i)); 372bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 373bf215546Sopenharmony_ci PUSH_DATA (push, n * 4); 374bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 375bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n); 376bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 377bf215546Sopenharmony_ci PUSH_DATAp(push, &nvc0->tex_handles[s][i], n); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); 380bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci nvc0->textures_dirty[s] = 0; 383bf215546Sopenharmony_ci nvc0->samplers_dirty[s] = 0; 384bf215546Sopenharmony_ci} 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_cistatic void 387bf215546Sopenharmony_cinve4_compute_validate_constbufs(struct nvc0_context *nvc0) 388bf215546Sopenharmony_ci{ 389bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 390bf215546Sopenharmony_ci const int s = 5; 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci while (nvc0->constbuf_dirty[s]) { 393bf215546Sopenharmony_ci int i = ffs(nvc0->constbuf_dirty[s]) - 1; 394bf215546Sopenharmony_ci nvc0->constbuf_dirty[s] &= ~(1 << i); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci if (nvc0->constbuf[s][i].user) { 397bf215546Sopenharmony_ci struct nouveau_bo *bo = nvc0->screen->uniform_bo; 398bf215546Sopenharmony_ci const unsigned base = NVC0_CB_USR_INFO(s); 399bf215546Sopenharmony_ci const unsigned size = nvc0->constbuf[s][0].size; 400bf215546Sopenharmony_ci assert(i == 0); /* we really only want OpenGL uniforms here */ 401bf215546Sopenharmony_ci assert(nvc0->constbuf[s][0].u.data); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 404bf215546Sopenharmony_ci PUSH_DATAh(push, bo->offset + base); 405bf215546Sopenharmony_ci PUSH_DATA (push, bo->offset + base); 406bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 407bf215546Sopenharmony_ci PUSH_DATA (push, size); 408bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 409bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4)); 410bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 411bf215546Sopenharmony_ci PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4); 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci else { 414bf215546Sopenharmony_ci struct nv04_resource *res = 415bf215546Sopenharmony_ci nv04_resource(nvc0->constbuf[s][i].u.buf); 416bf215546Sopenharmony_ci if (res) { 417bf215546Sopenharmony_ci uint64_t address 418bf215546Sopenharmony_ci = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci /* constbufs above 0 will are fetched via ubo info in the shader */ 421bf215546Sopenharmony_ci if (i > 0) { 422bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 423bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); 424bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); 425bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 426bf215546Sopenharmony_ci PUSH_DATA (push, 4 * 4); 427bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 428bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4); 429bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); 432bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); 433bf215546Sopenharmony_ci PUSH_DATA (push, nvc0->constbuf[s][i].size); 434bf215546Sopenharmony_ci PUSH_DATA (push, 0); 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); 438bf215546Sopenharmony_ci res->cb_bindings[s] |= 1 << i; 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci } 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); 444bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 445bf215546Sopenharmony_ci} 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_cistatic void 448bf215546Sopenharmony_cinve4_compute_validate_buffers(struct nvc0_context *nvc0) 449bf215546Sopenharmony_ci{ 450bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 451bf215546Sopenharmony_ci uint64_t address; 452bf215546Sopenharmony_ci const int s = 5; 453bf215546Sopenharmony_ci int i; 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 458bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(0)); 459bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(0)); 460bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 461bf215546Sopenharmony_ci PUSH_DATA (push, 4 * NVC0_MAX_BUFFERS * 4); 462bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 463bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4 * NVC0_MAX_BUFFERS); 464bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci for (i = 0; i < NVC0_MAX_BUFFERS; i++) { 467bf215546Sopenharmony_ci if (nvc0->buffers[s][i].buffer) { 468bf215546Sopenharmony_ci struct nv04_resource *res = 469bf215546Sopenharmony_ci nv04_resource(nvc0->buffers[s][i].buffer); 470bf215546Sopenharmony_ci PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); 471bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); 472bf215546Sopenharmony_ci PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); 473bf215546Sopenharmony_ci PUSH_DATA (push, 0); 474bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); 475bf215546Sopenharmony_ci util_range_add(&res->base, &res->valid_buffer_range, 476bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_offset, 477bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_offset + 478bf215546Sopenharmony_ci nvc0->buffers[s][i].buffer_size); 479bf215546Sopenharmony_ci } else { 480bf215546Sopenharmony_ci PUSH_DATA (push, 0); 481bf215546Sopenharmony_ci PUSH_DATA (push, 0); 482bf215546Sopenharmony_ci PUSH_DATA (push, 0); 483bf215546Sopenharmony_ci PUSH_DATA (push, 0); 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci} 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_cistatic struct nvc0_state_validate 489bf215546Sopenharmony_civalidate_list_cp[] = { 490bf215546Sopenharmony_ci { nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM }, 491bf215546Sopenharmony_ci { nve4_compute_validate_textures, NVC0_NEW_CP_TEXTURES }, 492bf215546Sopenharmony_ci { nve4_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS }, 493bf215546Sopenharmony_ci { nve4_compute_set_tex_handles, NVC0_NEW_CP_TEXTURES | 494bf215546Sopenharmony_ci NVC0_NEW_CP_SAMPLERS }, 495bf215546Sopenharmony_ci { nve4_compute_validate_surfaces, NVC0_NEW_CP_SURFACES }, 496bf215546Sopenharmony_ci { nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS }, 497bf215546Sopenharmony_ci { nve4_compute_validate_buffers, NVC0_NEW_CP_BUFFERS }, 498bf215546Sopenharmony_ci { nve4_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF }, 499bf215546Sopenharmony_ci}; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_cistatic bool 502bf215546Sopenharmony_cinve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask) 503bf215546Sopenharmony_ci{ 504bf215546Sopenharmony_ci bool ret; 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci ret = nvc0_state_validate(nvc0, mask, validate_list_cp, 507bf215546Sopenharmony_ci ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp, 508bf215546Sopenharmony_ci nvc0->bufctx_cp); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci if (unlikely(nvc0->state.flushed)) 511bf215546Sopenharmony_ci nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true); 512bf215546Sopenharmony_ci return ret; 513bf215546Sopenharmony_ci} 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic void 516bf215546Sopenharmony_cinve4_compute_upload_input(struct nvc0_context *nvc0, 517bf215546Sopenharmony_ci const struct pipe_grid_info *info) 518bf215546Sopenharmony_ci{ 519bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 520bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 521bf215546Sopenharmony_ci struct nvc0_program *cp = nvc0->compprog; 522bf215546Sopenharmony_ci uint64_t address; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci if (cp->parm_size) { 527bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 528bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5)); 529bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5)); 530bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 531bf215546Sopenharmony_ci PUSH_DATA (push, cp->parm_size); 532bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 533bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + DIV_ROUND_UP(cp->parm_size, 4)); 534bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 535bf215546Sopenharmony_ci PUSH_DATAb(push, info->input, cp->parm_size); 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 538bf215546Sopenharmony_ci PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0)); 539bf215546Sopenharmony_ci PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0)); 540bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 541bf215546Sopenharmony_ci PUSH_DATA (push, 8 * 4); 542bf215546Sopenharmony_ci PUSH_DATA (push, 0x1); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (unlikely(info->indirect)) { 545bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(info->indirect); 546bf215546Sopenharmony_ci uint32_t offset = res->offset + info->indirect_offset; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 32, 0, 1); 549bf215546Sopenharmony_ci PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8); 552bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 553bf215546Sopenharmony_ci PUSH_DATAp(push, info->block, 3); 554bf215546Sopenharmony_ci nouveau_pushbuf_data(push, res->bo, offset, 555bf215546Sopenharmony_ci NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); 556bf215546Sopenharmony_ci } else { 557bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8); 558bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 559bf215546Sopenharmony_ci PUSH_DATAp(push, info->block, 3); 560bf215546Sopenharmony_ci PUSH_DATAp(push, info->grid, 3); 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci PUSH_DATA (push, 0); 563bf215546Sopenharmony_ci PUSH_DATA (push, info->work_dim); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); 566bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 567bf215546Sopenharmony_ci} 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_cistatic inline void 570bf215546Sopenharmony_cigp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, 571bf215546Sopenharmony_ci struct nouveau_bo *bo, uint32_t base, uint32_t size) 572bf215546Sopenharmony_ci{ 573bf215546Sopenharmony_ci uint64_t address = bo->offset + base; 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci assert(index < 8); 576bf215546Sopenharmony_ci assert(!(base & 0xff)); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); 579bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); 580bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index, 581bf215546Sopenharmony_ci DIV_ROUND_UP(size, 16)); 582bf215546Sopenharmony_ci NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); 583bf215546Sopenharmony_ci} 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_cistatic inline void 586bf215546Sopenharmony_cinve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo, 587bf215546Sopenharmony_ci uint32_t base, uint32_t size) 588bf215546Sopenharmony_ci{ 589bf215546Sopenharmony_ci uint64_t address = bo->offset + base; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci assert(index < 8); 592bf215546Sopenharmony_ci assert(!(base & 0xff)); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); 595bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); 596bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size); 597bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); 598bf215546Sopenharmony_ci} 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_cistatic void 601bf215546Sopenharmony_cinve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) 602bf215546Sopenharmony_ci{ 603bf215546Sopenharmony_ci // only user constant buffers 0-6 can be put in the descriptor, the rest are 604bf215546Sopenharmony_ci // loaded through global memory 605bf215546Sopenharmony_ci for (int i = 0; i <= 6; i++) { 606bf215546Sopenharmony_ci if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf) 607bf215546Sopenharmony_ci continue; 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci struct nv04_resource *res = 610bf215546Sopenharmony_ci nv04_resource(nvc0->constbuf[5][i].u.buf); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci uint32_t base = res->offset + nvc0->constbuf[5][i].offset; 613bf215546Sopenharmony_ci uint32_t size = nvc0->constbuf[5][i].size; 614bf215546Sopenharmony_ci if (gp100) 615bf215546Sopenharmony_ci gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size); 616bf215546Sopenharmony_ci else 617bf215546Sopenharmony_ci nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size); 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because 621bf215546Sopenharmony_ci // nve4_compute_upload_input() does it later 622bf215546Sopenharmony_ci} 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_cistatic void 625bf215546Sopenharmony_cinve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, 626bf215546Sopenharmony_ci const struct pipe_grid_info *info) 627bf215546Sopenharmony_ci{ 628bf215546Sopenharmony_ci const struct nvc0_screen *screen = nvc0->screen; 629bf215546Sopenharmony_ci const struct nvc0_program *cp = nvc0->compprog; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE); 632bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE); 633bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE); 634bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE); 635bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE); 636bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); 637bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); 638bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); 639bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30); 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); 644bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); 645bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); 646bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); 647bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); 648bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, 651bf215546Sopenharmony_ci align(cp->cp.smem_size, 0x100)); 652bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, 653bf215546Sopenharmony_ci (cp->hdr[1] & 0xfffff0) + 654bf215546Sopenharmony_ci align(cp->cp.lmem_size, 0x10)); 655bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); 656bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci if (cp->cp.smem_size > (32 << 10)) 659bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, 660bf215546Sopenharmony_ci DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); 661bf215546Sopenharmony_ci else 662bf215546Sopenharmony_ci if (cp->cp.smem_size > (16 << 10)) 663bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, 664bf215546Sopenharmony_ci DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB); 665bf215546Sopenharmony_ci else 666bf215546Sopenharmony_ci NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, 667bf215546Sopenharmony_ci DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB); 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); 670bf215546Sopenharmony_ci NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci // Only bind user uniforms and the driver constant buffer through the 673bf215546Sopenharmony_ci // launch descriptor because UBOs are sticked to the driver cb to avoid the 674bf215546Sopenharmony_ci // limitation of 8 CBs. 675bf215546Sopenharmony_ci if (nvc0->constbuf[5][0].user || cp->parm_size) { 676bf215546Sopenharmony_ci nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, 677bf215546Sopenharmony_ci NVC0_CB_USR_INFO(5), 1 << 16); 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci // Later logic will attempt to bind a real buffer at position 0. That 680bf215546Sopenharmony_ci // should not happen if we've bound a user buffer. 681bf215546Sopenharmony_ci assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, 684bf215546Sopenharmony_ci NVC0_CB_AUX_INFO(5), 1 << 11); 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci nve4_compute_setup_buf_cb(nvc0, false, qmd); 687bf215546Sopenharmony_ci} 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_cistatic void 690bf215546Sopenharmony_cigp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, 691bf215546Sopenharmony_ci const struct pipe_grid_info *info) 692bf215546Sopenharmony_ci{ 693bf215546Sopenharmony_ci const struct nvc0_screen *screen = nvc0->screen; 694bf215546Sopenharmony_ci const struct nvc0_program *cp = nvc0->compprog; 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); 697bf215546Sopenharmony_ci NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); 698bf215546Sopenharmony_ci NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); 699bf215546Sopenharmony_ci NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); 704bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); 705bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); 706bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); 707bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); 708bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, 711bf215546Sopenharmony_ci align(cp->cp.smem_size, 0x100)); 712bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, 713bf215546Sopenharmony_ci (cp->hdr[1] & 0xfffff0) + 714bf215546Sopenharmony_ci align(cp->cp.lmem_size, 0x10)); 715bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); 716bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); 719bf215546Sopenharmony_ci NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci // Only bind user uniforms and the driver constant buffer through the 722bf215546Sopenharmony_ci // launch descriptor because UBOs are sticked to the driver cb to avoid the 723bf215546Sopenharmony_ci // limitation of 8 CBs. 724bf215546Sopenharmony_ci if (nvc0->constbuf[5][0].user || cp->parm_size) { 725bf215546Sopenharmony_ci gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, 726bf215546Sopenharmony_ci NVC0_CB_USR_INFO(5), 1 << 16); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci // Later logic will attempt to bind a real buffer at position 0. That 729bf215546Sopenharmony_ci // should not happen if we've bound a user buffer. 730bf215546Sopenharmony_ci assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, 733bf215546Sopenharmony_ci NVC0_CB_AUX_INFO(5), 1 << 11); 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci nve4_compute_setup_buf_cb(nvc0, true, qmd); 736bf215546Sopenharmony_ci} 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_cistatic int 739bf215546Sopenharmony_cigv100_sm_config_smem_size(u32 size) 740bf215546Sopenharmony_ci{ 741bf215546Sopenharmony_ci if (size > 64 * 1024) size = 96 * 1024; 742bf215546Sopenharmony_ci else if (size > 32 * 1024) size = 64 * 1024; 743bf215546Sopenharmony_ci else if (size > 16 * 1024) size = 32 * 1024; 744bf215546Sopenharmony_ci else if (size > 8 * 1024) size = 16 * 1024; 745bf215546Sopenharmony_ci else size = 8 * 1024; 746bf215546Sopenharmony_ci return (size / 4096) + 1; 747bf215546Sopenharmony_ci} 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_cistatic void 750bf215546Sopenharmony_cigv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, 751bf215546Sopenharmony_ci const struct pipe_grid_info *info) 752bf215546Sopenharmony_ci{ 753bf215546Sopenharmony_ci struct nvc0_program *cp = nvc0->compprog; 754bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 755bf215546Sopenharmony_ci uint64_t entry = screen->text->offset + cp->code_base; 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); 758bf215546Sopenharmony_ci NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); 759bf215546Sopenharmony_ci NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY); 760bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, 761bf215546Sopenharmony_ci align(cp->cp.smem_size, 0x100)); 762bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, 763bf215546Sopenharmony_ci (cp->hdr[1] & 0xfffff0) + 764bf215546Sopenharmony_ci align(cp->cp.lmem_size, 0x10)); 765bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); 766bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, 767bf215546Sopenharmony_ci gv100_sm_config_smem_size(8 * 1024)); 768bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, 769bf215546Sopenharmony_ci gv100_sm_config_smem_size(96 * 1024)); 770bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2); 771bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2); 772bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, 773bf215546Sopenharmony_ci gv100_sm_config_smem_size(cp->cp.smem_size)); 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); 776bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); 777bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); 778bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); 779bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); 780bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); 781bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs); 782bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci // Only bind user uniforms and the driver constant buffer through the 785bf215546Sopenharmony_ci // launch descriptor because UBOs are sticked to the driver cb to avoid the 786bf215546Sopenharmony_ci // limitation of 8 CBs. 787bf215546Sopenharmony_ci if (nvc0->constbuf[5][0].user || cp->parm_size) { 788bf215546Sopenharmony_ci gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, 789bf215546Sopenharmony_ci NVC0_CB_USR_INFO(5), 1 << 16); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci // Later logic will attempt to bind a real buffer at position 0. That 792bf215546Sopenharmony_ci // should not happen if we've bound a user buffer. 793bf215546Sopenharmony_ci assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); 794bf215546Sopenharmony_ci } 795bf215546Sopenharmony_ci gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, 796bf215546Sopenharmony_ci NVC0_CB_AUX_INFO(5), 1 << 11); 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci nve4_compute_setup_buf_cb(nvc0, true, qmd); 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff); 801bf215546Sopenharmony_ci NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32); 802bf215546Sopenharmony_ci} 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_cistatic inline void * 805bf215546Sopenharmony_cinve4_compute_alloc_launch_desc(struct nouveau_context *nv, 806bf215546Sopenharmony_ci struct nouveau_bo **pbo, uint64_t *pgpuaddr) 807bf215546Sopenharmony_ci{ 808bf215546Sopenharmony_ci uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo); 809bf215546Sopenharmony_ci if (!ptr) 810bf215546Sopenharmony_ci return NULL; 811bf215546Sopenharmony_ci if (*pgpuaddr & 255) { 812bf215546Sopenharmony_ci unsigned adj = 256 - (*pgpuaddr & 255); 813bf215546Sopenharmony_ci ptr += adj; 814bf215546Sopenharmony_ci *pgpuaddr += adj; 815bf215546Sopenharmony_ci } 816bf215546Sopenharmony_ci memset(ptr, 0x00, 256); 817bf215546Sopenharmony_ci return ptr; 818bf215546Sopenharmony_ci} 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_cistatic void 821bf215546Sopenharmony_cinve4_upload_indirect_desc(struct nouveau_pushbuf *push, 822bf215546Sopenharmony_ci struct nv04_resource *res, uint64_t gpuaddr, 823bf215546Sopenharmony_ci uint32_t length, uint32_t bo_offset) 824bf215546Sopenharmony_ci{ 825bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 826bf215546Sopenharmony_ci PUSH_DATAh(push, gpuaddr); 827bf215546Sopenharmony_ci PUSH_DATA (push, gpuaddr); 828bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 829bf215546Sopenharmony_ci PUSH_DATA (push, length); 830bf215546Sopenharmony_ci PUSH_DATA (push, 1); 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 32, 0, 1); 833bf215546Sopenharmony_ci PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (length / 4)); 836bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); 837bf215546Sopenharmony_ci nouveau_pushbuf_data(push, res->bo, bo_offset, 838bf215546Sopenharmony_ci NVC0_IB_ENTRY_1_NO_PREFETCH | length); 839bf215546Sopenharmony_ci} 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_civoid 842bf215546Sopenharmony_cinve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) 843bf215546Sopenharmony_ci{ 844bf215546Sopenharmony_ci struct nvc0_context *nvc0 = nvc0_context(pipe); 845bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 846bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 847bf215546Sopenharmony_ci void *desc; 848bf215546Sopenharmony_ci uint64_t desc_gpuaddr; 849bf215546Sopenharmony_ci struct nouveau_bo *desc_bo; 850bf215546Sopenharmony_ci int ret; 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr); 853bf215546Sopenharmony_ci if (!desc) { 854bf215546Sopenharmony_ci ret = -1; 855bf215546Sopenharmony_ci goto out; 856bf215546Sopenharmony_ci } 857bf215546Sopenharmony_ci BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD, 858bf215546Sopenharmony_ci desc_bo); 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) { 861bf215546Sopenharmony_ci nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf, 862bf215546Sopenharmony_ci resident->flags); 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) { 866bf215546Sopenharmony_ci nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf, 867bf215546Sopenharmony_ci resident->flags); 868bf215546Sopenharmony_ci } 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci ret = !nve4_state_validate_cp(nvc0, ~0); 871bf215546Sopenharmony_ci if (ret) 872bf215546Sopenharmony_ci goto out; 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) 875bf215546Sopenharmony_ci gv100_compute_setup_launch_desc(nvc0, desc, info); 876bf215546Sopenharmony_ci else 877bf215546Sopenharmony_ci if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) 878bf215546Sopenharmony_ci gp100_compute_setup_launch_desc(nvc0, desc, info); 879bf215546Sopenharmony_ci else 880bf215546Sopenharmony_ci nve4_compute_setup_launch_desc(nvc0, desc, info); 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci nve4_compute_upload_input(nvc0, info); 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci#ifndef NDEBUG 885bf215546Sopenharmony_ci if (debug_get_num_option("NV50_PROG_DEBUG", 0)) { 886bf215546Sopenharmony_ci debug_printf("Queue Meta Data:\n"); 887bf215546Sopenharmony_ci if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) 888bf215546Sopenharmony_ci NVC3C0QmdDump_V02_02(desc); 889bf215546Sopenharmony_ci else 890bf215546Sopenharmony_ci if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) 891bf215546Sopenharmony_ci NVC0C0QmdDump_V02_01(desc); 892bf215546Sopenharmony_ci else 893bf215546Sopenharmony_ci NVA0C0QmdDump_V00_06(desc); 894bf215546Sopenharmony_ci } 895bf215546Sopenharmony_ci#endif 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci if (unlikely(info->indirect)) { 898bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(info->indirect); 899bf215546Sopenharmony_ci uint32_t offset = res->offset + info->indirect_offset; 900bf215546Sopenharmony_ci 901bf215546Sopenharmony_ci /* upload the descriptor */ 902bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 903bf215546Sopenharmony_ci PUSH_DATAh(push, desc_gpuaddr); 904bf215546Sopenharmony_ci PUSH_DATA (push, desc_gpuaddr); 905bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 906bf215546Sopenharmony_ci PUSH_DATA (push, 256); 907bf215546Sopenharmony_ci PUSH_DATA (push, 1); 908bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4)); 909bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); 910bf215546Sopenharmony_ci PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) { 913bf215546Sopenharmony_ci nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 12, offset); 914bf215546Sopenharmony_ci } else { 915bf215546Sopenharmony_ci /* overwrite griddim_x and griddim_y as two 32-bits integers even 916bf215546Sopenharmony_ci * if griddim_y must be a 16-bits integer */ 917bf215546Sopenharmony_ci nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 8, offset); 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci /* overwrite the 16 high bits of griddim_y with griddim_z because 920bf215546Sopenharmony_ci * we need (z << 16) | x */ 921bf215546Sopenharmony_ci nve4_upload_indirect_desc(push, res, desc_gpuaddr + 54, 4, offset + 8); 922bf215546Sopenharmony_ci } 923bf215546Sopenharmony_ci } 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci /* upload descriptor and flush */ 926bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 32, 1, 0); 927bf215546Sopenharmony_ci PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); 928bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); 929bf215546Sopenharmony_ci PUSH_DATA (push, desc_gpuaddr >> 8); 930bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); 931bf215546Sopenharmony_ci PUSH_DATA (push, 0x3); 932bf215546Sopenharmony_ci BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); 933bf215546Sopenharmony_ci PUSH_DATA (push, 0); 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci nvc0_update_compute_invocations_counter(nvc0, info); 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ciout: 938bf215546Sopenharmony_ci if (ret) 939bf215546Sopenharmony_ci NOUVEAU_ERR("Failed to launch grid !\n"); 940bf215546Sopenharmony_ci nouveau_scratch_done(&nvc0->base); 941bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC); 942bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS); 943bf215546Sopenharmony_ci} 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci#define NVE4_TIC_ENTRY_INVALID 0x000fffff 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_cistatic void 949bf215546Sopenharmony_cinve4_compute_validate_textures(struct nvc0_context *nvc0) 950bf215546Sopenharmony_ci{ 951bf215546Sopenharmony_ci struct nouveau_bo *txc = nvc0->screen->txc; 952bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nvc0->base.pushbuf; 953bf215546Sopenharmony_ci const unsigned s = 5; 954bf215546Sopenharmony_ci unsigned i; 955bf215546Sopenharmony_ci uint32_t commands[2][32]; 956bf215546Sopenharmony_ci unsigned n[2] = { 0, 0 }; 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci for (i = 0; i < nvc0->num_textures[s]; ++i) { 959bf215546Sopenharmony_ci struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); 960bf215546Sopenharmony_ci struct nv04_resource *res; 961bf215546Sopenharmony_ci const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i)); 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci if (!tic) { 964bf215546Sopenharmony_ci nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; 965bf215546Sopenharmony_ci continue; 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci res = nv04_resource(tic->pipe.texture); 968bf215546Sopenharmony_ci nvc0_update_tic(nvc0, tic, res); 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci if (tic->id < 0) { 971bf215546Sopenharmony_ci tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci PUSH_SPACE(push, 16); 974bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); 975bf215546Sopenharmony_ci PUSH_DATAh(push, txc->offset + (tic->id * 32)); 976bf215546Sopenharmony_ci PUSH_DATA (push, txc->offset + (tic->id * 32)); 977bf215546Sopenharmony_ci BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); 978bf215546Sopenharmony_ci PUSH_DATA (push, 32); 979bf215546Sopenharmony_ci PUSH_DATA (push, 1); 980bf215546Sopenharmony_ci BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9); 981bf215546Sopenharmony_ci PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); 982bf215546Sopenharmony_ci PUSH_DATAp(push, &tic->tic[0], 8); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci commands[0][n[0]++] = (tic->id << 4) | 1; 985bf215546Sopenharmony_ci } else 986bf215546Sopenharmony_ci if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { 987bf215546Sopenharmony_ci commands[1][n[1]++] = (tic->id << 4) | 1; 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; 992bf215546Sopenharmony_ci res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; 995bf215546Sopenharmony_ci nvc0->tex_handles[s][i] |= tic->id; 996bf215546Sopenharmony_ci if (dirty) 997bf215546Sopenharmony_ci BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); 998bf215546Sopenharmony_ci } 999bf215546Sopenharmony_ci for (; i < nvc0->state.num_textures[s]; ++i) { 1000bf215546Sopenharmony_ci nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; 1001bf215546Sopenharmony_ci nvc0->textures_dirty[s] |= 1 << i; 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (n[0]) { 1005bf215546Sopenharmony_ci BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]); 1006bf215546Sopenharmony_ci PUSH_DATAp(push, commands[0], n[0]); 1007bf215546Sopenharmony_ci } 1008bf215546Sopenharmony_ci if (n[1]) { 1009bf215546Sopenharmony_ci BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]); 1010bf215546Sopenharmony_ci PUSH_DATAp(push, commands[1], n[1]); 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci nvc0->state.num_textures[s] = nvc0->num_textures[s]; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci /* Invalidate all 3D textures because they are aliased. */ 1016bf215546Sopenharmony_ci for (int s = 0; s < 5; s++) { 1017bf215546Sopenharmony_ci for (int i = 0; i < nvc0->num_textures[s]; i++) 1018bf215546Sopenharmony_ci nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); 1019bf215546Sopenharmony_ci nvc0->textures_dirty[s] = ~0; 1020bf215546Sopenharmony_ci } 1021bf215546Sopenharmony_ci nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; 1022bf215546Sopenharmony_ci} 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER 1025bf215546Sopenharmony_cistatic void 1026bf215546Sopenharmony_cinve4_compute_trap_info(struct nvc0_context *nvc0) 1027bf215546Sopenharmony_ci{ 1028bf215546Sopenharmony_ci struct nvc0_screen *screen = nvc0->screen; 1029bf215546Sopenharmony_ci struct nouveau_bo *bo = screen->parm; 1030bf215546Sopenharmony_ci int ret, i; 1031bf215546Sopenharmony_ci volatile struct nve4_mp_trap_info *info; 1032bf215546Sopenharmony_ci uint8_t *map; 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client); 1035bf215546Sopenharmony_ci if (ret) 1036bf215546Sopenharmony_ci return; 1037bf215546Sopenharmony_ci map = (uint8_t *)bo->map; 1038bf215546Sopenharmony_ci info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO); 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_ci if (info->lock) { 1041bf215546Sopenharmony_ci debug_printf("trapstat = %08x\n", info->trapstat); 1042bf215546Sopenharmony_ci debug_printf("warperr = %08x\n", info->warperr); 1043bf215546Sopenharmony_ci debug_printf("PC = %x\n", info->pc); 1044bf215546Sopenharmony_ci debug_printf("tid = %u %u %u\n", 1045bf215546Sopenharmony_ci info->tid[0], info->tid[1], info->tid[2]); 1046bf215546Sopenharmony_ci debug_printf("ctaid = %u %u %u\n", 1047bf215546Sopenharmony_ci info->ctaid[0], info->ctaid[1], info->ctaid[2]); 1048bf215546Sopenharmony_ci for (i = 0; i <= 63; ++i) 1049bf215546Sopenharmony_ci debug_printf("$r%i = %08x\n", i, info->r[i]); 1050bf215546Sopenharmony_ci for (i = 0; i <= 6; ++i) 1051bf215546Sopenharmony_ci debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1); 1052bf215546Sopenharmony_ci debug_printf("$c = %x\n", info->flags >> 12); 1053bf215546Sopenharmony_ci } 1054bf215546Sopenharmony_ci info->lock = 0; 1055bf215546Sopenharmony_ci} 1056bf215546Sopenharmony_ci#endif 1057