1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2012 Francisco Jerez 3bf215546Sopenharmony_ci * Copyright 2015 Samuel Pitoiset 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining 6bf215546Sopenharmony_ci * a copy of this software and associated documentation files (the 7bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 8bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 9bf215546Sopenharmony_ci * distribute, sublicense, and/or sell copies of the Software, and to 10bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 11bf215546Sopenharmony_ci * the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 14bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial 15bf215546Sopenharmony_ci * portions of the Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18bf215546Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20bf215546Sopenharmony_ci * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21bf215546Sopenharmony_ci * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22bf215546Sopenharmony_ci * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23bf215546Sopenharmony_ci * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24bf215546Sopenharmony_ci * 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/format/u_format.h" 28bf215546Sopenharmony_ci#include "nv50/nv50_context.h" 29bf215546Sopenharmony_ci#include "nv50/nv50_compute.xml.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "nv50_ir_driver.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ciint 34bf215546Sopenharmony_cinv50_screen_compute_setup(struct nv50_screen *screen, 35bf215546Sopenharmony_ci struct nouveau_pushbuf *push) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci struct nouveau_device *dev = screen->base.device; 38bf215546Sopenharmony_ci struct nouveau_object *chan = screen->base.channel; 39bf215546Sopenharmony_ci struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; 40bf215546Sopenharmony_ci unsigned obj_class; 41bf215546Sopenharmony_ci int i, ret; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci switch (dev->chipset & 0xf0) { 44bf215546Sopenharmony_ci case 0x50: 45bf215546Sopenharmony_ci case 0x80: 46bf215546Sopenharmony_ci case 0x90: 47bf215546Sopenharmony_ci obj_class = NV50_COMPUTE_CLASS; 48bf215546Sopenharmony_ci break; 49bf215546Sopenharmony_ci case 0xa0: 50bf215546Sopenharmony_ci switch (dev->chipset) { 51bf215546Sopenharmony_ci case 0xa3: 52bf215546Sopenharmony_ci case 0xa5: 53bf215546Sopenharmony_ci case 0xa8: 54bf215546Sopenharmony_ci obj_class = NVA3_COMPUTE_CLASS; 55bf215546Sopenharmony_ci break; 56bf215546Sopenharmony_ci default: 57bf215546Sopenharmony_ci obj_class = NV50_COMPUTE_CLASS; 58bf215546Sopenharmony_ci break; 59bf215546Sopenharmony_ci } 60bf215546Sopenharmony_ci break; 61bf215546Sopenharmony_ci default: 62bf215546Sopenharmony_ci NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); 63bf215546Sopenharmony_ci return -1; 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, 67bf215546Sopenharmony_ci &screen->compute); 68bf215546Sopenharmony_ci if (ret) 69bf215546Sopenharmony_ci return ret; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); 72bf215546Sopenharmony_ci PUSH_DATA (push, screen->compute->handle); 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(UNK02A0), 1); 75bf215546Sopenharmony_ci PUSH_DATA (push, 1); 76bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_STACK), 1); 77bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 78bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2); 79bf215546Sopenharmony_ci PUSH_DATAh(push, screen->stack_bo->offset); 80bf215546Sopenharmony_ci PUSH_DATA (push, screen->stack_bo->offset); 81bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1); 82bf215546Sopenharmony_ci PUSH_DATA (push, 4); 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(UNK0290), 1); 85bf215546Sopenharmony_ci PUSH_DATA (push, 1); 86bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1); 87bf215546Sopenharmony_ci PUSH_DATA (push, 1); 88bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(REG_MODE), 1); 89bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); 90bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(UNK0384), 1); 91bf215546Sopenharmony_ci PUSH_DATA (push, 0x100); 92bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1); 93bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci for (i = 0; i < 15; i++) { 96bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2); 97bf215546Sopenharmony_ci PUSH_DATA (push, 0); 98bf215546Sopenharmony_ci PUSH_DATA (push, 0); 99bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1); 100bf215546Sopenharmony_ci PUSH_DATA (push, 0); 101bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1); 102bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 103bf215546Sopenharmony_ci } 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2); 106bf215546Sopenharmony_ci PUSH_DATA (push, 0); 107bf215546Sopenharmony_ci PUSH_DATA (push, 0); 108bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1); 109bf215546Sopenharmony_ci PUSH_DATA (push, ~0); 110bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1); 111bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1); 114bf215546Sopenharmony_ci PUSH_DATA (push, 7); 115bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1); 116bf215546Sopenharmony_ci PUSH_DATA (push, 1); 117bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1); 118bf215546Sopenharmony_ci PUSH_DATA (push, 7); 119bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1); 120bf215546Sopenharmony_ci PUSH_DATA (push, 1); 121bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); 122bf215546Sopenharmony_ci PUSH_DATA (push, 0); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1); 125bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 126bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1); 127bf215546Sopenharmony_ci PUSH_DATA (push, 0x54); 128bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1); 129bf215546Sopenharmony_ci PUSH_DATA (push, 0); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_TIC), 1); 132bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 133bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3); 134bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset); 135bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset); 136bf215546Sopenharmony_ci PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_TSC), 1); 139bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 140bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3); 141bf215546Sopenharmony_ci PUSH_DATAh(push, screen->txc->offset + 65536); 142bf215546Sopenharmony_ci PUSH_DATA (push, screen->txc->offset + 65536); 143bf215546Sopenharmony_ci PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1); 146bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1); 149bf215546Sopenharmony_ci PUSH_DATA (push, fifo->vram); 150bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2); 151bf215546Sopenharmony_ci PUSH_DATAh(push, screen->tls_bo->offset + 65536); 152bf215546Sopenharmony_ci PUSH_DATA (push, screen->tls_bo->offset + 65536); 153bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1); 154bf215546Sopenharmony_ci PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3); 157bf215546Sopenharmony_ci PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); 158bf215546Sopenharmony_ci PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); 159bf215546Sopenharmony_ci PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2); 162bf215546Sopenharmony_ci PUSH_DATAh(push, screen->fence.bo->offset + 16); 163bf215546Sopenharmony_ci PUSH_DATA (push, screen->fence.bo->offset + 16); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci return 0; 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cistatic void 169bf215546Sopenharmony_cinv50_compute_validate_samplers(struct nv50_context *nv50) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE); 172bf215546Sopenharmony_ci if (need_flush) { 173bf215546Sopenharmony_ci BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1); 174bf215546Sopenharmony_ci PUSH_DATA (nv50->base.pushbuf, 0); 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* Invalidate all 3D samplers because they are aliased. */ 178bf215546Sopenharmony_ci nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_cistatic void 182bf215546Sopenharmony_cinv50_compute_validate_textures(struct nv50_context *nv50) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE); 185bf215546Sopenharmony_ci if (need_flush) { 186bf215546Sopenharmony_ci BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1); 187bf215546Sopenharmony_ci PUSH_DATA (nv50->base.pushbuf, 0); 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* Invalidate all 3D textures because they are aliased. */ 191bf215546Sopenharmony_ci nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); 192bf215546Sopenharmony_ci nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_cistatic inline void 196bf215546Sopenharmony_cinv50_compute_invalidate_constbufs(struct nv50_context *nv50) 197bf215546Sopenharmony_ci{ 198bf215546Sopenharmony_ci int s; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ 201bf215546Sopenharmony_ci for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) { 202bf215546Sopenharmony_ci nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s]; 203bf215546Sopenharmony_ci nv50->state.uniform_buffer_bound[s] = false; 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic void 209bf215546Sopenharmony_cinv50_compute_validate_constbufs(struct nv50_context *nv50) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nv50->base.pushbuf; 212bf215546Sopenharmony_ci const int s = NV50_SHADER_STAGE_COMPUTE; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci while (nv50->constbuf_dirty[s]) { 215bf215546Sopenharmony_ci int i = ffs(nv50->constbuf_dirty[s]) - 1; 216bf215546Sopenharmony_ci nv50->constbuf_dirty[s] &= ~(1 << i); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if (nv50->constbuf[s][i].user) { 219bf215546Sopenharmony_ci const unsigned b = NV50_CB_PVP + s; 220bf215546Sopenharmony_ci unsigned start = 0; 221bf215546Sopenharmony_ci unsigned words = nv50->constbuf[s][0].size / 4; 222bf215546Sopenharmony_ci if (i) { 223bf215546Sopenharmony_ci NOUVEAU_ERR("user constbufs only supported in slot 0\n"); 224bf215546Sopenharmony_ci continue; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci if (!nv50->state.uniform_buffer_bound[s]) { 227bf215546Sopenharmony_ci nv50->state.uniform_buffer_bound[s] = true; 228bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); 229bf215546Sopenharmony_ci PUSH_DATA (push, (b << 12) | (i << 8) | 1); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci while (words) { 232bf215546Sopenharmony_ci unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci PUSH_SPACE(push, nr + 3); 235bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CB_ADDR), 1); 236bf215546Sopenharmony_ci PUSH_DATA (push, (start << 8) | b); 237bf215546Sopenharmony_ci BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr); 238bf215546Sopenharmony_ci PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci start += nr; 241bf215546Sopenharmony_ci words -= nr; 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci } else { 244bf215546Sopenharmony_ci struct nv04_resource *res = 245bf215546Sopenharmony_ci nv04_resource(nv50->constbuf[s][i].u.buf); 246bf215546Sopenharmony_ci if (res) { 247bf215546Sopenharmony_ci /* TODO: allocate persistent bindings */ 248bf215546Sopenharmony_ci const unsigned b = s * 16 + i; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci assert(nouveau_resource_mapped_by_gpu(&res->base)); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3); 253bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); 254bf215546Sopenharmony_ci PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); 255bf215546Sopenharmony_ci PUSH_DATA (push, (b << 16) | 256bf215546Sopenharmony_ci (nv50->constbuf[s][i].size & 0xffff)); 257bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); 258bf215546Sopenharmony_ci PUSH_DATA (push, (b << 12) | (i << 8) | 1); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci nv50->cb_dirty = 1; /* Force cache flush for UBO. */ 263bf215546Sopenharmony_ci res->cb_bindings[s] |= 1 << i; 264bf215546Sopenharmony_ci } else { 265bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1); 266bf215546Sopenharmony_ci PUSH_DATA (push, (i << 8) | 0); 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci if (i == 0) 269bf215546Sopenharmony_ci nv50->state.uniform_buffer_bound[s] = false; 270bf215546Sopenharmony_ci } 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci // TODO: Check if having orthogonal slots means the two don't trample over 274bf215546Sopenharmony_ci // each other. 275bf215546Sopenharmony_ci nv50_compute_invalidate_constbufs(nv50); 276bf215546Sopenharmony_ci} 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_cistatic void 279bf215546Sopenharmony_cinv50_get_surface_dims(const struct pipe_image_view *view, 280bf215546Sopenharmony_ci int *width, int *height, int *depth) 281bf215546Sopenharmony_ci{ 282bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(view->resource); 283bf215546Sopenharmony_ci int level; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci *width = *height = *depth = 1; 286bf215546Sopenharmony_ci if (res->base.target == PIPE_BUFFER) { 287bf215546Sopenharmony_ci *width = view->u.buf.size / util_format_get_blocksize(view->format); 288bf215546Sopenharmony_ci return; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci level = view->u.tex.level; 292bf215546Sopenharmony_ci *width = u_minify(view->resource->width0, level); 293bf215546Sopenharmony_ci *height = u_minify(view->resource->height0, level); 294bf215546Sopenharmony_ci *depth = u_minify(view->resource->depth0, level); 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci switch (res->base.target) { 297bf215546Sopenharmony_ci case PIPE_TEXTURE_1D_ARRAY: 298bf215546Sopenharmony_ci case PIPE_TEXTURE_2D_ARRAY: 299bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE: 300bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE_ARRAY: 301bf215546Sopenharmony_ci *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1; 302bf215546Sopenharmony_ci break; 303bf215546Sopenharmony_ci case PIPE_TEXTURE_1D: 304bf215546Sopenharmony_ci case PIPE_TEXTURE_2D: 305bf215546Sopenharmony_ci case PIPE_TEXTURE_RECT: 306bf215546Sopenharmony_ci case PIPE_TEXTURE_3D: 307bf215546Sopenharmony_ci break; 308bf215546Sopenharmony_ci default: 309bf215546Sopenharmony_ci assert(!"unexpected texture target"); 310bf215546Sopenharmony_ci break; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci} 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_cistatic void 315bf215546Sopenharmony_cinv50_mark_image_range_valid(const struct pipe_image_view *view) 316bf215546Sopenharmony_ci{ 317bf215546Sopenharmony_ci struct nv04_resource *res = (struct nv04_resource *)view->resource; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci assert(view->resource->target == PIPE_BUFFER); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci util_range_add(&res->base, &res->valid_buffer_range, 322bf215546Sopenharmony_ci view->u.buf.offset, 323bf215546Sopenharmony_ci view->u.buf.offset + view->u.buf.size); 324bf215546Sopenharmony_ci} 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_cistatic inline void 327bf215546Sopenharmony_cinv50_set_surface_info(struct nouveau_pushbuf *push, 328bf215546Sopenharmony_ci const struct pipe_image_view *view, 329bf215546Sopenharmony_ci int width, int height, int depth) 330bf215546Sopenharmony_ci{ 331bf215546Sopenharmony_ci struct nv04_resource *res; 332bf215546Sopenharmony_ci uint32_t *const info = push->cur; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci push->cur += 12; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci /* Make sure to always initialize the surface information area because it's 337bf215546Sopenharmony_ci * used to check if the given image is bound or not. */ 338bf215546Sopenharmony_ci memset(info, 0, 12 * sizeof(*info)); 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci if (!view || !view->resource) 341bf215546Sopenharmony_ci return; 342bf215546Sopenharmony_ci res = nv04_resource(view->resource); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci /* Stick the image dimensions for the imageSize() builtin. */ 345bf215546Sopenharmony_ci info[0] = width; 346bf215546Sopenharmony_ci info[1] = height; 347bf215546Sopenharmony_ci info[2] = depth; 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel 350bf215546Sopenharmony_ci * offset and to check if the format doesn't mismatch. */ 351bf215546Sopenharmony_ci info[3] = util_format_get_blocksize(view->format); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci if (res->base.target != PIPE_BUFFER) { 354bf215546Sopenharmony_ci struct nv50_miptree *mt = nv50_miptree(&res->base); 355bf215546Sopenharmony_ci struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level]; 356bf215546Sopenharmony_ci unsigned nby = align(util_format_get_nblocksy(view->format, height), 357bf215546Sopenharmony_ci NV50_TILE_SIZE_Y(lvl->tile_mode)); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci if (mt->layout_3d) { 360bf215546Sopenharmony_ci info[4] = nby; 361bf215546Sopenharmony_ci info[11] = view->u.tex.first_layer; 362bf215546Sopenharmony_ci } else { 363bf215546Sopenharmony_ci info[4] = mt->layer_stride / lvl->pitch; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci info[6] = mt->ms_x; 366bf215546Sopenharmony_ci info[7] = mt->ms_y; 367bf215546Sopenharmony_ci info[8] = NV50_TILE_SHIFT_X(lvl->tile_mode); 368bf215546Sopenharmony_ci info[9] = NV50_TILE_SHIFT_Y(lvl->tile_mode); 369bf215546Sopenharmony_ci info[10] = NV50_TILE_SHIFT_Z(lvl->tile_mode); 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci} 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_cistatic void 374bf215546Sopenharmony_cinv50_compute_validate_surfaces(struct nv50_context *nv50) 375bf215546Sopenharmony_ci{ 376bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nv50->base.pushbuf; 377bf215546Sopenharmony_ci int i; 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) { 380bf215546Sopenharmony_ci struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i]; 381bf215546Sopenharmony_ci int width, height, depth; 382bf215546Sopenharmony_ci uint64_t address = 0; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) { 387bf215546Sopenharmony_ci struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot]; 388bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(buffer->buffer); 389bf215546Sopenharmony_ci PUSH_DATAh(push, res->address + buffer->buffer_offset); 390bf215546Sopenharmony_ci PUSH_DATA (push, res->address + buffer->buffer_offset); 391bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* pitch? */ 392bf215546Sopenharmony_ci PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1); 393bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 394bf215546Sopenharmony_ci BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR); 395bf215546Sopenharmony_ci util_range_add(&res->base, &res->valid_buffer_range, 396bf215546Sopenharmony_ci buffer->buffer_offset, 397bf215546Sopenharmony_ci buffer->buffer_offset + 398bf215546Sopenharmony_ci buffer->buffer_size); 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci PUSH_SPACE(push, 1 + 3); 401bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CB_ADDR), 1); 402bf215546Sopenharmony_ci PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX); 403bf215546Sopenharmony_ci BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1); 404bf215546Sopenharmony_ci PUSH_DATA (push, buffer->buffer_size); 405bf215546Sopenharmony_ci } else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) { 406bf215546Sopenharmony_ci struct pipe_image_view *view = &nv50->images[gmem->slot]; 407bf215546Sopenharmony_ci struct nv04_resource *res = nv04_resource(view->resource); 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci /* get surface dimensions based on the target. */ 410bf215546Sopenharmony_ci nv50_get_surface_dims(view, &width, &height, &depth); 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci address = res->address; 413bf215546Sopenharmony_ci if (res->base.target == PIPE_BUFFER) { 414bf215546Sopenharmony_ci address += view->u.buf.offset; 415bf215546Sopenharmony_ci assert(!(address & 0xff)); 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci if (view->access & PIPE_IMAGE_ACCESS_WRITE) 418bf215546Sopenharmony_ci nv50_mark_image_range_valid(view); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci PUSH_DATAh(push, address); 421bf215546Sopenharmony_ci PUSH_DATA (push, address); 422bf215546Sopenharmony_ci PUSH_DATA (push, 0); /* pitch? */ 423bf215546Sopenharmony_ci PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1); 424bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 425bf215546Sopenharmony_ci } else { 426bf215546Sopenharmony_ci struct nv50_miptree *mt = nv50_miptree(view->resource); 427bf215546Sopenharmony_ci struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level]; 428bf215546Sopenharmony_ci const unsigned z = view->u.tex.first_layer; 429bf215546Sopenharmony_ci unsigned max_size; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci if (mt->layout_3d) { 432bf215546Sopenharmony_ci address += nv50_mt_zslice_offset(mt, view->u.tex.level, 0); 433bf215546Sopenharmony_ci max_size = mt->total_size; 434bf215546Sopenharmony_ci } else { 435bf215546Sopenharmony_ci address += mt->layer_stride * z; 436bf215546Sopenharmony_ci max_size = mt->layer_stride * (view->u.tex.last_layer - view->u.tex.first_layer + 1); 437bf215546Sopenharmony_ci } 438bf215546Sopenharmony_ci address += lvl->offset; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci PUSH_DATAh(push, address); 441bf215546Sopenharmony_ci PUSH_DATA (push, address); 442bf215546Sopenharmony_ci if (mt->layout_3d) { 443bf215546Sopenharmony_ci // We have to adjust the size of the 3d surface to be 444bf215546Sopenharmony_ci // accessible within 2d limits. The size of each z tile goes 445bf215546Sopenharmony_ci // into the x direction, while the number of z tiles goes into 446bf215546Sopenharmony_ci // the y direction. 447bf215546Sopenharmony_ci const unsigned nby = util_format_get_nblocksy(view->format, height); 448bf215546Sopenharmony_ci const unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); 449bf215546Sopenharmony_ci const unsigned tsz = NV50_TILE_SIZE_Z(lvl->tile_mode); 450bf215546Sopenharmony_ci const unsigned pitch = lvl->pitch * tsz; 451bf215546Sopenharmony_ci const unsigned maxy = align(nby, tsy) * align(depth, tsz) >> NV50_TILE_SHIFT_Z(lvl->tile_mode); 452bf215546Sopenharmony_ci PUSH_DATA (push, pitch * tsy); 453bf215546Sopenharmony_ci PUSH_DATA (push, (maxy - 1) << 16 | (pitch - 1)); 454bf215546Sopenharmony_ci PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4); 455bf215546Sopenharmony_ci } else if (nouveau_bo_memtype(res->bo)) { 456bf215546Sopenharmony_ci PUSH_DATA (push, lvl->pitch * NV50_TILE_SIZE_Y(lvl->tile_mode)); 457bf215546Sopenharmony_ci PUSH_DATA (push, (max_size / lvl->pitch - 1) << 16 | (lvl->pitch - 1)); 458bf215546Sopenharmony_ci PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4); 459bf215546Sopenharmony_ci } else { 460bf215546Sopenharmony_ci PUSH_DATA (push, lvl->pitch); 461bf215546Sopenharmony_ci PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1); 462bf215546Sopenharmony_ci PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci } 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR); 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci PUSH_SPACE(push, 12 + 3); 469bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CB_ADDR), 1); 470bf215546Sopenharmony_ci PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX); 471bf215546Sopenharmony_ci BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12); 472bf215546Sopenharmony_ci nv50_set_surface_info(push, view, width, height, depth); 473bf215546Sopenharmony_ci } else { 474bf215546Sopenharmony_ci PUSH_DATA (push, 0); 475bf215546Sopenharmony_ci PUSH_DATA (push, 0); 476bf215546Sopenharmony_ci PUSH_DATA (push, 0); 477bf215546Sopenharmony_ci PUSH_DATA (push, 0); 478bf215546Sopenharmony_ci PUSH_DATA (push, 0); 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci } 481bf215546Sopenharmony_ci} 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_cistatic void 484bf215546Sopenharmony_cinv50_compute_validate_globals(struct nv50_context *nv50) 485bf215546Sopenharmony_ci{ 486bf215546Sopenharmony_ci unsigned i; 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); 489bf215546Sopenharmony_ci ++i) { 490bf215546Sopenharmony_ci struct pipe_resource *res = *util_dynarray_element( 491bf215546Sopenharmony_ci &nv50->global_residents, struct pipe_resource *, i); 492bf215546Sopenharmony_ci if (res) 493bf215546Sopenharmony_ci nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, 494bf215546Sopenharmony_ci nv04_resource(res), NOUVEAU_BO_RDWR); 495bf215546Sopenharmony_ci } 496bf215546Sopenharmony_ci} 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_cistatic struct nv50_state_validate 499bf215546Sopenharmony_civalidate_list_cp[] = { 500bf215546Sopenharmony_ci { nv50_compprog_validate, NV50_NEW_CP_PROGRAM }, 501bf215546Sopenharmony_ci { nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF }, 502bf215546Sopenharmony_ci { nv50_compute_validate_surfaces, NV50_NEW_CP_SURFACES | 503bf215546Sopenharmony_ci NV50_NEW_CP_BUFFERS | 504bf215546Sopenharmony_ci NV50_NEW_CP_PROGRAM }, 505bf215546Sopenharmony_ci { nv50_compute_validate_textures, NV50_NEW_CP_TEXTURES }, 506bf215546Sopenharmony_ci { nv50_compute_validate_samplers, NV50_NEW_CP_SAMPLERS }, 507bf215546Sopenharmony_ci { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS }, 508bf215546Sopenharmony_ci}; 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_cistatic bool 511bf215546Sopenharmony_cinv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask) 512bf215546Sopenharmony_ci{ 513bf215546Sopenharmony_ci bool ret; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci /* TODO: validate textures, samplers, surfaces */ 516bf215546Sopenharmony_ci ret = nv50_state_validate(nv50, mask, validate_list_cp, 517bf215546Sopenharmony_ci ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp, 518bf215546Sopenharmony_ci nv50->bufctx_cp); 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci if (unlikely(nv50->state.flushed)) 521bf215546Sopenharmony_ci nv50_bufctx_fence(nv50->bufctx_cp, true); 522bf215546Sopenharmony_ci return ret; 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_cistatic void 526bf215546Sopenharmony_cinv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) 527bf215546Sopenharmony_ci{ 528bf215546Sopenharmony_ci struct nv50_screen *screen = nv50->screen; 529bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nv50->base.pushbuf; 530bf215546Sopenharmony_ci unsigned size = align(nv50->compprog->parm_size, 0x4); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); 533bf215546Sopenharmony_ci PUSH_DATA (push, (1 + (size / 4)) << 8); 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci if (size) { 536bf215546Sopenharmony_ci struct nouveau_mm_allocation *mm; 537bf215546Sopenharmony_ci struct nouveau_bo *bo = NULL; 538bf215546Sopenharmony_ci unsigned offset; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); 541bf215546Sopenharmony_ci assert(mm); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci nouveau_bo_map(bo, 0, nv50->base.client); 544bf215546Sopenharmony_ci memcpy(bo->map + offset, input, size); 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); 547bf215546Sopenharmony_ci nouveau_pushbuf_bufctx(push, nv50->bufctx); 548bf215546Sopenharmony_ci nouveau_pushbuf_validate(push); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci nouveau_pushbuf_space(push, 0, 0, 1); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4); 553bf215546Sopenharmony_ci nouveau_pushbuf_data(push, bo, offset, size); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); 556bf215546Sopenharmony_ci nouveau_bo_ref(NULL, &bo); 557bf215546Sopenharmony_ci nouveau_bufctx_reset(nv50->bufctx, 0); 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci} 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_civoid 562bf215546Sopenharmony_cinv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) 563bf215546Sopenharmony_ci{ 564bf215546Sopenharmony_ci struct nv50_context *nv50 = nv50_context(pipe); 565bf215546Sopenharmony_ci struct nouveau_pushbuf *push = nv50->base.pushbuf; 566bf215546Sopenharmony_ci unsigned block_size = info->block[0] * info->block[1] * info->block[2]; 567bf215546Sopenharmony_ci struct nv50_program *cp = nv50->compprog; 568bf215546Sopenharmony_ci bool ret; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci ret = !nv50_state_validate_cp(nv50, ~0); 571bf215546Sopenharmony_ci if (ret) { 572bf215546Sopenharmony_ci NOUVEAU_ERR("Failed to launch grid !\n"); 573bf215546Sopenharmony_ci return; 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci nv50_compute_upload_input(nv50, info->input); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CP_START_ID), 1); 579bf215546Sopenharmony_ci PUSH_DATA (push, cp->code_base); 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1); 582bf215546Sopenharmony_ci PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x14, 0x40)); 583bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1); 584bf215546Sopenharmony_ci PUSH_DATA (push, cp->max_gpr); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* no indirect support - just read the parameters out */ 587bf215546Sopenharmony_ci uint32_t grid[3]; 588bf215546Sopenharmony_ci if (unlikely(info->indirect)) { 589bf215546Sopenharmony_ci pipe_buffer_read(pipe, info->indirect, info->indirect_offset, 590bf215546Sopenharmony_ci sizeof(grid), grid); 591bf215546Sopenharmony_ci } else { 592bf215546Sopenharmony_ci memcpy(grid, info->grid, sizeof(grid)); 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* grid/block setup */ 596bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2); 597bf215546Sopenharmony_ci PUSH_DATA (push, info->block[1] << 16 | info->block[0]); 598bf215546Sopenharmony_ci PUSH_DATA (push, info->block[2]); 599bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1); 600bf215546Sopenharmony_ci PUSH_DATA (push, 1 << 16 | block_size); 601bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1); 602bf215546Sopenharmony_ci PUSH_DATA (push, 1); 603bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GRIDDIM), 1); 604bf215546Sopenharmony_ci PUSH_DATA (push, grid[1] << 16 | grid[0]); 605bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(GRIDID), 1); 606bf215546Sopenharmony_ci PUSH_DATA (push, 1); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci for (int i = 0; i < grid[2]; i++) { 609bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1); 610bf215546Sopenharmony_ci PUSH_DATA (push, grid[2] | i << 16); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci /* kernel launching */ 613bf215546Sopenharmony_ci BEGIN_NV04(push, NV50_CP(LAUNCH), 1); 614bf215546Sopenharmony_ci PUSH_DATA (push, 0); 615bf215546Sopenharmony_ci } 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); 618bf215546Sopenharmony_ci PUSH_DATA (push, 0); 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci /* bind a compute shader clobbers fragment shader state */ 621bf215546Sopenharmony_ci nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] * 624bf215546Sopenharmony_ci grid[0] * grid[1] * grid[2]; 625bf215546Sopenharmony_ci} 626