1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2010 Christoph Bumiller 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 12bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci#include "pipe/p_defines.h" 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "nv50/nv50_context.h" 28bf215546Sopenharmony_ci#include "nv50/nv50_program.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "nv50_ir_driver.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistatic inline unsigned 33bf215546Sopenharmony_cibitcount4(const uint32_t val) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci static const uint8_t cnt[16] 36bf215546Sopenharmony_ci = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; 37bf215546Sopenharmony_ci return cnt[val & 0xf]; 38bf215546Sopenharmony_ci} 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic int 41bf215546Sopenharmony_cinv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci struct nv50_program *prog = (struct nv50_program *)info->driverPriv; 44bf215546Sopenharmony_ci unsigned i, n, c; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci n = 0; 47bf215546Sopenharmony_ci for (i = 0; i < info->numInputs; ++i) { 48bf215546Sopenharmony_ci prog->in[i].id = i; 49bf215546Sopenharmony_ci prog->in[i].sn = info->in[i].sn; 50bf215546Sopenharmony_ci prog->in[i].si = info->in[i].si; 51bf215546Sopenharmony_ci prog->in[i].hw = n; 52bf215546Sopenharmony_ci prog->in[i].mask = info->in[i].mask; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) 57bf215546Sopenharmony_ci if (info->in[i].mask & (1 << c)) 58bf215546Sopenharmony_ci info->in[i].slot[c] = n++; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) 61bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci prog->in_nr = info->numInputs; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci for (i = 0; i < info->numSysVals; ++i) { 66bf215546Sopenharmony_ci switch (info->sv[i].sn) { 67bf215546Sopenharmony_ci case TGSI_SEMANTIC_INSTANCEID: 68bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; 69bf215546Sopenharmony_ci continue; 70bf215546Sopenharmony_ci case TGSI_SEMANTIC_VERTEXID: 71bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; 72bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; 73bf215546Sopenharmony_ci continue; 74bf215546Sopenharmony_ci case TGSI_SEMANTIC_PRIMID: 75bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; 76bf215546Sopenharmony_ci break; 77bf215546Sopenharmony_ci default: 78bf215546Sopenharmony_ci break; 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci } 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci /* 83bf215546Sopenharmony_ci * Corner case: VP has no inputs, but we will still need to submit data to 84bf215546Sopenharmony_ci * draw it. HW will shout at us and won't draw anything if we don't enable 85bf215546Sopenharmony_ci * any input, so let's just pretend it's the first one. 86bf215546Sopenharmony_ci */ 87bf215546Sopenharmony_ci if (prog->vp.attrs[0] == 0 && 88bf215546Sopenharmony_ci prog->vp.attrs[1] == 0 && 89bf215546Sopenharmony_ci prog->vp.attrs[2] == 0) 90bf215546Sopenharmony_ci prog->vp.attrs[0] |= 0xf; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci /* VertexID before InstanceID */ 93bf215546Sopenharmony_ci if (info->io.vertexId < info->numSysVals) 94bf215546Sopenharmony_ci info->sv[info->io.vertexId].slot[0] = n++; 95bf215546Sopenharmony_ci if (info->io.instanceId < info->numSysVals) 96bf215546Sopenharmony_ci info->sv[info->io.instanceId].slot[0] = n++; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci n = 0; 99bf215546Sopenharmony_ci for (i = 0; i < info->numOutputs; ++i) { 100bf215546Sopenharmony_ci switch (info->out[i].sn) { 101bf215546Sopenharmony_ci case TGSI_SEMANTIC_PSIZE: 102bf215546Sopenharmony_ci prog->vp.psiz = i; 103bf215546Sopenharmony_ci break; 104bf215546Sopenharmony_ci case TGSI_SEMANTIC_CLIPDIST: 105bf215546Sopenharmony_ci prog->vp.clpd[info->out[i].si] = n; 106bf215546Sopenharmony_ci break; 107bf215546Sopenharmony_ci case TGSI_SEMANTIC_EDGEFLAG: 108bf215546Sopenharmony_ci prog->vp.edgeflag = i; 109bf215546Sopenharmony_ci break; 110bf215546Sopenharmony_ci case TGSI_SEMANTIC_BCOLOR: 111bf215546Sopenharmony_ci prog->vp.bfc[info->out[i].si] = i; 112bf215546Sopenharmony_ci break; 113bf215546Sopenharmony_ci case TGSI_SEMANTIC_LAYER: 114bf215546Sopenharmony_ci prog->gp.has_layer = true; 115bf215546Sopenharmony_ci prog->gp.layerid = n; 116bf215546Sopenharmony_ci break; 117bf215546Sopenharmony_ci case TGSI_SEMANTIC_VIEWPORT_INDEX: 118bf215546Sopenharmony_ci prog->gp.has_viewport = true; 119bf215546Sopenharmony_ci prog->gp.viewportid = n; 120bf215546Sopenharmony_ci break; 121bf215546Sopenharmony_ci default: 122bf215546Sopenharmony_ci break; 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci prog->out[i].id = i; 125bf215546Sopenharmony_ci prog->out[i].sn = info->out[i].sn; 126bf215546Sopenharmony_ci prog->out[i].si = info->out[i].si; 127bf215546Sopenharmony_ci prog->out[i].hw = n; 128bf215546Sopenharmony_ci prog->out[i].mask = info->out[i].mask; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) 131bf215546Sopenharmony_ci if (info->out[i].mask & (1 << c)) 132bf215546Sopenharmony_ci info->out[i].slot[c] = n++; 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci prog->out_nr = info->numOutputs; 135bf215546Sopenharmony_ci prog->max_out = n; 136bf215546Sopenharmony_ci if (!prog->max_out) 137bf215546Sopenharmony_ci prog->max_out = 1; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci if (prog->vp.psiz < info->numOutputs) 140bf215546Sopenharmony_ci prog->vp.psiz = prog->out[prog->vp.psiz].hw; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci return 0; 143bf215546Sopenharmony_ci} 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_cistatic int 146bf215546Sopenharmony_cinv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info) 147bf215546Sopenharmony_ci{ 148bf215546Sopenharmony_ci struct nv50_program *prog = (struct nv50_program *)info->driverPriv; 149bf215546Sopenharmony_ci unsigned i, n, m, c; 150bf215546Sopenharmony_ci unsigned nvary; 151bf215546Sopenharmony_ci unsigned nflat; 152bf215546Sopenharmony_ci unsigned nintp = 0; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci /* count recorded non-flat inputs */ 155bf215546Sopenharmony_ci for (m = 0, i = 0; i < info->numInputs; ++i) { 156bf215546Sopenharmony_ci switch (info->in[i].sn) { 157bf215546Sopenharmony_ci case TGSI_SEMANTIC_POSITION: 158bf215546Sopenharmony_ci continue; 159bf215546Sopenharmony_ci default: 160bf215546Sopenharmony_ci m += info->in[i].flat ? 0 : 1; 161bf215546Sopenharmony_ci break; 162bf215546Sopenharmony_ci } 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci /* careful: id may be != i in info->in[prog->in[i].id] */ 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci /* Fill prog->in[] so that non-flat inputs are first and 167bf215546Sopenharmony_ci * kick out special inputs that don't use the RESULT_MAP. 168bf215546Sopenharmony_ci */ 169bf215546Sopenharmony_ci for (n = 0, i = 0; i < info->numInputs; ++i) { 170bf215546Sopenharmony_ci if (info->in[i].sn == TGSI_SEMANTIC_POSITION) { 171bf215546Sopenharmony_ci prog->fp.interp |= info->in[i].mask << 24; 172bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) 173bf215546Sopenharmony_ci if (info->in[i].mask & (1 << c)) 174bf215546Sopenharmony_ci info->in[i].slot[c] = nintp++; 175bf215546Sopenharmony_ci } else { 176bf215546Sopenharmony_ci unsigned j = info->in[i].flat ? m++ : n++; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci if (info->in[i].sn == TGSI_SEMANTIC_COLOR) 179bf215546Sopenharmony_ci prog->vp.bfc[info->in[i].si] = j; 180bf215546Sopenharmony_ci else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) 181bf215546Sopenharmony_ci prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci prog->in[j].id = i; 184bf215546Sopenharmony_ci prog->in[j].mask = info->in[i].mask; 185bf215546Sopenharmony_ci prog->in[j].sn = info->in[i].sn; 186bf215546Sopenharmony_ci prog->in[j].si = info->in[i].si; 187bf215546Sopenharmony_ci prog->in[j].linear = info->in[i].linear; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci prog->in_nr++; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci if (!(prog->fp.interp & (8 << 24))) { 193bf215546Sopenharmony_ci ++nintp; 194bf215546Sopenharmony_ci prog->fp.interp |= 8 << 24; 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci for (i = 0; i < prog->in_nr; ++i) { 198bf215546Sopenharmony_ci int j = prog->in[i].id; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci prog->in[i].hw = nintp; 201bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) 202bf215546Sopenharmony_ci if (prog->in[i].mask & (1 << c)) 203bf215546Sopenharmony_ci info->in[j].slot[c] = nintp++; 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci /* (n == m) if m never increased, i.e. no flat inputs */ 206bf215546Sopenharmony_ci nflat = (n < m) ? (nintp - prog->in[n].hw) : 0; 207bf215546Sopenharmony_ci nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */ 208bf215546Sopenharmony_ci nvary = nintp - nflat; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; 211bf215546Sopenharmony_ci prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci /* put front/back colors right after HPOS */ 214bf215546Sopenharmony_ci prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT; 215bf215546Sopenharmony_ci for (i = 0; i < 2; ++i) 216bf215546Sopenharmony_ci if (prog->vp.bfc[i] < 0xff) 217bf215546Sopenharmony_ci prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci /* FP outputs */ 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (info->prop.fp.numColourResults > 1) 222bf215546Sopenharmony_ci prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci for (i = 0; i < info->numOutputs; ++i) { 225bf215546Sopenharmony_ci prog->out[i].id = i; 226bf215546Sopenharmony_ci prog->out[i].sn = info->out[i].sn; 227bf215546Sopenharmony_ci prog->out[i].si = info->out[i].si; 228bf215546Sopenharmony_ci prog->out[i].mask = info->out[i].mask; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (i == info->io.fragDepth || i == info->io.sampleMask) 231bf215546Sopenharmony_ci continue; 232bf215546Sopenharmony_ci prog->out[i].hw = info->out[i].si * 4; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) 235bf215546Sopenharmony_ci info->out[i].slot[c] = prog->out[i].hw + c; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4); 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) { 241bf215546Sopenharmony_ci info->out[info->io.sampleMask].slot[0] = prog->max_out++; 242bf215546Sopenharmony_ci prog->fp.has_samplemask = 1; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) 246bf215546Sopenharmony_ci info->out[info->io.fragDepth].slot[2] = prog->max_out++; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (!prog->max_out) 249bf215546Sopenharmony_ci prog->max_out = 4; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci return 0; 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_cistatic int 255bf215546Sopenharmony_cinv50_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) 256bf215546Sopenharmony_ci{ 257bf215546Sopenharmony_ci switch (info->type) { 258bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 259bf215546Sopenharmony_ci return nv50_vertprog_assign_slots(info); 260bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 261bf215546Sopenharmony_ci return nv50_vertprog_assign_slots(info); 262bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: 263bf215546Sopenharmony_ci return nv50_fragprog_assign_slots(info); 264bf215546Sopenharmony_ci case PIPE_SHADER_COMPUTE: 265bf215546Sopenharmony_ci return 0; 266bf215546Sopenharmony_ci default: 267bf215546Sopenharmony_ci return -1; 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci} 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_cistatic struct nv50_stream_output_state * 272bf215546Sopenharmony_cinv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info, 273bf215546Sopenharmony_ci const struct pipe_stream_output_info *pso) 274bf215546Sopenharmony_ci{ 275bf215546Sopenharmony_ci struct nv50_stream_output_state *so; 276bf215546Sopenharmony_ci unsigned b, i, c; 277bf215546Sopenharmony_ci unsigned base[4]; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci so = MALLOC_STRUCT(nv50_stream_output_state); 280bf215546Sopenharmony_ci if (!so) 281bf215546Sopenharmony_ci return NULL; 282bf215546Sopenharmony_ci memset(so->map, 0xff, sizeof(so->map)); 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci for (b = 0; b < 4; ++b) 285bf215546Sopenharmony_ci so->num_attribs[b] = 0; 286bf215546Sopenharmony_ci for (i = 0; i < pso->num_outputs; ++i) { 287bf215546Sopenharmony_ci unsigned end = pso->output[i].dst_offset + pso->output[i].num_components; 288bf215546Sopenharmony_ci b = pso->output[i].output_buffer; 289bf215546Sopenharmony_ci assert(b < 4); 290bf215546Sopenharmony_ci so->num_attribs[b] = MAX2(so->num_attribs[b], end); 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci so->stride[0] = pso->stride[0] * 4; 296bf215546Sopenharmony_ci base[0] = 0; 297bf215546Sopenharmony_ci for (b = 1; b < 4; ++b) { 298bf215546Sopenharmony_ci assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]); 299bf215546Sopenharmony_ci so->stride[b] = so->num_attribs[b] * 4; 300bf215546Sopenharmony_ci if (so->num_attribs[b]) 301bf215546Sopenharmony_ci so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT; 302bf215546Sopenharmony_ci base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4); 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) { 305bf215546Sopenharmony_ci assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX); 306bf215546Sopenharmony_ci so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT; 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci so->map_size = base[3] + so->num_attribs[3]; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci for (i = 0; i < pso->num_outputs; ++i) { 312bf215546Sopenharmony_ci const unsigned s = pso->output[i].start_component; 313bf215546Sopenharmony_ci const unsigned p = pso->output[i].dst_offset; 314bf215546Sopenharmony_ci const unsigned r = pso->output[i].register_index; 315bf215546Sopenharmony_ci b = pso->output[i].output_buffer; 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci if (r >= info->numOutputs) 318bf215546Sopenharmony_ci continue; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci for (c = 0; c < pso->output[i].num_components; ++c) 321bf215546Sopenharmony_ci so->map[base[b] + p + c] = info->out[r].slot[s + c]; 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci return so; 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_cibool 328bf215546Sopenharmony_cinv50_program_translate(struct nv50_program *prog, uint16_t chipset, 329bf215546Sopenharmony_ci struct util_debug_callback *debug) 330bf215546Sopenharmony_ci{ 331bf215546Sopenharmony_ci struct nv50_ir_prog_info *info; 332bf215546Sopenharmony_ci struct nv50_ir_prog_info_out info_out = {}; 333bf215546Sopenharmony_ci int i, ret; 334bf215546Sopenharmony_ci const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci info = CALLOC_STRUCT(nv50_ir_prog_info); 337bf215546Sopenharmony_ci if (!info) 338bf215546Sopenharmony_ci return false; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci info->type = prog->type; 341bf215546Sopenharmony_ci info->target = chipset; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci info->bin.sourceRep = prog->pipe.type; 344bf215546Sopenharmony_ci switch (prog->pipe.type) { 345bf215546Sopenharmony_ci case PIPE_SHADER_IR_TGSI: 346bf215546Sopenharmony_ci info->bin.source = (void *)prog->pipe.tokens; 347bf215546Sopenharmony_ci break; 348bf215546Sopenharmony_ci case PIPE_SHADER_IR_NIR: 349bf215546Sopenharmony_ci info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir); 350bf215546Sopenharmony_ci break; 351bf215546Sopenharmony_ci default: 352bf215546Sopenharmony_ci assert(!"unsupported IR!"); 353bf215546Sopenharmony_ci free(info); 354bf215546Sopenharmony_ci return false; 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci info->bin.smemSize = prog->cp.smem_size; 358bf215546Sopenharmony_ci info->io.auxCBSlot = 15; 359bf215546Sopenharmony_ci info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; 360bf215546Sopenharmony_ci info->io.genUserClip = prog->vp.clpd_nr; 361bf215546Sopenharmony_ci if (prog->fp.alphatest) 362bf215546Sopenharmony_ci info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; 365bf215546Sopenharmony_ci info->io.bufInfoBase = NV50_CB_AUX_BUF_INFO(0); 366bf215546Sopenharmony_ci info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; 367bf215546Sopenharmony_ci info->io.msInfoCBSlot = 15; 368bf215546Sopenharmony_ci info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci info->io.membarOffset = NV50_CB_AUX_MEMBAR_OFFSET; 371bf215546Sopenharmony_ci info->io.gmemMembar = 15; 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci info->assignSlots = nv50_program_assign_varying_slots; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci prog->vp.bfc[0] = 0xff; 376bf215546Sopenharmony_ci prog->vp.bfc[1] = 0xff; 377bf215546Sopenharmony_ci prog->vp.edgeflag = 0xff; 378bf215546Sopenharmony_ci prog->vp.clpd[0] = map_undef; 379bf215546Sopenharmony_ci prog->vp.clpd[1] = map_undef; 380bf215546Sopenharmony_ci prog->vp.psiz = map_undef; 381bf215546Sopenharmony_ci prog->gp.has_layer = 0; 382bf215546Sopenharmony_ci prog->gp.has_viewport = 0; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci if (prog->type == PIPE_SHADER_COMPUTE) 385bf215546Sopenharmony_ci info->prop.cp.inputOffset = 0x14; 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci info_out.driverPriv = prog; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci#ifndef NDEBUG 390bf215546Sopenharmony_ci info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); 391bf215546Sopenharmony_ci info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); 392bf215546Sopenharmony_ci info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0); 393bf215546Sopenharmony_ci#else 394bf215546Sopenharmony_ci info->optLevel = 3; 395bf215546Sopenharmony_ci#endif 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci ret = nv50_ir_generate_code(info, &info_out); 398bf215546Sopenharmony_ci if (ret) { 399bf215546Sopenharmony_ci NOUVEAU_ERR("shader translation failed: %i\n", ret); 400bf215546Sopenharmony_ci goto out; 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci prog->code = info_out.bin.code; 404bf215546Sopenharmony_ci prog->code_size = info_out.bin.codeSize; 405bf215546Sopenharmony_ci prog->fixups = info_out.bin.relocData; 406bf215546Sopenharmony_ci prog->interps = info_out.bin.fixupData; 407bf215546Sopenharmony_ci prog->max_gpr = MAX2(4, (info_out.bin.maxGPR >> 1) + 1); 408bf215546Sopenharmony_ci prog->tls_space = info_out.bin.tlsSpace; 409bf215546Sopenharmony_ci prog->cp.smem_size = info_out.bin.smemSize; 410bf215546Sopenharmony_ci prog->mul_zero_wins = info->io.mul_zero_wins; 411bf215546Sopenharmony_ci prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS; 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci prog->vp.clip_enable = (1 << info_out.io.clipDistances) - 1; 414bf215546Sopenharmony_ci prog->vp.cull_enable = 415bf215546Sopenharmony_ci ((1 << info_out.io.cullDistances) - 1) << info_out.io.clipDistances; 416bf215546Sopenharmony_ci prog->vp.clip_mode = 0; 417bf215546Sopenharmony_ci for (i = 0; i < info_out.io.cullDistances; ++i) 418bf215546Sopenharmony_ci prog->vp.clip_mode |= 1 << ((info_out.io.clipDistances + i) * 4); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci if (prog->type == PIPE_SHADER_FRAGMENT) { 421bf215546Sopenharmony_ci if (info_out.prop.fp.writesDepth) { 422bf215546Sopenharmony_ci prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; 423bf215546Sopenharmony_ci prog->fp.flags[1] = 0x11; 424bf215546Sopenharmony_ci } 425bf215546Sopenharmony_ci if (info_out.prop.fp.usesDiscard) 426bf215546Sopenharmony_ci prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; 427bf215546Sopenharmony_ci } else 428bf215546Sopenharmony_ci if (prog->type == PIPE_SHADER_GEOMETRY) { 429bf215546Sopenharmony_ci switch (info_out.prop.gp.outputPrim) { 430bf215546Sopenharmony_ci case PIPE_PRIM_LINE_STRIP: 431bf215546Sopenharmony_ci prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; 432bf215546Sopenharmony_ci break; 433bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLE_STRIP: 434bf215546Sopenharmony_ci prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; 435bf215546Sopenharmony_ci break; 436bf215546Sopenharmony_ci case PIPE_PRIM_POINTS: 437bf215546Sopenharmony_ci default: 438bf215546Sopenharmony_ci assert(info_out.prop.gp.outputPrim == PIPE_PRIM_POINTS); 439bf215546Sopenharmony_ci prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; 440bf215546Sopenharmony_ci break; 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024); 443bf215546Sopenharmony_ci } else 444bf215546Sopenharmony_ci if (prog->type == PIPE_SHADER_COMPUTE) { 445bf215546Sopenharmony_ci for (i = 0; i < NV50_MAX_GLOBALS; i++) { 446bf215546Sopenharmony_ci prog->cp.gmem[i] = (struct nv50_gmem_state){ 447bf215546Sopenharmony_ci .valid = info_out.prop.cp.gmem[i].valid, 448bf215546Sopenharmony_ci .image = info_out.prop.cp.gmem[i].image, 449bf215546Sopenharmony_ci .slot = info_out.prop.cp.gmem[i].slot 450bf215546Sopenharmony_ci }; 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci if (prog->pipe.stream_output.num_outputs) 455bf215546Sopenharmony_ci prog->so = nv50_program_create_strmout_state(&info_out, 456bf215546Sopenharmony_ci &prog->pipe.stream_output); 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci util_debug_message(debug, SHADER_INFO, 459bf215546Sopenharmony_ci "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, loops: %d, bytes: %d", 460bf215546Sopenharmony_ci prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize, 461bf215546Sopenharmony_ci prog->max_gpr, info_out.bin.instructions, info_out.loops, 462bf215546Sopenharmony_ci info_out.bin.codeSize); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ciout: 465bf215546Sopenharmony_ci if (info->bin.sourceRep == PIPE_SHADER_IR_NIR) 466bf215546Sopenharmony_ci ralloc_free((void *)info->bin.source); 467bf215546Sopenharmony_ci FREE(info); 468bf215546Sopenharmony_ci return !ret; 469bf215546Sopenharmony_ci} 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_cibool 472bf215546Sopenharmony_cinv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci struct nouveau_heap *heap; 475bf215546Sopenharmony_ci int ret; 476bf215546Sopenharmony_ci uint32_t size = align(prog->code_size, 0x40); 477bf215546Sopenharmony_ci uint8_t prog_type; 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci switch (prog->type) { 480bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; 481bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; 482bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; 483bf215546Sopenharmony_ci case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; 484bf215546Sopenharmony_ci default: 485bf215546Sopenharmony_ci assert(!"invalid program type"); 486bf215546Sopenharmony_ci return false; 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); 490bf215546Sopenharmony_ci if (ret) { 491bf215546Sopenharmony_ci /* Out of space: evict everything to compactify the code segment, hoping 492bf215546Sopenharmony_ci * the working set is much smaller and drifts slowly. Improve me ! 493bf215546Sopenharmony_ci */ 494bf215546Sopenharmony_ci while (heap->next) { 495bf215546Sopenharmony_ci struct nv50_program *evict = heap->next->priv; 496bf215546Sopenharmony_ci if (evict) 497bf215546Sopenharmony_ci nouveau_heap_free(&evict->mem); 498bf215546Sopenharmony_ci } 499bf215546Sopenharmony_ci debug_printf("WARNING: out of code space, evicting all shaders.\n"); 500bf215546Sopenharmony_ci ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); 501bf215546Sopenharmony_ci if (ret) { 502bf215546Sopenharmony_ci NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); 503bf215546Sopenharmony_ci return false; 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci } 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci if (prog->type == PIPE_SHADER_COMPUTE) { 508bf215546Sopenharmony_ci /* CP code must be uploaded in FP code segment. */ 509bf215546Sopenharmony_ci prog_type = 1; 510bf215546Sopenharmony_ci } else { 511bf215546Sopenharmony_ci prog->code_base = prog->mem->start; 512bf215546Sopenharmony_ci prog_type = prog->type; 513bf215546Sopenharmony_ci } 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci ret = nv50_tls_realloc(nv50->screen, prog->tls_space); 516bf215546Sopenharmony_ci if (ret < 0) { 517bf215546Sopenharmony_ci nouveau_heap_free(&prog->mem); 518bf215546Sopenharmony_ci return false; 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci if (ret > 0) 521bf215546Sopenharmony_ci nv50->state.new_tls_space = true; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci if (prog->fixups) 524bf215546Sopenharmony_ci nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); 525bf215546Sopenharmony_ci if (prog->interps) 526bf215546Sopenharmony_ci nv50_ir_apply_fixups(prog->interps, prog->code, 527bf215546Sopenharmony_ci prog->fp.force_persample_interp, 528bf215546Sopenharmony_ci false /* flatshade */, 529bf215546Sopenharmony_ci prog->fp.alphatest - 1, 530bf215546Sopenharmony_ci false /* msaa */); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, 533bf215546Sopenharmony_ci (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, 534bf215546Sopenharmony_ci NOUVEAU_BO_VRAM, prog->code_size, prog->code); 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); 537bf215546Sopenharmony_ci PUSH_DATA (nv50->base.pushbuf, 0); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci return true; 540bf215546Sopenharmony_ci} 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_civoid 543bf215546Sopenharmony_cinv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 544bf215546Sopenharmony_ci{ 545bf215546Sopenharmony_ci const struct pipe_shader_state pipe = p->pipe; 546bf215546Sopenharmony_ci const ubyte type = p->type; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci if (p->mem) 549bf215546Sopenharmony_ci nouveau_heap_free(&p->mem); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci FREE(p->code); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci FREE(p->fixups); 554bf215546Sopenharmony_ci FREE(p->interps); 555bf215546Sopenharmony_ci FREE(p->so); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci memset(p, 0, sizeof(*p)); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci p->pipe = pipe; 560bf215546Sopenharmony_ci p->type = type; 561bf215546Sopenharmony_ci} 562