/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "fd6_const.h"
#include "fd6_pack.h"

/* ir3_const.h contains generation-independent const-emit logic that is
 * parameterized by these two emit callbacks; defining them before the
 * include plugs in the a6xx implementations below.
 */
#define emit_const_user fd6_emit_const_user
#define emit_const_bo fd6_emit_const_bo
#include "ir3_const.h"

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
/* Emit constants inline in the command stream (SS6_DIRECT): the dwords are
 * copied into a CP_LOAD_STATE6 packet targeting the stage's const file.
 */
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   /* NOTE we cheat a bit here, since we know mesa is aligning
    * the size of the user buffer to 16 bytes.  And we want to
    * cut cycles in a hot path.
    */
   uint32_t align_sz = align(sizedwords, 4);

   /* Geometry-pipeline stages and fragment/compute use different load-state
    * opcodes; the packet payload is otherwise identical.
    */
   if (fd6_geom_stage(v->type)) {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4,
                                  .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
   } else {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4,
                                  .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
   }
}

/* Emit constants sourced from a GPU buffer object (SS6_INDIRECT): the CP
 * fetches the values from 'bo' at 'offset' rather than from the cmdstream.
 */
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   /* dst_off/num_unit are in vec4 units; the hardware additionally requires
    * vec4-group (16-dword) alignment here — hence the %4 asserts.
    */
   uint32_t dst_off = regid / 4;
   assert(dst_off % 4 == 0);
   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   if (fd6_geom_stage(v->type)) {
      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   } else {
      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   }
}

/* Hook used by the shared ir3_const.h code.  On a6xx consts are always
 * emitted into reusable state objects, so this is unconditionally true.
 */
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return true;
}

/* Hook used by the shared ir3_const.h code; pointer-table emission is a
 * pre-a6xx mechanism and must never be reached on this generation.
 */
static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}

/* Emit the primitive-param driver params for one tessellation/geometry
 * stage, clamped against the variant's allocated const space (constlen).
 */
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
                       uint32_t *params, int num_params)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const unsigned regid = const_state->offsets.primitive_param;
   /* Skip the emit entirely if the param slot lies beyond constlen (the
    * shader doesn't actually read the params in that case).
    */
   int size = MIN2(1 + regid, v->constlen) - regid;
   if (size > 0)
      fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}

/* Build a streaming stateobj holding the per-stage tessellation/geometry
 * primitive params (strides, vertex counts) for all active stages.
 * Caller owns the returned ringbuffer.
 */
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;

   struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
    * size is dwords, since that's what LDG/STG use.
    */
   unsigned num_vertices = emit->hs ? emit->patch_vertices
                                    : emit->gs->gs.vertices_in;

   uint32_t vs_params[4] = {
      emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
      emit->vs->output_size * 4,                /* vs vertex stride */
      0, 0};

   emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));

   if (emit->hs) {
      uint32_t hs_params[4] = {
         emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
         emit->vs->output_size * 4,                /* vs vertex stride */
         emit->hs->output_size, emit->patch_vertices};

      emit_stage_tess_consts(constobj, emit->hs, hs_params,
                             ARRAY_SIZE(hs_params));

      /* With tess+geom, the DS feeds the GS, so the DS primitive stride is
       * based on the GS input vertex count rather than patch_vertices.
       */
      if (emit->gs)
         num_vertices = emit->gs->gs.vertices_in;

      uint32_t ds_params[4] = {
         emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
         emit->ds->output_size * 4,                /* ds vertex stride */
         emit->hs->output_size,                    /* hs vertex stride (dwords) */
         emit->hs->tess.tcs_vertices_out};

      emit_stage_tess_consts(constobj, emit->ds, ds_params,
                             ARRAY_SIZE(ds_params));
   }

   if (emit->gs) {
      /* The GS reads the output of whichever stage precedes it (DS when
       * tessellation is enabled, otherwise VS).
       */
      struct ir3_shader_variant *prev;
      if (emit->ds)
         prev = emit->ds;
      else
         prev = emit->vs;

      uint32_t gs_params[4] = {
         prev->output_size * num_vertices * 4, /* ds primitive stride */
         prev->output_size * 4,                /* ds vertex stride */
         0,
         0,
      };

      num_vertices = emit->gs->gs.vertices_in;
      emit_stage_tess_consts(constobj, emit->gs, gs_params,
                             ARRAY_SIZE(gs_params));
   }

   return constobj;
}

/* Emit the UBO address table (ST6_UBO) for one shader stage: one 64-bit
 * GPU address + size entry per UBO, uploading any user-pointer buffers to
 * GPU memory first.
 */
static void
fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   int num_ubos = const_state->num_ubos;

   if (!num_ubos)
      return;

   /* 3 header dwords + one 2-dword (addr lo/hi + size) entry per UBO: */
   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   for (int i = 0; i < num_ubos; i++) {
      /* NIR constant data is packed into the end of the shader. */
      if (i == const_state->constant_data_ubo) {
         int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
         /* The UBO size field lives in the high dword of the reloc: */
         OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
         continue;
      }

      struct pipe_constant_buffer *cb = &constbuf->cb[i];

      /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
       * to a buffer now, and save it in the constbuf so that we don't have
       * to reupload until they get changed.
       */
      if (cb->user_buffer) {
         struct pipe_context *pctx = &ctx->base;
         u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                       cb->user_buffer, &cb->buffer_offset, &cb->buffer);
         cb->user_buffer = NULL;
      }

      if (cb->buffer) {
         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
      } else {
         /* Unbound UBO slot: emit a recognizable poison address (with the
          * slot index encoded) and zero size.
          */
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
      }
   }
}

/* Return (and lazily cache in ubo_state) the cmdstream size in bytes needed
 * for one stage's user consts + UBO address table.
 */
static unsigned
user_consts_cmdstream_size(struct ir3_shader_variant *v)
{
   struct ir3_const_state *const_state = ir3_const_state(v);
   struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;

   if (unlikely(!ubo_state->cmdstream_size)) {
      unsigned packets, size;

      /* pre-calculate size required for userconst stateobj: */
      ir3_user_consts_size(ubo_state, &packets, &size);

      /* also account for UBO addresses: */
      packets += 1;
      size += 2 * const_state->num_ubos;

      /* 4 dwords of header per packet, plus the payload: */
      unsigned sizedwords = (4 * packets) + size;
      ubo_state->cmdstream_size = sizedwords * 4;
   }

   return ubo_state->cmdstream_size;
}

/* Build a streaming stateobj containing user consts and UBO tables for all
 * bound graphics stages.  Caller owns the returned ringbuffer.
 */
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
   /* types[] and variants[] are parallel arrays: */
   static const enum pipe_shader_type types[] = {
      PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
      PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
   };
   struct ir3_shader_variant *variants[] = {
      emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
   };
   struct fd_context *ctx = emit->ctx;
   unsigned sz = 0;

   /* First pass: compute total size so the ringbuffer can be allocated to
    * exactly fit the emits below.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
      if (!variants[i])
         continue;
      sz += user_consts_cmdstream_size(variants[i]);
   }

   struct fd_ringbuffer *constobj =
      fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);

   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
      if (!variants[i])
         continue;
      ir3_emit_user_consts(ctx->screen, variants[i], constobj,
                           &ctx->constbuf[types[i]]);
      fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
   }

   return constobj;
}

/* Build a streaming stateobj with driver params for the stages that need
 * them (VS/GS/DS), or return NULL (and clear has_dp_state) if none do.
 * Caller owns the returned ringbuffer.
 */
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned num_dp = 0;

   if (emit->vs->need_driver_params)
      num_dp++;

   if (emit->gs && emit->gs->need_driver_params)
      num_dp++;

   if (emit->ds && emit->ds->need_driver_params)
      num_dp++;

   if (!num_dp) {
      fd6_ctx->has_dp_state = false;
      return NULL;
   }

   unsigned size_dwords = num_dp * (4 + IR3_DP_VS_COUNT); /* 4dw PKT7 header */
   struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);

   if (emit->vs->need_driver_params) {
      ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
                             emit->indirect, emit->draw);
   }

   if (emit->gs && emit->gs->need_driver_params) {
      ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
                             emit->indirect, emit->draw);
   }

   if (emit->ds && emit->ds->need_driver_params) {
      ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
                             emit->indirect, emit->draw);
   }

   fd6_ctx->has_dp_state = true;

   return dpconstobj;
}

/* Emit compute-shader consts: shared ir3 consts plus the a6xx UBO table. */
void
fd6_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info)
{
   ir3_emit_cs_consts(v, ring, ctx, info);
   fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}

/* Thin a6xx wrapper over the shared immediate-const emission. */
void
fd6_emit_immediates(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   ir3_emit_immediates(screen, v, ring);
}

/* Thin a6xx wrapper emitting the producer->consumer stage link map. */
void
fd6_emit_link_map(struct fd_screen *screen,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *v,
                  struct fd_ringbuffer *ring)
{
   ir3_emit_link_map(screen, producer, v, ring);
}