1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "ir3/ir3_nir.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/* This has to reach into the fd_context a bit more than the rest of 30bf215546Sopenharmony_ci * ir3, but it needs to be aligned with the compiler, so both agree 31bf215546Sopenharmony_ci * on which const regs hold what. And the logic is identical between 32bf215546Sopenharmony_ci * ir3 generations, the only difference is small details in the actual 33bf215546Sopenharmony_ci * CP_LOAD_STATE packets (which is handled inside the generation 34bf215546Sopenharmony_ci * specific ctx->emit_const(_bo)() fxns) 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * This file should be included in only a single .c file per gen, which 37bf215546Sopenharmony_ci * defines the following functions: 38bf215546Sopenharmony_ci */ 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic bool is_stateobj(struct fd_ringbuffer *ring); 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic void emit_const_user(struct fd_ringbuffer *ring, 43bf215546Sopenharmony_ci const struct ir3_shader_variant *v, uint32_t regid, 44bf215546Sopenharmony_ci uint32_t size, const uint32_t *user_buffer); 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistatic void emit_const_bo(struct fd_ringbuffer *ring, 47bf215546Sopenharmony_ci const struct ir3_shader_variant *v, uint32_t regid, 48bf215546Sopenharmony_ci uint32_t offset, uint32_t size, struct fd_bo *bo); 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic void 51bf215546Sopenharmony_ciemit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, 52bf215546Sopenharmony_ci uint32_t regid, uint32_t offset, uint32_t size, 53bf215546Sopenharmony_ci struct pipe_resource *buffer) 54bf215546Sopenharmony_ci{ 55bf215546Sopenharmony_ci struct fd_resource *rsc = fd_resource(buffer); 56bf215546Sopenharmony_ci emit_const_bo(ring, v, regid, offset, size, rsc->bo); 57bf215546Sopenharmony_ci} 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_cistatic void emit_const_ptrs(struct fd_ringbuffer *ring, 60bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 61bf215546Sopenharmony_ci uint32_t dst_offset, uint32_t num, 62bf215546Sopenharmony_ci struct fd_bo **bos, uint32_t *offsets); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistatic void 65bf215546Sopenharmony_ciemit_const_asserts(struct fd_ringbuffer *ring, 66bf215546Sopenharmony_ci const struct ir3_shader_variant *v, uint32_t regid, 67bf215546Sopenharmony_ci uint32_t sizedwords) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci assert((regid % 4) == 0); 70bf215546Sopenharmony_ci assert((sizedwords % 4) == 0); 71bf215546Sopenharmony_ci assert(regid + sizedwords <= v->constlen * 4); 72bf215546Sopenharmony_ci} 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_cistatic void 75bf215546Sopenharmony_ciring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci /* when we emit const state via ring (IB2) we need a WFI, but when 78bf215546Sopenharmony_ci * it is emit'd via stateobj, we don't 79bf215546Sopenharmony_ci */ 80bf215546Sopenharmony_ci if (is_stateobj(ring)) 81bf215546Sopenharmony_ci return; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci fd_wfi(batch, ring); 84bf215546Sopenharmony_ci} 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci/** 87bf215546Sopenharmony_ci * Indirectly calculates size of cmdstream needed for ir3_emit_user_consts(). 88bf215546Sopenharmony_ci * Returns number of packets, and total size of all the payload. 89bf215546Sopenharmony_ci * 90bf215546Sopenharmony_ci * The value can be a worst-case, ie. some shader variants may not read all 91bf215546Sopenharmony_ci * consts, etc. 92bf215546Sopenharmony_ci * 93bf215546Sopenharmony_ci * Returns size in dwords. 94bf215546Sopenharmony_ci */ 95bf215546Sopenharmony_cistatic inline void 96bf215546Sopenharmony_ciir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets, 97bf215546Sopenharmony_ci unsigned *size) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci *packets = *size = 0; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) { 102bf215546Sopenharmony_ci if (state->range[i].start < state->range[i].end) { 103bf215546Sopenharmony_ci *size += state->range[i].end - state->range[i].start; 104bf215546Sopenharmony_ci (*packets)++; 105bf215546Sopenharmony_ci } 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci/** 110bf215546Sopenharmony_ci * Uploads the referenced subranges of the nir constant_data to the hardware's 111bf215546Sopenharmony_ci * constant buffer. 112bf215546Sopenharmony_ci */ 113bf215546Sopenharmony_cistatic inline void 114bf215546Sopenharmony_ciir3_emit_constant_data(struct fd_screen *screen, 115bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 116bf215546Sopenharmony_ci struct fd_ringbuffer *ring) 117bf215546Sopenharmony_ci{ 118bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 119bf215546Sopenharmony_ci const struct ir3_ubo_analysis_state *state = &const_state->ubo_state; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci for (unsigned i = 0; i < state->num_enabled; i++) { 122bf215546Sopenharmony_ci unsigned ubo = state->range[i].ubo.block; 123bf215546Sopenharmony_ci if (ubo != const_state->constant_data_ubo) 124bf215546Sopenharmony_ci continue; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci uint32_t size = state->range[i].end - state->range[i].start; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* Pre-a6xx, we might have ranges enabled in the shader that aren't 129bf215546Sopenharmony_ci * used in the binning variant. 130bf215546Sopenharmony_ci */ 131bf215546Sopenharmony_ci if (16 * v->constlen <= state->range[i].offset) 132bf215546Sopenharmony_ci continue; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* and even if the start of the const buffer is before 135bf215546Sopenharmony_ci * first_immediate, the end may not be: 136bf215546Sopenharmony_ci */ 137bf215546Sopenharmony_ci size = MIN2(size, (16 * v->constlen) - state->range[i].offset); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci if (size == 0) 140bf215546Sopenharmony_ci continue; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci emit_const_bo(ring, v, state->range[i].offset / 4, 143bf215546Sopenharmony_ci v->info.constant_data_offset + state->range[i].start, 144bf215546Sopenharmony_ci size / 4, v->bo); 145bf215546Sopenharmony_ci } 146bf215546Sopenharmony_ci} 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci/** 149bf215546Sopenharmony_ci * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access 150bf215546Sopenharmony_ci * outside of these ranges will be done using full UBO accesses in the 151bf215546Sopenharmony_ci * shader). 152bf215546Sopenharmony_ci */ 153bf215546Sopenharmony_cistatic inline void 154bf215546Sopenharmony_ciir3_emit_user_consts(struct fd_screen *screen, 155bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 156bf215546Sopenharmony_ci struct fd_ringbuffer *ring, 157bf215546Sopenharmony_ci struct fd_constbuf_stateobj *constbuf) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 160bf215546Sopenharmony_ci const struct ir3_ubo_analysis_state *state = &const_state->ubo_state; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci for (unsigned i = 0; i < state->num_enabled; i++) { 163bf215546Sopenharmony_ci assert(!state->range[i].ubo.bindless); 164bf215546Sopenharmony_ci unsigned ubo = state->range[i].ubo.block; 165bf215546Sopenharmony_ci if (!(constbuf->enabled_mask & (1 << ubo)) || 166bf215546Sopenharmony_ci ubo == const_state->constant_data_ubo) { 167bf215546Sopenharmony_ci continue; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci struct pipe_constant_buffer *cb = &constbuf->cb[ubo]; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci uint32_t size = state->range[i].end - state->range[i].start; 172bf215546Sopenharmony_ci uint32_t offset = cb->buffer_offset + state->range[i].start; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* Pre-a6xx, we might have ranges enabled in the shader that aren't 175bf215546Sopenharmony_ci * used in the binning variant. 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci if (16 * v->constlen <= state->range[i].offset) 178bf215546Sopenharmony_ci continue; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci /* and even if the start of the const buffer is before 181bf215546Sopenharmony_ci * first_immediate, the end may not be: 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci size = MIN2(size, (16 * v->constlen) - state->range[i].offset); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci if (size == 0) 186bf215546Sopenharmony_ci continue; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* things should be aligned to vec4: */ 189bf215546Sopenharmony_ci assert((state->range[i].offset % 16) == 0); 190bf215546Sopenharmony_ci assert((size % 16) == 0); 191bf215546Sopenharmony_ci assert((offset % 16) == 0); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci if (cb->user_buffer) { 194bf215546Sopenharmony_ci emit_const_user(ring, v, state->range[i].offset / 4, size / 4, 195bf215546Sopenharmony_ci cb->user_buffer + state->range[i].start); 196bf215546Sopenharmony_ci } else { 197bf215546Sopenharmony_ci emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4, 198bf215546Sopenharmony_ci cb->buffer); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci} 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_cistatic inline void 204bf215546Sopenharmony_ciir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, 205bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 208bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.ubo; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* a6xx+ uses UBO state and ldc instead of pointers emitted in 211bf215546Sopenharmony_ci * const state and ldg: 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci if (ctx->screen->gen >= 6) 214bf215546Sopenharmony_ci return; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci if (v->constlen > offset) { 217bf215546Sopenharmony_ci uint32_t params = const_state->num_ubos; 218bf215546Sopenharmony_ci uint32_t offsets[params]; 219bf215546Sopenharmony_ci struct fd_bo *bos[params]; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci for (uint32_t i = 0; i < params; i++) { 222bf215546Sopenharmony_ci if (i == const_state->constant_data_ubo) { 223bf215546Sopenharmony_ci bos[i] = v->bo; 224bf215546Sopenharmony_ci offsets[i] = v->info.constant_data_offset; 225bf215546Sopenharmony_ci continue; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci struct pipe_constant_buffer *cb = &constbuf->cb[i]; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci /* If we have user pointers (constbuf 0, aka GL uniforms), upload 231bf215546Sopenharmony_ci * them to a buffer now, and save it in the constbuf so that we 232bf215546Sopenharmony_ci * don't have to reupload until they get changed. 233bf215546Sopenharmony_ci */ 234bf215546Sopenharmony_ci if (cb->user_buffer) { 235bf215546Sopenharmony_ci struct pipe_context *pctx = &ctx->base; 236bf215546Sopenharmony_ci u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64, 237bf215546Sopenharmony_ci cb->user_buffer, &cb->buffer_offset, &cb->buffer); 238bf215546Sopenharmony_ci cb->user_buffer = NULL; 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) { 242bf215546Sopenharmony_ci offsets[i] = cb->buffer_offset; 243bf215546Sopenharmony_ci bos[i] = fd_resource(cb->buffer)->bo; 244bf215546Sopenharmony_ci } else { 245bf215546Sopenharmony_ci offsets[i] = 0; 246bf215546Sopenharmony_ci bos[i] = NULL; 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci assert(offset * 4 + params <= v->constlen * 4); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci emit_const_ptrs(ring, v, offset * 4, params, bos, offsets); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci} 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cistatic inline void 257bf215546Sopenharmony_ciir3_emit_image_dims(struct fd_screen *screen, 258bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 259bf215546Sopenharmony_ci struct fd_ringbuffer *ring, 260bf215546Sopenharmony_ci struct fd_shaderimg_stateobj *si) 261bf215546Sopenharmony_ci{ 262bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 263bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.image_dims; 264bf215546Sopenharmony_ci if (v->constlen > offset) { 265bf215546Sopenharmony_ci uint32_t dims[align(const_state->image_dims.count, 4)]; 266bf215546Sopenharmony_ci unsigned mask = const_state->image_dims.mask; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci while (mask) { 269bf215546Sopenharmony_ci struct pipe_image_view *img; 270bf215546Sopenharmony_ci struct fd_resource *rsc; 271bf215546Sopenharmony_ci unsigned index = u_bit_scan(&mask); 272bf215546Sopenharmony_ci unsigned off = const_state->image_dims.off[index]; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci img = &si->si[index]; 275bf215546Sopenharmony_ci rsc = fd_resource(img->resource); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci dims[off + 0] = util_format_get_blocksize(img->format); 278bf215546Sopenharmony_ci if (img->resource->target != PIPE_BUFFER) { 279bf215546Sopenharmony_ci struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level); 280bf215546Sopenharmony_ci /* note for 2d/cube/etc images, even if re-interpreted 281bf215546Sopenharmony_ci * as a different color format, the pixel size should 282bf215546Sopenharmony_ci * be the same, so use original dimensions for y and z 283bf215546Sopenharmony_ci * stride: 284bf215546Sopenharmony_ci */ 285bf215546Sopenharmony_ci dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level); 286bf215546Sopenharmony_ci /* see corresponding logic in fd_resource_offset(): */ 287bf215546Sopenharmony_ci if (rsc->layout.layer_first) { 288bf215546Sopenharmony_ci dims[off + 2] = rsc->layout.layer_size; 289bf215546Sopenharmony_ci } else { 290bf215546Sopenharmony_ci dims[off + 2] = slice->size0; 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci } else { 293bf215546Sopenharmony_ci /* For buffer-backed images, the log2 of the format's 294bf215546Sopenharmony_ci * bytes-per-pixel is placed on the 2nd slot. This is useful 295bf215546Sopenharmony_ci * when emitting image_size instructions, for which we need 296bf215546Sopenharmony_ci * to divide by bpp for image buffers. Since the bpp 297bf215546Sopenharmony_ci * can only be power-of-two, the division is implemented 298bf215546Sopenharmony_ci * as a SHR, and for that it is handy to have the log2 of 299bf215546Sopenharmony_ci * bpp as a constant. (log2 = first-set-bit - 1) 300bf215546Sopenharmony_ci */ 301bf215546Sopenharmony_ci dims[off + 1] = ffs(dims[off + 0]) - 1; 302bf215546Sopenharmony_ci } 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci emit_const_user(ring, v, offset * 4, size, dims); 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_cistatic inline void 311bf215546Sopenharmony_ciir3_emit_immediates(struct fd_screen *screen, 312bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 313bf215546Sopenharmony_ci struct fd_ringbuffer *ring) 314bf215546Sopenharmony_ci{ 315bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 316bf215546Sopenharmony_ci uint32_t base = const_state->offsets.immediate; 317bf215546Sopenharmony_ci int size = DIV_ROUND_UP(const_state->immediates_count, 4); 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci /* truncate size to avoid writing constants that shader 320bf215546Sopenharmony_ci * does not use: 321bf215546Sopenharmony_ci */ 322bf215546Sopenharmony_ci size = MIN2(size + base, v->constlen) - base; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* convert out of vec4: */ 325bf215546Sopenharmony_ci base *= 4; 326bf215546Sopenharmony_ci size *= 4; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci if (size > 0) 329bf215546Sopenharmony_ci emit_const_user(ring, v, base, size, const_state->immediates); 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci /* NIR constant data has the same lifetime as immediates, so upload it 332bf215546Sopenharmony_ci * now, too. 333bf215546Sopenharmony_ci */ 334bf215546Sopenharmony_ci ir3_emit_constant_data(screen, v, ring); 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic inline void 338bf215546Sopenharmony_ciir3_emit_link_map(struct fd_screen *screen, 339bf215546Sopenharmony_ci const struct ir3_shader_variant *producer, 340bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 341bf215546Sopenharmony_ci struct fd_ringbuffer *ring) 342bf215546Sopenharmony_ci{ 343bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 344bf215546Sopenharmony_ci uint32_t base = const_state->offsets.primitive_map; 345bf215546Sopenharmony_ci int size = DIV_ROUND_UP(v->input_size, 4); 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci /* truncate size to avoid writing constants that shader 348bf215546Sopenharmony_ci * does not use: 349bf215546Sopenharmony_ci */ 350bf215546Sopenharmony_ci size = MIN2(size + base, v->constlen) - base; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci /* convert out of vec4: */ 353bf215546Sopenharmony_ci base *= 4; 354bf215546Sopenharmony_ci size *= 4; 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci if (size > 0) 357bf215546Sopenharmony_ci emit_const_user(ring, v, base, size, producer->output_loc); 358bf215546Sopenharmony_ci} 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci/* emit stream-out buffers: */ 361bf215546Sopenharmony_cistatic inline void 362bf215546Sopenharmony_ciemit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, 363bf215546Sopenharmony_ci struct fd_ringbuffer *ring) 364bf215546Sopenharmony_ci{ 365bf215546Sopenharmony_ci /* streamout addresses after driver-params: */ 366bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 367bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.tfbo; 368bf215546Sopenharmony_ci if (v->constlen > offset) { 369bf215546Sopenharmony_ci struct fd_streamout_stateobj *so = &ctx->streamout; 370bf215546Sopenharmony_ci const struct ir3_stream_output_info *info = &v->stream_output; 371bf215546Sopenharmony_ci uint32_t params = 4; 372bf215546Sopenharmony_ci uint32_t offsets[params]; 373bf215546Sopenharmony_ci struct fd_bo *bos[params]; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci for (uint32_t i = 0; i < params; i++) { 376bf215546Sopenharmony_ci struct pipe_stream_output_target *target = so->targets[i]; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci if (target) { 379bf215546Sopenharmony_ci offsets[i] = 380bf215546Sopenharmony_ci (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset; 381bf215546Sopenharmony_ci bos[i] = fd_resource(target->buffer)->bo; 382bf215546Sopenharmony_ci } else { 383bf215546Sopenharmony_ci offsets[i] = 0; 384bf215546Sopenharmony_ci bos[i] = NULL; 385bf215546Sopenharmony_ci } 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci assert(offset * 4 + params <= v->constlen * 4); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci emit_const_ptrs(ring, v, offset * 4, params, bos, offsets); 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_cistatic inline void 395bf215546Sopenharmony_ciemit_common_consts(const struct ir3_shader_variant *v, 396bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_context *ctx, 397bf215546Sopenharmony_ci enum pipe_shader_type t) assert_dt 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci /* When we use CP_SET_DRAW_STATE objects to emit constant state, 402bf215546Sopenharmony_ci * if we emit any of it we need to emit all. This is because 403bf215546Sopenharmony_ci * we are using the same state-group-id each time for uniform 404bf215546Sopenharmony_ci * state, and if previous update is never evaluated (due to no 405bf215546Sopenharmony_ci * visible primitives in the current tile) then the new stateobj 406bf215546Sopenharmony_ci * completely replaces the old one. 407bf215546Sopenharmony_ci * 408bf215546Sopenharmony_ci * Possibly if we split up different parts of the const state to 409bf215546Sopenharmony_ci * different state-objects we could avoid this. 410bf215546Sopenharmony_ci */ 411bf215546Sopenharmony_ci if (dirty && is_stateobj(ring)) 412bf215546Sopenharmony_ci dirty = ~0; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { 415bf215546Sopenharmony_ci struct fd_constbuf_stateobj *constbuf; 416bf215546Sopenharmony_ci bool shader_dirty; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci constbuf = &ctx->constbuf[t]; 419bf215546Sopenharmony_ci shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci ring_wfi(ctx->batch, ring); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci ir3_emit_user_consts(ctx->screen, v, ring, constbuf); 424bf215546Sopenharmony_ci ir3_emit_ubos(ctx, v, ring, constbuf); 425bf215546Sopenharmony_ci if (shader_dirty) 426bf215546Sopenharmony_ci ir3_emit_immediates(ctx->screen, v, ring); 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { 430bf215546Sopenharmony_ci struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; 431bf215546Sopenharmony_ci ring_wfi(ctx->batch, ring); 432bf215546Sopenharmony_ci ir3_emit_image_dims(ctx->screen, v, ring, si); 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci/* emit kernel params */ 437bf215546Sopenharmony_cistatic inline void 438bf215546Sopenharmony_ciemit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v, 439bf215546Sopenharmony_ci struct fd_ringbuffer *ring, const struct pipe_grid_info *info) 440bf215546Sopenharmony_ci assert_dt 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 443bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.kernel_params; 444bf215546Sopenharmony_ci if (v->constlen > offset) { 445bf215546Sopenharmony_ci ring_wfi(ctx->batch, ring); 446bf215546Sopenharmony_ci emit_const_user(ring, v, offset * 4, 447bf215546Sopenharmony_ci align(v->cs.req_input_mem, 4), 448bf215546Sopenharmony_ci info->input); 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci} 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_cistatic inline void 453bf215546Sopenharmony_ciir3_emit_driver_params(const struct ir3_shader_variant *v, 454bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_context *ctx, 455bf215546Sopenharmony_ci const struct pipe_draw_info *info, 456bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 457bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) assert_dt 458bf215546Sopenharmony_ci{ 459bf215546Sopenharmony_ci assert(v->need_driver_params); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 462bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.driver_param; 463bf215546Sopenharmony_ci uint32_t vertex_params[IR3_DP_VS_COUNT] = { 464bf215546Sopenharmony_ci [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ 465bf215546Sopenharmony_ci [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start, 466bf215546Sopenharmony_ci [IR3_DP_INSTID_BASE] = info->start_instance, 467bf215546Sopenharmony_ci [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx, 468bf215546Sopenharmony_ci }; 469bf215546Sopenharmony_ci if (v->key.ucp_enables) { 470bf215546Sopenharmony_ci struct pipe_clip_state *ucp = &ctx->ucp; 471bf215546Sopenharmony_ci unsigned pos = IR3_DP_UCP0_X; 472bf215546Sopenharmony_ci for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { 473bf215546Sopenharmony_ci for (unsigned j = 0; j < 4; j++) { 474bf215546Sopenharmony_ci vertex_params[pos] = fui(ucp->ucp[i][j]); 475bf215546Sopenharmony_ci pos++; 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci } 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci /* Only emit as many params as needed, i.e. up to the highest enabled UCP 481bf215546Sopenharmony_ci * plane. However a binning pass may drop even some of these, so limit to 482bf215546Sopenharmony_ci * program max. 483bf215546Sopenharmony_ci */ 484bf215546Sopenharmony_ci const uint32_t vertex_params_size = 485bf215546Sopenharmony_ci MIN2(const_state->num_driver_params, (v->constlen - offset) * 4); 486bf215546Sopenharmony_ci assert(vertex_params_size <= IR3_DP_VS_COUNT); 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci bool needs_vtxid_base = 489bf215546Sopenharmony_ci ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != 490bf215546Sopenharmony_ci regid(63, 0); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci /* for indirect draw, we need to copy VTXID_BASE from 493bf215546Sopenharmony_ci * indirect-draw parameters buffer.. which is annoying 494bf215546Sopenharmony_ci * and means we can't easily emit these consts in cmd 495bf215546Sopenharmony_ci * stream so need to copy them to bo. 496bf215546Sopenharmony_ci */ 497bf215546Sopenharmony_ci if (indirect && needs_vtxid_base) { 498bf215546Sopenharmony_ci uint32_t vertex_params_area = align(vertex_params_size, 16); 499bf215546Sopenharmony_ci struct pipe_resource *vertex_params_rsc = 500bf215546Sopenharmony_ci pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER, 501bf215546Sopenharmony_ci PIPE_USAGE_STREAM, vertex_params_area * 4); 502bf215546Sopenharmony_ci unsigned src_off = indirect->offset; 503bf215546Sopenharmony_ci void *ptr; 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); 506bf215546Sopenharmony_ci memcpy(ptr, vertex_params, vertex_params_size * 4); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci if (info->index_size) { 509bf215546Sopenharmony_ci /* indexed draw, index_bias is 4th field: */ 510bf215546Sopenharmony_ci src_off += 3 * 4; 511bf215546Sopenharmony_ci } else { 512bf215546Sopenharmony_ci /* non-indexed draw, start is 3rd field: */ 513bf215546Sopenharmony_ci src_off += 2 * 4; 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci /* copy index_bias or start from draw params: */ 517bf215546Sopenharmony_ci ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer, 518bf215546Sopenharmony_ci src_off, 1); 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area, 521bf215546Sopenharmony_ci vertex_params_rsc); 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci pipe_resource_reference(&vertex_params_rsc, NULL); 524bf215546Sopenharmony_ci } else { 525bf215546Sopenharmony_ci emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params); 526bf215546Sopenharmony_ci } 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci /* if needed, emit stream-out buffer addresses: */ 529bf215546Sopenharmony_ci if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { 530bf215546Sopenharmony_ci emit_tfbos(ctx, v, ring); 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci} 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_cistatic inline void 535bf215546Sopenharmony_ciir3_emit_vs_consts(const struct ir3_shader_variant *v, 536bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_context *ctx, 537bf215546Sopenharmony_ci const struct pipe_draw_info *info, 538bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 539bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) assert_dt 540bf215546Sopenharmony_ci{ 541bf215546Sopenharmony_ci assert(v->type == MESA_SHADER_VERTEX); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci /* emit driver params every time: */ 546bf215546Sopenharmony_ci if (info && v->need_driver_params) { 547bf215546Sopenharmony_ci ring_wfi(ctx->batch, ring); 548bf215546Sopenharmony_ci ir3_emit_driver_params(v, ring, ctx, info, indirect, draw); 549bf215546Sopenharmony_ci } 550bf215546Sopenharmony_ci} 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_cistatic inline void 553bf215546Sopenharmony_ciir3_emit_fs_consts(const struct ir3_shader_variant *v, 554bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt 555bf215546Sopenharmony_ci{ 556bf215546Sopenharmony_ci assert(v->type == MESA_SHADER_FRAGMENT); 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); 559bf215546Sopenharmony_ci} 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci/* emit compute-shader consts: */ 562bf215546Sopenharmony_cistatic inline void 563bf215546Sopenharmony_ciir3_emit_cs_consts(const struct ir3_shader_variant *v, 564bf215546Sopenharmony_ci struct fd_ringbuffer *ring, struct fd_context *ctx, 565bf215546Sopenharmony_ci const struct pipe_grid_info *info) assert_dt 566bf215546Sopenharmony_ci{ 567bf215546Sopenharmony_ci assert(gl_shader_stage_is_compute(v->type)); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); 570bf215546Sopenharmony_ci emit_kernel_params(ctx, v, ring, info); 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci /* a3xx/a4xx can inject these directly */ 573bf215546Sopenharmony_ci if (ctx->screen->gen <= 4) 574bf215546Sopenharmony_ci return; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci /* emit compute-shader driver-params: */ 577bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(v); 578bf215546Sopenharmony_ci uint32_t offset = const_state->offsets.driver_param; 579bf215546Sopenharmony_ci if (v->constlen > offset) { 580bf215546Sopenharmony_ci ring_wfi(ctx->batch, ring); 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci if (info->indirect) { 583bf215546Sopenharmony_ci struct pipe_resource *indirect = NULL; 584bf215546Sopenharmony_ci unsigned indirect_offset; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs 587bf215546Sopenharmony_ci * to be aligned more strongly than 4 bytes. So in this case 588bf215546Sopenharmony_ci * we need a temporary buffer to copy NumWorkGroups.xyz to. 589bf215546Sopenharmony_ci * 590bf215546Sopenharmony_ci * TODO if previous compute job is writing to info->indirect, 591bf215546Sopenharmony_ci * we might need a WFI.. but since we currently flush for each 592bf215546Sopenharmony_ci * compute job, we are probably ok for now. 593bf215546Sopenharmony_ci */ 594bf215546Sopenharmony_ci if (info->indirect_offset & 0xf) { 595bf215546Sopenharmony_ci indirect = pipe_buffer_create(&ctx->screen->base, 596bf215546Sopenharmony_ci PIPE_BIND_COMMAND_ARGS_BUFFER, 597bf215546Sopenharmony_ci PIPE_USAGE_STREAM, 0x1000); 598bf215546Sopenharmony_ci indirect_offset = 0; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect, 601bf215546Sopenharmony_ci info->indirect_offset, 3); 602bf215546Sopenharmony_ci } else { 603bf215546Sopenharmony_ci pipe_resource_reference(&indirect, info->indirect); 604bf215546Sopenharmony_ci indirect_offset = info->indirect_offset; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci pipe_resource_reference(&indirect, NULL); 610bf215546Sopenharmony_ci } else { 611bf215546Sopenharmony_ci uint32_t compute_params[IR3_DP_CS_COUNT] = { 612bf215546Sopenharmony_ci [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], 613bf215546Sopenharmony_ci [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], 614bf215546Sopenharmony_ci [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], 615bf215546Sopenharmony_ci [IR3_DP_WORK_DIM] = info->work_dim, 616bf215546Sopenharmony_ci [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], 617bf215546Sopenharmony_ci [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], 618bf215546Sopenharmony_ci [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], 619bf215546Sopenharmony_ci }; 620bf215546Sopenharmony_ci uint32_t size = 621bf215546Sopenharmony_ci MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci emit_const_user(ring, v, offset * 4, size, compute_params); 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci} 627