/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "pan_context.h" 25bf215546Sopenharmony_ci#include "util/u_vbuf.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_civoid 28bf215546Sopenharmony_cipanfrost_analyze_sysvals(struct panfrost_shader_state *ss) 29bf215546Sopenharmony_ci{ 30bf215546Sopenharmony_ci unsigned dirty = 0; 31bf215546Sopenharmony_ci unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST; 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { 34bf215546Sopenharmony_ci switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) { 35bf215546Sopenharmony_ci case PAN_SYSVAL_VIEWPORT_SCALE: 36bf215546Sopenharmony_ci case PAN_SYSVAL_VIEWPORT_OFFSET: 37bf215546Sopenharmony_ci dirty |= PAN_DIRTY_VIEWPORT; 38bf215546Sopenharmony_ci break; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci case PAN_SYSVAL_TEXTURE_SIZE: 41bf215546Sopenharmony_ci dirty_shader |= PAN_DIRTY_STAGE_TEXTURE; 42bf215546Sopenharmony_ci break; 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci case PAN_SYSVAL_SSBO: 45bf215546Sopenharmony_ci dirty_shader |= PAN_DIRTY_STAGE_SSBO; 46bf215546Sopenharmony_ci break; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci case PAN_SYSVAL_XFB: 49bf215546Sopenharmony_ci dirty |= PAN_DIRTY_SO; 50bf215546Sopenharmony_ci break; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci case PAN_SYSVAL_SAMPLER: 53bf215546Sopenharmony_ci dirty_shader |= PAN_DIRTY_STAGE_SAMPLER; 54bf215546Sopenharmony_ci break; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci case PAN_SYSVAL_IMAGE_SIZE: 57bf215546Sopenharmony_ci dirty_shader |= PAN_DIRTY_STAGE_IMAGE; 58bf215546Sopenharmony_ci break; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci case PAN_SYSVAL_NUM_WORK_GROUPS: 61bf215546Sopenharmony_ci case PAN_SYSVAL_LOCAL_GROUP_SIZE: 62bf215546Sopenharmony_ci case PAN_SYSVAL_WORK_DIM: 63bf215546Sopenharmony_ci case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: 
64bf215546Sopenharmony_ci case PAN_SYSVAL_NUM_VERTICES: 65bf215546Sopenharmony_ci dirty |= PAN_DIRTY_PARAMS; 66bf215546Sopenharmony_ci break; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci case PAN_SYSVAL_DRAWID: 69bf215546Sopenharmony_ci dirty |= PAN_DIRTY_DRAWID; 70bf215546Sopenharmony_ci break; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci case PAN_SYSVAL_SAMPLE_POSITIONS: 73bf215546Sopenharmony_ci case PAN_SYSVAL_MULTISAMPLED: 74bf215546Sopenharmony_ci case PAN_SYSVAL_RT_CONVERSION: 75bf215546Sopenharmony_ci /* Nothing beyond the batch itself */ 76bf215546Sopenharmony_ci break; 77bf215546Sopenharmony_ci default: 78bf215546Sopenharmony_ci unreachable("Invalid sysval"); 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci } 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci ss->dirty_3d = dirty; 83bf215546Sopenharmony_ci ss->dirty_shader = dirty_shader; 84bf215546Sopenharmony_ci} 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci/* 87bf215546Sopenharmony_ci * Gets a GPU address for the associated index buffer. Only gauranteed to be 88bf215546Sopenharmony_ci * good for the duration of the draw (transient), could last longer. Bounds are 89bf215546Sopenharmony_ci * not calculated. 
90bf215546Sopenharmony_ci */ 91bf215546Sopenharmony_cimali_ptr 92bf215546Sopenharmony_cipanfrost_get_index_buffer(struct panfrost_batch *batch, 93bf215546Sopenharmony_ci const struct pipe_draw_info *info, 94bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(info->index.resource); 97bf215546Sopenharmony_ci off_t offset = draw->start * info->index_size; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci if (!info->has_user_indices) { 100bf215546Sopenharmony_ci /* Only resources can be directly mapped */ 101bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); 102bf215546Sopenharmony_ci return rsrc->image.data.bo->ptr.gpu + offset; 103bf215546Sopenharmony_ci } else { 104bf215546Sopenharmony_ci /* Otherwise, we need to upload to transient memory */ 105bf215546Sopenharmony_ci const uint8_t *ibuf8 = (const uint8_t *) info->index.user; 106bf215546Sopenharmony_ci struct panfrost_ptr T = 107bf215546Sopenharmony_ci pan_pool_alloc_aligned(&batch->pool.base, 108bf215546Sopenharmony_ci draw->count * 109bf215546Sopenharmony_ci info->index_size, 110bf215546Sopenharmony_ci info->index_size); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size); 113bf215546Sopenharmony_ci return T.gpu; 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci} 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci/* Gets a GPU address for the associated index buffer. Only gauranteed to be 118bf215546Sopenharmony_ci * good for the duration of the draw (transient), could last longer. Also get 119bf215546Sopenharmony_ci * the bounds on the index buffer for the range accessed by the draw. We do 120bf215546Sopenharmony_ci * these operations together because there are natural optimizations which 121bf215546Sopenharmony_ci * require them to be together. 
*/ 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_cimali_ptr 124bf215546Sopenharmony_cipanfrost_get_index_buffer_bounded(struct panfrost_batch *batch, 125bf215546Sopenharmony_ci const struct pipe_draw_info *info, 126bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw, 127bf215546Sopenharmony_ci unsigned *min_index, unsigned *max_index) 128bf215546Sopenharmony_ci{ 129bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(info->index.resource); 130bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 131bf215546Sopenharmony_ci bool needs_indices = true; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci if (info->index_bounds_valid) { 134bf215546Sopenharmony_ci *min_index = info->min_index; 135bf215546Sopenharmony_ci *max_index = info->max_index; 136bf215546Sopenharmony_ci needs_indices = false; 137bf215546Sopenharmony_ci } else if (!info->has_user_indices) { 138bf215546Sopenharmony_ci /* Check the cache */ 139bf215546Sopenharmony_ci needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, 140bf215546Sopenharmony_ci draw->start, 141bf215546Sopenharmony_ci draw->count, 142bf215546Sopenharmony_ci min_index, 143bf215546Sopenharmony_ci max_index); 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci if (needs_indices) { 147bf215546Sopenharmony_ci /* Fallback */ 148bf215546Sopenharmony_ci u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index); 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if (!info->has_user_indices) 151bf215546Sopenharmony_ci panfrost_minmax_cache_add(rsrc->index_cache, 152bf215546Sopenharmony_ci draw->start, draw->count, 153bf215546Sopenharmony_ci *min_index, *max_index); 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci return panfrost_get_index_buffer(batch, info, draw); 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci/** 160bf215546Sopenharmony_ci * Given an (index, divisor) 
tuple, assign a vertex buffer. Midgard and 161bf215546Sopenharmony_ci * Bifrost put divisor information on the attribute buffer descriptor, so this 162bf215546Sopenharmony_ci * is the most we can compact in general. Crucially, this runs at vertex 163bf215546Sopenharmony_ci * elements CSO create time, not at draw time. 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_ciunsigned 166bf215546Sopenharmony_cipan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, 167bf215546Sopenharmony_ci unsigned *nr_bufs, 168bf215546Sopenharmony_ci unsigned vbi, 169bf215546Sopenharmony_ci unsigned divisor) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci /* Look up the buffer */ 172bf215546Sopenharmony_ci for (unsigned i = 0; i < (*nr_bufs); ++i) { 173bf215546Sopenharmony_ci if (buffers[i].vbi == vbi && buffers[i].divisor == divisor) 174bf215546Sopenharmony_ci return i; 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* Else, create a new buffer */ 178bf215546Sopenharmony_ci unsigned idx = (*nr_bufs)++; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci buffers[idx] = (struct pan_vertex_buffer) { 181bf215546Sopenharmony_ci .vbi = vbi, 182bf215546Sopenharmony_ci .divisor = divisor 183bf215546Sopenharmony_ci }; 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci return idx; 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci/* 189bf215546Sopenharmony_ci * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together, 190bf215546Sopenharmony_ci * meaning that we draw to a given target. Adding to only one mask does not 191bf215546Sopenharmony_ci * generally make sense, except for clears which add to batch->clear and 192bf215546Sopenharmony_ci * batch->resolve together. 
193bf215546Sopenharmony_ci */ 194bf215546Sopenharmony_cistatic void 195bf215546Sopenharmony_cipanfrost_draw_target(struct panfrost_batch *batch, unsigned target) 196bf215546Sopenharmony_ci{ 197bf215546Sopenharmony_ci batch->draws |= target; 198bf215546Sopenharmony_ci batch->resolve |= target; 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci/* 202bf215546Sopenharmony_ci * Draw time helper to set batch->{read, draws, resolve} based on current blend 203bf215546Sopenharmony_ci * and depth-stencil state. To be called when blend or depth/stencil dirty state 204bf215546Sopenharmony_ci * respectively changes. 205bf215546Sopenharmony_ci */ 206bf215546Sopenharmony_civoid 207bf215546Sopenharmony_cipanfrost_set_batch_masks_blend(struct panfrost_batch *batch) 208bf215546Sopenharmony_ci{ 209bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 210bf215546Sopenharmony_ci struct panfrost_blend_state *blend = ctx->blend; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { 213bf215546Sopenharmony_ci if (!blend->info[i].no_colour && batch->key.cbufs[i]) 214bf215546Sopenharmony_ci panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i); 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_civoid 219bf215546Sopenharmony_cipanfrost_set_batch_masks_zs(struct panfrost_batch *batch) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 222bf215546Sopenharmony_ci struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci /* Assume depth is read (TODO: perf) */ 225bf215546Sopenharmony_ci if (zsa->depth_enabled) 226bf215546Sopenharmony_ci batch->read |= PIPE_CLEAR_DEPTH; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (zsa->depth_writemask) 229bf215546Sopenharmony_ci panfrost_draw_target(batch, PIPE_CLEAR_DEPTH); 
230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci if (zsa->stencil[0].enabled) { 232bf215546Sopenharmony_ci panfrost_draw_target(batch, PIPE_CLEAR_STENCIL); 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci /* Assume stencil is read (TODO: perf) */ 235bf215546Sopenharmony_ci batch->read |= PIPE_CLEAR_STENCIL; 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci} 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_civoid 240bf215546Sopenharmony_cipanfrost_track_image_access(struct panfrost_batch *batch, 241bf215546Sopenharmony_ci enum pipe_shader_type stage, 242bf215546Sopenharmony_ci struct pipe_image_view *image) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(image->resource); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) { 247bf215546Sopenharmony_ci panfrost_batch_write_rsrc(batch, rsrc, stage); 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci bool is_buffer = rsrc->base.target == PIPE_BUFFER; 250bf215546Sopenharmony_ci unsigned level = is_buffer ? 0 : image->u.tex.level; 251bf215546Sopenharmony_ci BITSET_SET(rsrc->valid.data, level); 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_ci if (is_buffer) { 254bf215546Sopenharmony_ci util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 255bf215546Sopenharmony_ci 0, rsrc->base.width0); 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci } else { 258bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, stage); 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262