/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pan_context.h"
#include "util/u_vbuf.h"

void
panfrost_analyze_sysvals(struct panfrost_shader_state *ss)
{
   unsigned dirty = 0;
   unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;

   for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
      switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
      case PAN_SYSVAL_VIEWPORT_SCALE:
      case PAN_SYSVAL_VIEWPORT_OFFSET:
         dirty |= PAN_DIRTY_VIEWPORT;
         break;

      case PAN_SYSVAL_TEXTURE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
         break;

      case PAN_SYSVAL_SSBO:
         dirty_shader |= PAN_DIRTY_STAGE_SSBO;
         break;

      case PAN_SYSVAL_XFB:
         dirty |= PAN_DIRTY_SO;
         break;

      case PAN_SYSVAL_SAMPLER:
         dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
         break;

      case PAN_SYSVAL_IMAGE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
         break;

      case PAN_SYSVAL_NUM_WORK_GROUPS:
      case PAN_SYSVAL_LOCAL_GROUP_SIZE:
      case PAN_SYSVAL_WORK_DIM:
      case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
      case PAN_SYSVAL_NUM_VERTICES:
         dirty |= PAN_DIRTY_PARAMS;
         break;

      case PAN_SYSVAL_DRAWID:
         dirty |= PAN_DIRTY_DRAWID;
         break;

      case PAN_SYSVAL_SAMPLE_POSITIONS:
      case PAN_SYSVAL_MULTISAMPLED:
      case PAN_SYSVAL_RT_CONVERSION:
         /* Nothing beyond the batch itself */
         break;

      default:
         unreachable("Invalid sysval");
      }
   }

   ss->dirty_3d = dirty;
   ss->dirty_shader = dirty_shader;
}
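/*
 * A minimal sketch (not part of the driver) of how the masks computed above
 * might be consumed at draw time: state only needs re-emission if it is both
 * dirty and actually read by the bound shader. The ctx->dirty and
 * ctx->dirty_shader fields assumed here are illustrative stand-ins for the
 * context's PAN_DIRTY_* tracking, and example_needs_reemit is hypothetical.
 */
#if 0
static bool
example_needs_reemit(const struct panfrost_context *ctx,
                     const struct panfrost_shader_state *ss,
                     enum pipe_shader_type stage)
{
   /* Intersect the context's dirty bits with the bits this shader reads */
   return (ctx->dirty & ss->dirty_3d) ||
          (ctx->dirty_shader[stage] & ss->dirty_shader);
}
#endif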
/*
 * Gets a GPU address for the associated index buffer. The address is only
 * guaranteed to be valid for the duration of the draw (it is transient),
 * though it may persist longer. Bounds are not calculated.
 */
mali_ptr
panfrost_get_index_buffer(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          const struct pipe_draw_start_count_bias *draw)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   off_t offset = draw->start * info->index_size;

   if (!info->has_user_indices) {
      /* Only resources can be directly mapped */
      panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
      return rsrc->image.data.bo->ptr.gpu + offset;
   } else {
      /* Otherwise, we need to upload to transient memory */
      const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
      struct panfrost_ptr T =
         pan_pool_alloc_aligned(&batch->pool.base,
                                draw->count * info->index_size,
                                info->index_size);

      memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
      return T.gpu;
   }
}

/*
 * As panfrost_get_index_buffer, but also gets the bounds (minimum and maximum
 * index) of the index buffer for the range accessed by the draw. The returned
 * address carries the same transient lifetime guarantee. We do these
 * operations together because there are natural optimizations which require
 * them to be performed together.
 */
mali_ptr
panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
                                  const struct pipe_draw_info *info,
                                  const struct pipe_draw_start_count_bias *draw,
                                  unsigned *min_index, unsigned *max_index)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   struct panfrost_context *ctx = batch->ctx;
   bool needs_indices = true;

   if (info->index_bounds_valid) {
      /* Bounds were already provided by the frontend */
      *min_index = info->min_index;
      *max_index = info->max_index;
      needs_indices = false;
   } else if (!info->has_user_indices) {
      /* Check the cache */
      needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
                                                 draw->start, draw->count,
                                                 min_index, max_index);
   }

   if (needs_indices) {
      /* Fallback */
      u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);

      if (!info->has_user_indices)
         panfrost_minmax_cache_add(rsrc->index_cache,
                                   draw->start, draw->count,
                                   *min_index, *max_index);
   }

   return panfrost_get_index_buffer(batch, info, draw);
}

/**
 * Given an (index, divisor) tuple, assign a vertex buffer. Midgard and
 * Bifrost put divisor information on the attribute buffer descriptor, so this
 * is the most we can compact in general. Crucially, this runs at vertex
 * elements CSO create time, not at draw time.
 */
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
                         unsigned *nr_bufs,
                         unsigned vbi,
                         unsigned divisor)
{
   /* Look up the buffer */
   for (unsigned i = 0; i < (*nr_bufs); ++i) {
      if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
         return i;
   }

   /* Else, create a new buffer */
   unsigned idx = (*nr_bufs)++;

   buffers[idx] = (struct pan_vertex_buffer){
      .vbi = vbi,
      .divisor = divisor,
   };

   return idx;
}
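/*
 * A minimal sketch of how a vertex elements CSO create path might use
 * pan_assign_vertex_buffer to deduplicate (buffer index, divisor) pairs once
 * at CSO creation time, so draw time only walks the compacted list. The
 * struct example_velems and example_create_velems names are assumptions for
 * illustration, not the driver's actual CSO layout.
 */
#if 0
struct example_velems {
   struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS];
   unsigned nr_bufs;

   /* For each vertex element, the index of its deduplicated buffer */
   unsigned attr_buf[PIPE_MAX_ATTRIBS];
};

static void
example_create_velems(struct example_velems *out, unsigned count,
                      const struct pipe_vertex_element *elems)
{
   out->nr_bufs = 0;

   for (unsigned i = 0; i < count; ++i) {
      /* Elements sharing a (buffer, divisor) pair share one descriptor */
      out->attr_buf[i] =
         pan_assign_vertex_buffer(out->buffers, &out->nr_bufs,
                                  elems[i].vertex_buffer_index,
                                  elems[i].instance_divisor);
   }
}
#endif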
/*
 * Helper to add a PIPE_CLEAR_* bit to batch->draws and batch->resolve
 * together, meaning that we draw to a given target. Adding to only one mask
 * does not generally make sense, except for clears, which add to batch->clear
 * and batch->resolve together.
 */
static void
panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
{
   batch->draws |= target;
   batch->resolve |= target;
}

/*
 * Draw-time helpers to set batch->{read, draws, resolve} based on the current
 * blend and depth-stencil state. To be called when the blend or depth/stencil
 * state respectively becomes dirty.
 */
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct panfrost_blend_state *blend = ctx->blend;

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      if (!blend->info[i].no_colour && batch->key.cbufs[i])
         panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
   }
}

void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;

   /* Assume depth is read (TODO: perf) */
   if (zsa->depth_enabled)
      batch->read |= PIPE_CLEAR_DEPTH;

   if (zsa->depth_writemask)
      panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);

   if (zsa->stencil[0].enabled) {
      panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);

      /* Assume stencil is read (TODO: perf) */
      batch->read |= PIPE_CLEAR_STENCIL;
   }
}

void
panfrost_track_image_access(struct panfrost_batch *batch,
                            enum pipe_shader_type stage,
                            struct pipe_image_view *image)
{
   struct panfrost_resource *rsrc = pan_resource(image->resource);

   if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
      panfrost_batch_write_rsrc(batch, rsrc, stage);

      bool is_buffer = rsrc->base.target == PIPE_BUFFER;
      unsigned level = is_buffer ? 0 : image->u.tex.level;
      BITSET_SET(rsrc->valid.data, level);

      if (is_buffer) {
         util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
                        0, rsrc->base.width0);
      }
   } else {
      panfrost_batch_read_rsrc(batch, rsrc, stage);
   }
}
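/*
 * A minimal sketch of a draw-time loop tracking every bound image for one
 * shader stage with panfrost_track_image_access. The images array and count
 * parameters stand in for the driver's real shader-image state, and
 * example_track_images is hypothetical.
 */
#if 0
static void
example_track_images(struct panfrost_batch *batch,
                     enum pipe_shader_type stage,
                     struct pipe_image_view *images, unsigned count)
{
   for (unsigned i = 0; i < count; ++i) {
      /* Skip unbound slots */
      if (!images[i].resource)
         continue;

      panfrost_track_image_access(batch, stage, &images[i]);
   }
}
#endif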