/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2020 Collabora Ltd.
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "util/macros.h" 27bf215546Sopenharmony_ci#include "util/u_prim.h" 28bf215546Sopenharmony_ci#include "util/u_vbuf.h" 29bf215546Sopenharmony_ci#include "util/u_helpers.h" 30bf215546Sopenharmony_ci#include "util/u_draw.h" 31bf215546Sopenharmony_ci#include "util/u_memory.h" 32bf215546Sopenharmony_ci#include "pipe/p_defines.h" 33bf215546Sopenharmony_ci#include "pipe/p_state.h" 34bf215546Sopenharmony_ci#include "gallium/auxiliary/util/u_blend.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "genxml/gen_macros.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "pan_pool.h" 39bf215546Sopenharmony_ci#include "pan_bo.h" 40bf215546Sopenharmony_ci#include "pan_blend.h" 41bf215546Sopenharmony_ci#include "pan_context.h" 42bf215546Sopenharmony_ci#include "pan_job.h" 43bf215546Sopenharmony_ci#include "pan_shader.h" 44bf215546Sopenharmony_ci#include "pan_texture.h" 45bf215546Sopenharmony_ci#include "pan_util.h" 46bf215546Sopenharmony_ci#include "pan_indirect_draw.h" 47bf215546Sopenharmony_ci#include "pan_indirect_dispatch.h" 48bf215546Sopenharmony_ci#include "pan_blitter.h" 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci#define PAN_GPU_INDIRECTS (PAN_ARCH == 7) 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_cistruct panfrost_rasterizer { 53bf215546Sopenharmony_ci struct pipe_rasterizer_state base; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci#if PAN_ARCH <= 7 56bf215546Sopenharmony_ci /* Partially packed RSD words */ 57bf215546Sopenharmony_ci struct mali_multisample_misc_packed multisample; 58bf215546Sopenharmony_ci struct mali_stencil_mask_misc_packed stencil_misc; 59bf215546Sopenharmony_ci#endif 60bf215546Sopenharmony_ci}; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistruct panfrost_zsa_state { 63bf215546Sopenharmony_ci struct pipe_depth_stencil_alpha_state base; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci /* Is any depth, stencil, or alpha 
testing enabled? */ 66bf215546Sopenharmony_ci bool enabled; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci /* Does the depth and stencil tests always pass? This ignores write 69bf215546Sopenharmony_ci * masks, we are only interested in whether pixels may be killed. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci bool zs_always_passes; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci /* Are depth or stencil writes possible? */ 74bf215546Sopenharmony_ci bool writes_zs; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci#if PAN_ARCH <= 7 77bf215546Sopenharmony_ci /* Prepacked words from the RSD */ 78bf215546Sopenharmony_ci struct mali_multisample_misc_packed rsd_depth; 79bf215546Sopenharmony_ci struct mali_stencil_mask_misc_packed rsd_stencil; 80bf215546Sopenharmony_ci struct mali_stencil_packed stencil_front, stencil_back; 81bf215546Sopenharmony_ci#else 82bf215546Sopenharmony_ci /* Depth/stencil descriptor template */ 83bf215546Sopenharmony_ci struct mali_depth_stencil_packed desc; 84bf215546Sopenharmony_ci#endif 85bf215546Sopenharmony_ci}; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistruct panfrost_sampler_state { 88bf215546Sopenharmony_ci struct pipe_sampler_state base; 89bf215546Sopenharmony_ci struct mali_sampler_packed hw; 90bf215546Sopenharmony_ci}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci/* Misnomer: Sampler view corresponds to textures, not samplers */ 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistruct panfrost_sampler_view { 95bf215546Sopenharmony_ci struct pipe_sampler_view base; 96bf215546Sopenharmony_ci struct panfrost_pool_ref state; 97bf215546Sopenharmony_ci struct mali_texture_packed bifrost_descriptor; 98bf215546Sopenharmony_ci mali_ptr texture_bo; 99bf215546Sopenharmony_ci uint64_t modifier; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci /* Pool used to allocate the descriptor. If NULL, defaults to the global 102bf215546Sopenharmony_ci * descriptor pool. 
Can be set for short lived descriptors, useful for 103bf215546Sopenharmony_ci * shader images on Valhall. 104bf215546Sopenharmony_ci */ 105bf215546Sopenharmony_ci struct panfrost_pool *pool; 106bf215546Sopenharmony_ci}; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_cistruct panfrost_vertex_state { 109bf215546Sopenharmony_ci unsigned num_elements; 110bf215546Sopenharmony_ci struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci#if PAN_ARCH >= 9 113bf215546Sopenharmony_ci /* Packed attribute descriptor. All fields are set at CSO create time 114bf215546Sopenharmony_ci * except for stride, which must be ORed in at draw time 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_ci struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS]; 117bf215546Sopenharmony_ci#else 118bf215546Sopenharmony_ci /* buffers corresponds to attribute buffer, element_buffers corresponds 119bf215546Sopenharmony_ci * to an index in buffers for each vertex element */ 120bf215546Sopenharmony_ci struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS]; 121bf215546Sopenharmony_ci unsigned element_buffer[PIPE_MAX_ATTRIBS]; 122bf215546Sopenharmony_ci unsigned nr_bufs; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci unsigned formats[PIPE_MAX_ATTRIBS]; 125bf215546Sopenharmony_ci#endif 126bf215546Sopenharmony_ci}; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci/* Statically assert that PIPE_* enums match the hardware enums. 129bf215546Sopenharmony_ci * (As long as they match, we don't need to translate them.) 
130bf215546Sopenharmony_ci */ 131bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_NEVER == MALI_FUNC_NEVER, "must match"); 132bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_LESS == MALI_FUNC_LESS, "must match"); 133bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_EQUAL == MALI_FUNC_EQUAL, "must match"); 134bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_LEQUAL == MALI_FUNC_LEQUAL, "must match"); 135bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_GREATER == MALI_FUNC_GREATER, "must match"); 136bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_NOTEQUAL == MALI_FUNC_NOT_EQUAL, "must match"); 137bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_GEQUAL == MALI_FUNC_GEQUAL, "must match"); 138bf215546Sopenharmony_cistatic_assert((int)PIPE_FUNC_ALWAYS == MALI_FUNC_ALWAYS, "must match"); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_cistatic inline enum mali_sample_pattern 141bf215546Sopenharmony_cipanfrost_sample_pattern(unsigned samples) 142bf215546Sopenharmony_ci{ 143bf215546Sopenharmony_ci switch (samples) { 144bf215546Sopenharmony_ci case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; 145bf215546Sopenharmony_ci case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; 146bf215546Sopenharmony_ci case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID; 147bf215546Sopenharmony_ci case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID; 148bf215546Sopenharmony_ci default: unreachable("Unsupported sample count"); 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci} 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_cistatic unsigned 153bf215546Sopenharmony_citranslate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest) 154bf215546Sopenharmony_ci{ 155bf215546Sopenharmony_ci /* CLAMP is only supported on Midgard, where it is broken for nearest 156bf215546Sopenharmony_ci * filtering. Use CLAMP_TO_EDGE in that case. 
157bf215546Sopenharmony_ci */ 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci switch (w) { 160bf215546Sopenharmony_ci case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT; 161bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; 162bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; 163bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; 164bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; 165bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci#if PAN_ARCH <= 5 168bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP: 169bf215546Sopenharmony_ci return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE : 170bf215546Sopenharmony_ci MALI_WRAP_MODE_CLAMP; 171bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP: 172bf215546Sopenharmony_ci return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE : 173bf215546Sopenharmony_ci MALI_WRAP_MODE_MIRRORED_CLAMP; 174bf215546Sopenharmony_ci#endif 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci default: unreachable("Invalid wrap"); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci/* The hardware compares in the wrong order order, so we have to flip before 181bf215546Sopenharmony_ci * encoding. Yes, really. */ 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_cistatic enum mali_func 184bf215546Sopenharmony_cipanfrost_sampler_compare_func(const struct pipe_sampler_state *cso) 185bf215546Sopenharmony_ci{ 186bf215546Sopenharmony_ci return !cso->compare_mode ? 
MALI_FUNC_NEVER : 187bf215546Sopenharmony_ci panfrost_flip_compare_func((enum mali_func) cso->compare_func); 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_cistatic enum mali_mipmap_mode 191bf215546Sopenharmony_cipan_pipe_to_mipmode(enum pipe_tex_mipfilter f) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci switch (f) { 194bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST; 195bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; 196bf215546Sopenharmony_ci#if PAN_ARCH >= 6 197bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE; 198bf215546Sopenharmony_ci#else 199bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NEAREST; 200bf215546Sopenharmony_ci#endif 201bf215546Sopenharmony_ci default: unreachable("Invalid"); 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci} 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_cistatic void * 207bf215546Sopenharmony_cipanfrost_create_sampler_state( 208bf215546Sopenharmony_ci struct pipe_context *pctx, 209bf215546Sopenharmony_ci const struct pipe_sampler_state *cso) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); 212bf215546Sopenharmony_ci so->base = *cso; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci pan_pack(&so->hw, SAMPLER, cfg) { 217bf215546Sopenharmony_ci cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; 218bf215546Sopenharmony_ci cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci cfg.normalized_coordinates = cso->normalized_coords; 221bf215546Sopenharmony_ci cfg.lod_bias = FIXED_16(cso->lod_bias, true); 
222bf215546Sopenharmony_ci cfg.minimum_lod = FIXED_16(cso->min_lod, false); 223bf215546Sopenharmony_ci cfg.maximum_lod = FIXED_16(cso->max_lod, false); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest); 226bf215546Sopenharmony_ci cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest); 227bf215546Sopenharmony_ci cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter); 230bf215546Sopenharmony_ci cfg.compare_function = panfrost_sampler_compare_func(cso); 231bf215546Sopenharmony_ci cfg.seamless_cube_map = cso->seamless_cube_map; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci cfg.border_color_r = cso->border_color.ui[0]; 234bf215546Sopenharmony_ci cfg.border_color_g = cso->border_color.ui[1]; 235bf215546Sopenharmony_ci cfg.border_color_b = cso->border_color.ui[2]; 236bf215546Sopenharmony_ci cfg.border_color_a = cso->border_color.ui[3]; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci#if PAN_ARCH >= 6 239bf215546Sopenharmony_ci if (cso->max_anisotropy > 1) { 240bf215546Sopenharmony_ci cfg.maximum_anisotropy = cso->max_anisotropy; 241bf215546Sopenharmony_ci cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC; 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci#else 244bf215546Sopenharmony_ci /* Emulate disabled mipmapping by clamping the LOD as tight as 245bf215546Sopenharmony_ci * possible (from 0 to epsilon = 1/256) */ 246bf215546Sopenharmony_ci if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) 247bf215546Sopenharmony_ci cfg.maximum_lod = cfg.minimum_lod + 1; 248bf215546Sopenharmony_ci#endif 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci return so; 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_cistatic bool 255bf215546Sopenharmony_cipanfrost_fs_required( 256bf215546Sopenharmony_ci struct 
panfrost_shader_state *fs, 257bf215546Sopenharmony_ci struct panfrost_blend_state *blend, 258bf215546Sopenharmony_ci struct pipe_framebuffer_state *state, 259bf215546Sopenharmony_ci const struct panfrost_zsa_state *zsa) 260bf215546Sopenharmony_ci{ 261bf215546Sopenharmony_ci /* If we generally have side effects. This inclues use of discard, 262bf215546Sopenharmony_ci * which can affect the results of an occlusion query. */ 263bf215546Sopenharmony_ci if (fs->info.fs.sidefx) 264bf215546Sopenharmony_ci return true; 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci /* Using an empty FS requires early-z to be enabled, but alpha test 267bf215546Sopenharmony_ci * needs it disabled. Alpha test is only native on Midgard, so only 268bf215546Sopenharmony_ci * check there. 269bf215546Sopenharmony_ci */ 270bf215546Sopenharmony_ci if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS) 271bf215546Sopenharmony_ci return true; 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci /* If colour is written we need to execute */ 274bf215546Sopenharmony_ci for (unsigned i = 0; i < state->nr_cbufs; ++i) { 275bf215546Sopenharmony_ci if (state->cbufs[i] && !blend->info[i].no_colour) 276bf215546Sopenharmony_ci return true; 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci /* If depth is written and not implied we need to execute. 280bf215546Sopenharmony_ci * TODO: Predicate on Z/S writes being enabled */ 281bf215546Sopenharmony_ci return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil); 282bf215546Sopenharmony_ci} 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci/* Get pointers to the blend shaders bound to each active render target. Used 285bf215546Sopenharmony_ci * to emit the blend descriptors, as well as the fragment renderer state 286bf215546Sopenharmony_ci * descriptor. 
287bf215546Sopenharmony_ci */ 288bf215546Sopenharmony_cistatic void 289bf215546Sopenharmony_cipanfrost_get_blend_shaders(struct panfrost_batch *batch, 290bf215546Sopenharmony_ci mali_ptr *blend_shaders) 291bf215546Sopenharmony_ci{ 292bf215546Sopenharmony_ci unsigned shader_offset = 0; 293bf215546Sopenharmony_ci struct panfrost_bo *shader_bo = NULL; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) { 296bf215546Sopenharmony_ci if (batch->key.cbufs[c]) { 297bf215546Sopenharmony_ci blend_shaders[c] = panfrost_get_blend(batch, 298bf215546Sopenharmony_ci c, &shader_bo, &shader_offset); 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci} 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci#if PAN_ARCH >= 5 304bf215546Sopenharmony_ciUNUSED static uint16_t 305bf215546Sopenharmony_cipack_blend_constant(enum pipe_format format, float cons) 306bf215546Sopenharmony_ci{ 307bf215546Sopenharmony_ci const struct util_format_description *format_desc = 308bf215546Sopenharmony_ci util_format_description(format); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci unsigned chan_size = 0; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci for (unsigned i = 0; i < format_desc->nr_channels; i++) 313bf215546Sopenharmony_ci chan_size = MAX2(format_desc->channel[0].size, chan_size); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci uint16_t unorm = (cons * ((1 << chan_size) - 1)); 316bf215546Sopenharmony_ci return unorm << (16 - chan_size); 317bf215546Sopenharmony_ci} 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci/* 320bf215546Sopenharmony_ci * Determine whether to set the respective overdraw alpha flag. 321bf215546Sopenharmony_ci * 322bf215546Sopenharmony_ci * The overdraw alpha=1 flag should be set when alpha=1 implies full overdraw, 323bf215546Sopenharmony_ci * equivalently, all enabled render targets have alpha_one_store set. 
Likewise, 324bf215546Sopenharmony_ci * overdraw alpha=0 should be set when alpha=0 implies no overdraw, 325bf215546Sopenharmony_ci * equivalently, all enabled render targets have alpha_zero_nop set. 326bf215546Sopenharmony_ci */ 327bf215546Sopenharmony_cistatic bool 328bf215546Sopenharmony_cipanfrost_overdraw_alpha(const struct panfrost_context *ctx, bool zero) 329bf215546Sopenharmony_ci{ 330bf215546Sopenharmony_ci const struct panfrost_blend_state *so = ctx->blend; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { 333bf215546Sopenharmony_ci const struct pan_blend_info info = so->info[i]; 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci bool enabled = ctx->pipe_framebuffer.cbufs[i] && info.no_colour; 336bf215546Sopenharmony_ci bool flag = zero ? info.alpha_zero_nop : info.alpha_one_store; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci if (enabled && !flag) 339bf215546Sopenharmony_ci return false; 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci return true; 343bf215546Sopenharmony_ci} 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_cistatic void 346bf215546Sopenharmony_cipanfrost_emit_blend(struct panfrost_batch *batch, void *rts, mali_ptr *blend_shaders) 347bf215546Sopenharmony_ci{ 348bf215546Sopenharmony_ci unsigned rt_count = batch->key.nr_cbufs; 349bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 350bf215546Sopenharmony_ci const struct panfrost_blend_state *so = ctx->blend; 351bf215546Sopenharmony_ci bool dithered = so->base.dither; 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci /* Always have at least one render target for depth-only passes */ 354bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) { 355bf215546Sopenharmony_ci struct mali_blend_packed *packed = rts + (i * pan_size(BLEND)); 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci /* Disable blending for unbacked render targets */ 
358bf215546Sopenharmony_ci if (rt_count == 0 || !batch->key.cbufs[i] || so->info[i].no_colour) { 359bf215546Sopenharmony_ci pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) { 360bf215546Sopenharmony_ci cfg.enable = false; 361bf215546Sopenharmony_ci#if PAN_ARCH >= 6 362bf215546Sopenharmony_ci cfg.internal.mode = MALI_BLEND_MODE_OFF; 363bf215546Sopenharmony_ci#endif 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci continue; 367bf215546Sopenharmony_ci } 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci struct pan_blend_info info = so->info[i]; 370bf215546Sopenharmony_ci enum pipe_format format = batch->key.cbufs[i]->format; 371bf215546Sopenharmony_ci float cons = pan_blend_get_constant(info.constant_mask, 372bf215546Sopenharmony_ci ctx->blend_color.color); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci /* Word 0: Flags and constant */ 375bf215546Sopenharmony_ci pan_pack(packed, BLEND, cfg) { 376bf215546Sopenharmony_ci cfg.srgb = util_format_is_srgb(format); 377bf215546Sopenharmony_ci cfg.load_destination = info.load_dest; 378bf215546Sopenharmony_ci cfg.round_to_fb_precision = !dithered; 379bf215546Sopenharmony_ci cfg.alpha_to_one = ctx->blend->base.alpha_to_one; 380bf215546Sopenharmony_ci#if PAN_ARCH >= 6 381bf215546Sopenharmony_ci if (!blend_shaders[i]) 382bf215546Sopenharmony_ci cfg.constant = pack_blend_constant(format, cons); 383bf215546Sopenharmony_ci#else 384bf215546Sopenharmony_ci cfg.blend_shader = (blend_shaders[i] != 0); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci if (blend_shaders[i]) 387bf215546Sopenharmony_ci cfg.shader_pc = blend_shaders[i]; 388bf215546Sopenharmony_ci else 389bf215546Sopenharmony_ci cfg.constant = cons; 390bf215546Sopenharmony_ci#endif 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci if (!blend_shaders[i]) { 394bf215546Sopenharmony_ci /* Word 1: Blend Equation */ 395bf215546Sopenharmony_ci STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); 
396bf215546Sopenharmony_ci packed->opaque[PAN_ARCH >= 6 ? 1 : 2] = so->equation[i]; 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci#if PAN_ARCH >= 6 400bf215546Sopenharmony_ci const struct panfrost_device *dev = pan_device(ctx->base.screen); 401bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 402bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci /* Words 2 and 3: Internal blend */ 405bf215546Sopenharmony_ci if (blend_shaders[i]) { 406bf215546Sopenharmony_ci /* The blend shader's address needs to be at 407bf215546Sopenharmony_ci * the same top 32 bit as the fragment shader. 408bf215546Sopenharmony_ci * TODO: Ensure that's always the case. 409bf215546Sopenharmony_ci */ 410bf215546Sopenharmony_ci assert(!fs->bin.bo || 411bf215546Sopenharmony_ci (blend_shaders[i] & (0xffffffffull << 32)) == 412bf215546Sopenharmony_ci (fs->bin.gpu & (0xffffffffull << 32))); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { 415bf215546Sopenharmony_ci cfg.mode = MALI_BLEND_MODE_SHADER; 416bf215546Sopenharmony_ci cfg.shader.pc = (u32) blend_shaders[i]; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci#if PAN_ARCH <= 7 419bf215546Sopenharmony_ci unsigned ret_offset = fs->info.bifrost.blend[i].return_offset; 420bf215546Sopenharmony_ci assert(!(ret_offset & 0x7)); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci cfg.shader.return_value = ret_offset ? 423bf215546Sopenharmony_ci fs->bin.gpu + ret_offset : 0; 424bf215546Sopenharmony_ci#endif 425bf215546Sopenharmony_ci } 426bf215546Sopenharmony_ci } else { 427bf215546Sopenharmony_ci pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { 428bf215546Sopenharmony_ci cfg.mode = info.opaque ? 
429bf215546Sopenharmony_ci MALI_BLEND_MODE_OPAQUE : 430bf215546Sopenharmony_ci MALI_BLEND_MODE_FIXED_FUNCTION; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci /* If we want the conversion to work properly, 433bf215546Sopenharmony_ci * num_comps must be set to 4 434bf215546Sopenharmony_ci */ 435bf215546Sopenharmony_ci cfg.fixed_function.num_comps = 4; 436bf215546Sopenharmony_ci cfg.fixed_function.conversion.memory_format = 437bf215546Sopenharmony_ci panfrost_format_to_bifrost_blend(dev, format, dithered); 438bf215546Sopenharmony_ci cfg.fixed_function.conversion.register_format = 439bf215546Sopenharmony_ci fs->info.bifrost.blend[i].format; 440bf215546Sopenharmony_ci cfg.fixed_function.rt = i; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci#if PAN_ARCH <= 7 443bf215546Sopenharmony_ci if (!info.opaque) { 444bf215546Sopenharmony_ci cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop; 445bf215546Sopenharmony_ci cfg.fixed_function.alpha_one_store = info.alpha_one_store; 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci#endif 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci#endif 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci#endif 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_cistatic inline bool 456bf215546Sopenharmony_cipan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, struct panfrost_shader_state *fs) 457bf215546Sopenharmony_ci{ 458bf215546Sopenharmony_ci /* Track if any colour buffer is reused across draws, either 459bf215546Sopenharmony_ci * from reading it directly, or from failing to write it 460bf215546Sopenharmony_ci */ 461bf215546Sopenharmony_ci unsigned rt_mask = ctx->fb_rt_mask; 462bf215546Sopenharmony_ci uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0); 463bf215546Sopenharmony_ci bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask); 464bf215546Sopenharmony_ci bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; 
465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci return fs->info.fs.can_fpk && 467bf215546Sopenharmony_ci !(rt_mask & ~rt_written) && 468bf215546Sopenharmony_ci !alpha_to_coverage && 469bf215546Sopenharmony_ci !blend_reads_dest; 470bf215546Sopenharmony_ci} 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_cistatic mali_ptr 473bf215546Sopenharmony_cipanfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage) 474bf215546Sopenharmony_ci{ 475bf215546Sopenharmony_ci struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX); 478bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX); 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci return ss->state.gpu; 481bf215546Sopenharmony_ci} 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci#if PAN_ARCH <= 7 484bf215546Sopenharmony_ci/* Construct a partial RSD corresponding to no executed fragment shader, and 485bf215546Sopenharmony_ci * merge with the existing partial RSD. 
*/ 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_cistatic void 488bf215546Sopenharmony_cipan_merge_empty_fs(struct mali_renderer_state_packed *rsd) 489bf215546Sopenharmony_ci{ 490bf215546Sopenharmony_ci struct mali_renderer_state_packed empty_rsd; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci pan_pack(&empty_rsd, RENDERER_STATE, cfg) { 493bf215546Sopenharmony_ci#if PAN_ARCH >= 6 494bf215546Sopenharmony_ci cfg.properties.shader_modifies_coverage = true; 495bf215546Sopenharmony_ci cfg.properties.allow_forward_pixel_to_kill = true; 496bf215546Sopenharmony_ci cfg.properties.allow_forward_pixel_to_be_killed = true; 497bf215546Sopenharmony_ci cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci /* Alpha isn't written so these are vacuous */ 500bf215546Sopenharmony_ci cfg.multisample_misc.overdraw_alpha0 = true; 501bf215546Sopenharmony_ci cfg.multisample_misc.overdraw_alpha1 = true; 502bf215546Sopenharmony_ci#else 503bf215546Sopenharmony_ci cfg.shader.shader = 0x1; 504bf215546Sopenharmony_ci cfg.properties.work_register_count = 1; 505bf215546Sopenharmony_ci cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; 506bf215546Sopenharmony_ci cfg.properties.force_early_z = true; 507bf215546Sopenharmony_ci#endif 508bf215546Sopenharmony_ci } 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci pan_merge((*rsd), empty_rsd, RENDERER_STATE); 511bf215546Sopenharmony_ci} 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_cistatic void 514bf215546Sopenharmony_cipanfrost_prepare_fs_state(struct panfrost_context *ctx, 515bf215546Sopenharmony_ci mali_ptr *blend_shaders, 516bf215546Sopenharmony_ci struct mali_renderer_state_packed *rsd) 517bf215546Sopenharmony_ci{ 518bf215546Sopenharmony_ci struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; 519bf215546Sopenharmony_ci const struct panfrost_zsa_state *zsa = ctx->depth_stencil; 520bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 
panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 521bf215546Sopenharmony_ci struct panfrost_blend_state *so = ctx->blend; 522bf215546Sopenharmony_ci bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; 523bf215546Sopenharmony_ci bool msaa = rast->multisample; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci bool has_blend_shader = false; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci for (unsigned c = 0; c < rt_count; ++c) 530bf215546Sopenharmony_ci has_blend_shader |= (blend_shaders[c] != 0); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci bool has_oq = ctx->occlusion_query && ctx->active_queries; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci pan_pack(rsd, RENDERER_STATE, cfg) { 535bf215546Sopenharmony_ci if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) { 536bf215546Sopenharmony_ci#if PAN_ARCH >= 6 537bf215546Sopenharmony_ci struct pan_earlyzs_state earlyzs = 538bf215546Sopenharmony_ci pan_earlyzs_get(fs->earlyzs, 539bf215546Sopenharmony_ci ctx->depth_stencil->writes_zs || 540bf215546Sopenharmony_ci has_oq, 541bf215546Sopenharmony_ci ctx->blend->base.alpha_to_coverage, 542bf215546Sopenharmony_ci ctx->depth_stencil->zs_always_passes); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci cfg.properties.pixel_kill_operation = earlyzs.kill; 545bf215546Sopenharmony_ci cfg.properties.zs_update_operation = earlyzs.update; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci cfg.properties.allow_forward_pixel_to_kill = 548bf215546Sopenharmony_ci pan_allow_forward_pixel_to_kill(ctx, fs); 549bf215546Sopenharmony_ci#else 550bf215546Sopenharmony_ci cfg.properties.force_early_z = 551bf215546Sopenharmony_ci fs->info.fs.can_early_z && !alpha_to_coverage && 552bf215546Sopenharmony_ci ((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS); 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci /* TODO: Reduce this 
limit? */ 555bf215546Sopenharmony_ci if (has_blend_shader) 556bf215546Sopenharmony_ci cfg.properties.work_register_count = MAX2(fs->info.work_reg_count, 8); 557bf215546Sopenharmony_ci else 558bf215546Sopenharmony_ci cfg.properties.work_register_count = fs->info.work_reg_count; 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci /* Hardware quirks around early-zs forcing without a 561bf215546Sopenharmony_ci * depth buffer. Note this breaks occlusion queries. */ 562bf215546Sopenharmony_ci bool force_ez_with_discard = !zsa->enabled && !has_oq; 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci cfg.properties.shader_reads_tilebuffer = 565bf215546Sopenharmony_ci force_ez_with_discard && fs->info.fs.can_discard; 566bf215546Sopenharmony_ci cfg.properties.shader_contains_discard = 567bf215546Sopenharmony_ci !force_ez_with_discard && fs->info.fs.can_discard; 568bf215546Sopenharmony_ci#endif 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci#if PAN_ARCH == 4 572bf215546Sopenharmony_ci if (rt_count > 0) { 573bf215546Sopenharmony_ci cfg.multisample_misc.load_destination = so->info[0].load_dest; 574bf215546Sopenharmony_ci cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0); 575bf215546Sopenharmony_ci cfg.stencil_mask_misc.write_enable = !so->info[0].no_colour; 576bf215546Sopenharmony_ci cfg.stencil_mask_misc.srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format); 577bf215546Sopenharmony_ci cfg.stencil_mask_misc.dither_disable = !so->base.dither; 578bf215546Sopenharmony_ci cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (blend_shaders[0]) { 581bf215546Sopenharmony_ci cfg.blend_shader = blend_shaders[0]; 582bf215546Sopenharmony_ci } else { 583bf215546Sopenharmony_ci cfg.blend_constant = pan_blend_get_constant( 584bf215546Sopenharmony_ci so->info[0].constant_mask, 585bf215546Sopenharmony_ci ctx->blend_color.color); 586bf215546Sopenharmony_ci } 
587bf215546Sopenharmony_ci } else { 588bf215546Sopenharmony_ci /* If there is no colour buffer, leaving fields default is 589bf215546Sopenharmony_ci * fine, except for blending which is nonnullable */ 590bf215546Sopenharmony_ci cfg.blend_equation.color_mask = 0xf; 591bf215546Sopenharmony_ci cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; 592bf215546Sopenharmony_ci cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; 593bf215546Sopenharmony_ci cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; 594bf215546Sopenharmony_ci cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; 595bf215546Sopenharmony_ci cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; 596bf215546Sopenharmony_ci cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; 597bf215546Sopenharmony_ci } 598bf215546Sopenharmony_ci#elif PAN_ARCH == 5 599bf215546Sopenharmony_ci /* Workaround */ 600bf215546Sopenharmony_ci cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count); 601bf215546Sopenharmony_ci#endif 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF; 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci cfg.multisample_misc.evaluate_per_sample = 606bf215546Sopenharmony_ci msaa && (ctx->min_samples > 1); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci#if PAN_ARCH >= 6 609bf215546Sopenharmony_ci /* MSAA blend shaders need to pass their sample ID to 610bf215546Sopenharmony_ci * LD_TILE/ST_TILE, so we must preload it. Additionally, we 611bf215546Sopenharmony_ci * need per-sample shading for the blend shader, accomplished 612bf215546Sopenharmony_ci * by forcing per-sample shading for the whole program. 
*/ 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci if (msaa && has_blend_shader) { 615bf215546Sopenharmony_ci cfg.multisample_misc.evaluate_per_sample = true; 616bf215546Sopenharmony_ci cfg.preload.fragment.sample_mask_id = true; 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci /* Flip gl_PointCoord (and point sprites) depending on API 620bf215546Sopenharmony_ci * setting on framebuffer orientation. We do not use 621bf215546Sopenharmony_ci * lower_wpos_pntc on Bifrost. 622bf215546Sopenharmony_ci */ 623bf215546Sopenharmony_ci cfg.properties.point_sprite_coord_origin_max_y = 624bf215546Sopenharmony_ci (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci cfg.multisample_misc.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); 627bf215546Sopenharmony_ci cfg.multisample_misc.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); 628bf215546Sopenharmony_ci#endif 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage; 631bf215546Sopenharmony_ci cfg.depth_units = rast->offset_units * 2.0f; 632bf215546Sopenharmony_ci cfg.depth_factor = rast->offset_scale; 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci bool back_enab = zsa->base.stencil[1].enabled; 635bf215546Sopenharmony_ci cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0]; 636bf215546Sopenharmony_ci cfg.stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 
1 : 0]; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci#if PAN_ARCH <= 5 639bf215546Sopenharmony_ci /* v6+ fits register preload here, no alpha testing */ 640bf215546Sopenharmony_ci cfg.alpha_reference = zsa->base.alpha_ref_value; 641bf215546Sopenharmony_ci#endif 642bf215546Sopenharmony_ci } 643bf215546Sopenharmony_ci} 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_cistatic void 646bf215546Sopenharmony_cipanfrost_emit_frag_shader(struct panfrost_context *ctx, 647bf215546Sopenharmony_ci struct mali_renderer_state_packed *fragmeta, 648bf215546Sopenharmony_ci mali_ptr *blend_shaders) 649bf215546Sopenharmony_ci{ 650bf215546Sopenharmony_ci const struct panfrost_zsa_state *zsa = ctx->depth_stencil; 651bf215546Sopenharmony_ci const struct panfrost_rasterizer *rast = ctx->rasterizer; 652bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 653bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci /* We need to merge several several partial renderer state descriptors, 656bf215546Sopenharmony_ci * so stage to temporary storage rather than reading back write-combine 657bf215546Sopenharmony_ci * memory, which will trash performance. 
*/ 658bf215546Sopenharmony_ci struct mali_renderer_state_packed rsd; 659bf215546Sopenharmony_ci panfrost_prepare_fs_state(ctx, blend_shaders, &rsd); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci#if PAN_ARCH == 4 662bf215546Sopenharmony_ci if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) { 663bf215546Sopenharmony_ci /* Word 14: SFBD Blend Equation */ 664bf215546Sopenharmony_ci STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); 665bf215546Sopenharmony_ci rsd.opaque[14] = ctx->blend->equation[0]; 666bf215546Sopenharmony_ci } 667bf215546Sopenharmony_ci#endif 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci /* Merge with CSO state and upload */ 670bf215546Sopenharmony_ci if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) { 671bf215546Sopenharmony_ci struct mali_renderer_state_packed *partial_rsd = 672bf215546Sopenharmony_ci (struct mali_renderer_state_packed *)&fs->partial_rsd; 673bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd)); 674bf215546Sopenharmony_ci pan_merge(rsd, *partial_rsd, RENDERER_STATE); 675bf215546Sopenharmony_ci } else { 676bf215546Sopenharmony_ci pan_merge_empty_fs(&rsd); 677bf215546Sopenharmony_ci } 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci /* Word 8, 9 Misc state */ 680bf215546Sopenharmony_ci rsd.opaque[8] |= zsa->rsd_depth.opaque[0] 681bf215546Sopenharmony_ci | rast->multisample.opaque[0]; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci rsd.opaque[9] |= zsa->rsd_stencil.opaque[0] 684bf215546Sopenharmony_ci | rast->stencil_misc.opaque[0]; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci /* Word 10, 11 Stencil Front and Back */ 687bf215546Sopenharmony_ci rsd.opaque[10] |= zsa->stencil_front.opaque[0]; 688bf215546Sopenharmony_ci rsd.opaque[11] |= zsa->stencil_back.opaque[0]; 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci memcpy(fragmeta, &rsd, sizeof(rsd)); 691bf215546Sopenharmony_ci} 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_cistatic 
mali_ptr 694bf215546Sopenharmony_cipanfrost_emit_frag_shader_meta(struct panfrost_batch *batch) 695bf215546Sopenharmony_ci{ 696bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 697bf215546Sopenharmony_ci struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT); 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci struct panfrost_ptr xfer; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci#if PAN_ARCH == 4 704bf215546Sopenharmony_ci xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE); 705bf215546Sopenharmony_ci#else 706bf215546Sopenharmony_ci unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci xfer = pan_pool_alloc_desc_aggregate(&batch->pool.base, 709bf215546Sopenharmony_ci PAN_DESC(RENDERER_STATE), 710bf215546Sopenharmony_ci PAN_DESC_ARRAY(rt_count, BLEND)); 711bf215546Sopenharmony_ci#endif 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 }; 714bf215546Sopenharmony_ci panfrost_get_blend_shaders(batch, blend_shaders); 715bf215546Sopenharmony_ci 716bf215546Sopenharmony_ci panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend_shaders); 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci#if PAN_ARCH >= 5 719bf215546Sopenharmony_ci panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), blend_shaders); 720bf215546Sopenharmony_ci#endif 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci return xfer.gpu; 723bf215546Sopenharmony_ci} 724bf215546Sopenharmony_ci#endif 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_cistatic mali_ptr 727bf215546Sopenharmony_cipanfrost_emit_viewport(struct panfrost_batch *batch) 728bf215546Sopenharmony_ci{ 729bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 730bf215546Sopenharmony_ci 
const struct pipe_viewport_state *vp = &ctx->pipe_viewport; 731bf215546Sopenharmony_ci const struct pipe_scissor_state *ss = &ctx->scissor; 732bf215546Sopenharmony_ci const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci /* Derive min/max from translate/scale. Note since |x| >= 0 by 735bf215546Sopenharmony_ci * definition, we have that -|x| <= |x| hence translate - |scale| <= 736bf215546Sopenharmony_ci * translate + |scale|, so the ordering is correct here. */ 737bf215546Sopenharmony_ci float vp_minx = vp->translate[0] - fabsf(vp->scale[0]); 738bf215546Sopenharmony_ci float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]); 739bf215546Sopenharmony_ci float vp_miny = vp->translate[1] - fabsf(vp->scale[1]); 740bf215546Sopenharmony_ci float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]); 741bf215546Sopenharmony_ci float minz = (vp->translate[2] - fabsf(vp->scale[2])); 742bf215546Sopenharmony_ci float maxz = (vp->translate[2] + fabsf(vp->scale[2])); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci /* Scissor to the intersection of viewport and to the scissor, clamped 745bf215546Sopenharmony_ci * to the framebuffer */ 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci unsigned minx = MIN2(batch->key.width, MAX2((int) vp_minx, 0)); 748bf215546Sopenharmony_ci unsigned maxx = MIN2(batch->key.width, MAX2((int) vp_maxx, 0)); 749bf215546Sopenharmony_ci unsigned miny = MIN2(batch->key.height, MAX2((int) vp_miny, 0)); 750bf215546Sopenharmony_ci unsigned maxy = MIN2(batch->key.height, MAX2((int) vp_maxy, 0)); 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci if (ss && rast->scissor) { 753bf215546Sopenharmony_ci minx = MAX2(ss->minx, minx); 754bf215546Sopenharmony_ci miny = MAX2(ss->miny, miny); 755bf215546Sopenharmony_ci maxx = MIN2(ss->maxx, maxx); 756bf215546Sopenharmony_ci maxy = MIN2(ss->maxy, maxy); 757bf215546Sopenharmony_ci } 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci /* Set the 
range to [1, 1) so max values don't wrap round */ 760bf215546Sopenharmony_ci if (maxx == 0 || maxy == 0) 761bf215546Sopenharmony_ci maxx = maxy = minx = miny = 1; 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy); 764bf215546Sopenharmony_ci batch->scissor_culls_everything = (minx >= maxx || miny >= maxy); 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */ 767bf215546Sopenharmony_ci maxx--; 768bf215546Sopenharmony_ci maxy--; 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY; 771bf215546Sopenharmony_ci batch->maximum_z = rast->depth_clip_far ? maxz : +INFINITY; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci#if PAN_ARCH <= 7 774bf215546Sopenharmony_ci struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT); 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci pan_pack(T.cpu, VIEWPORT, cfg) { 777bf215546Sopenharmony_ci cfg.scissor_minimum_x = minx; 778bf215546Sopenharmony_ci cfg.scissor_minimum_y = miny; 779bf215546Sopenharmony_ci cfg.scissor_maximum_x = maxx; 780bf215546Sopenharmony_ci cfg.scissor_maximum_y = maxy; 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci cfg.minimum_z = batch->minimum_z; 783bf215546Sopenharmony_ci cfg.maximum_z = batch->maximum_z; 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci return T.gpu; 787bf215546Sopenharmony_ci#else 788bf215546Sopenharmony_ci pan_pack(&batch->scissor, SCISSOR, cfg) { 789bf215546Sopenharmony_ci cfg.scissor_minimum_x = minx; 790bf215546Sopenharmony_ci cfg.scissor_minimum_y = miny; 791bf215546Sopenharmony_ci cfg.scissor_maximum_x = maxx; 792bf215546Sopenharmony_ci cfg.scissor_maximum_y = maxy; 793bf215546Sopenharmony_ci } 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci return 0; 796bf215546Sopenharmony_ci#endif 797bf215546Sopenharmony_ci} 
798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci#if PAN_ARCH >= 9 800bf215546Sopenharmony_ci/** 801bf215546Sopenharmony_ci * Emit a Valhall depth/stencil descriptor at draw-time. The bulk of the 802bf215546Sopenharmony_ci * descriptor corresponds to a pipe_depth_stencil_alpha CSO and is packed at 803bf215546Sopenharmony_ci * CSO create time. However, the stencil reference values and shader 804bf215546Sopenharmony_ci * interactions are dynamic state. Pack only the dynamic state here and OR 805bf215546Sopenharmony_ci * together. 806bf215546Sopenharmony_ci */ 807bf215546Sopenharmony_cistatic mali_ptr 808bf215546Sopenharmony_cipanfrost_emit_depth_stencil(struct panfrost_batch *batch) 809bf215546Sopenharmony_ci{ 810bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 811bf215546Sopenharmony_ci const struct panfrost_zsa_state *zsa = ctx->depth_stencil; 812bf215546Sopenharmony_ci struct panfrost_rasterizer *rast = ctx->rasterizer; 813bf215546Sopenharmony_ci struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 814bf215546Sopenharmony_ci bool back_enab = zsa->base.stencil[1].enabled; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL); 817bf215546Sopenharmony_ci struct mali_depth_stencil_packed dynamic; 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci pan_pack(&dynamic, DEPTH_STENCIL, cfg) { 820bf215546Sopenharmony_ci cfg.front_reference_value = ctx->stencil_ref.ref_value[0]; 821bf215546Sopenharmony_ci cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 
1 : 0]; 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci cfg.stencil_from_shader = fs->info.fs.writes_stencil; 824bf215546Sopenharmony_ci cfg.depth_source = pan_depth_source(&fs->info); 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci cfg.depth_bias_enable = rast->base.offset_tri; 827bf215546Sopenharmony_ci cfg.depth_units = rast->base.offset_units * 2.0f; 828bf215546Sopenharmony_ci cfg.depth_factor = rast->base.offset_scale; 829bf215546Sopenharmony_ci cfg.depth_bias_clamp = rast->base.offset_clamp; 830bf215546Sopenharmony_ci } 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci pan_merge(dynamic, zsa->desc, DEPTH_STENCIL); 833bf215546Sopenharmony_ci memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL)); 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci return T.gpu; 836bf215546Sopenharmony_ci} 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci/** 839bf215546Sopenharmony_ci * Emit Valhall blend descriptor at draw-time. The descriptor itself is shared 840bf215546Sopenharmony_ci * with Bifrost, but the container data structure is simplified. 
841bf215546Sopenharmony_ci */ 842bf215546Sopenharmony_cistatic mali_ptr 843bf215546Sopenharmony_cipanfrost_emit_blend_valhall(struct panfrost_batch *batch) 844bf215546Sopenharmony_ci{ 845bf215546Sopenharmony_ci unsigned rt_count = MAX2(batch->key.nr_cbufs, 1); 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND); 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 }; 850bf215546Sopenharmony_ci panfrost_get_blend_shaders(batch, blend_shaders); 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci panfrost_emit_blend(batch, T.cpu, blend_shaders); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci /* Precalculate for the per-draw path */ 855bf215546Sopenharmony_ci bool has_blend_shader = false; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci for (unsigned i = 0; i < rt_count; ++i) 858bf215546Sopenharmony_ci has_blend_shader |= !!blend_shaders[i]; 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci batch->ctx->valhall_has_blend_shader = has_blend_shader; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci return T.gpu; 863bf215546Sopenharmony_ci} 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci/** 866bf215546Sopenharmony_ci * Emit Valhall buffer descriptors for bound vertex buffers at draw-time. 
867bf215546Sopenharmony_ci */ 868bf215546Sopenharmony_cistatic mali_ptr 869bf215546Sopenharmony_cipanfrost_emit_vertex_buffers(struct panfrost_batch *batch) 870bf215546Sopenharmony_ci{ 871bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 872bf215546Sopenharmony_ci unsigned buffer_count = util_last_bit(ctx->vb_mask); 873bf215546Sopenharmony_ci struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, 874bf215546Sopenharmony_ci buffer_count, BUFFER); 875bf215546Sopenharmony_ci struct mali_buffer_packed *buffers = T.cpu; 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci u_foreach_bit(i, ctx->vb_mask) { 878bf215546Sopenharmony_ci struct pipe_vertex_buffer vb = ctx->vertex_buffers[i]; 879bf215546Sopenharmony_ci struct pipe_resource *prsrc = vb.buffer.resource; 880bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(prsrc); 881bf215546Sopenharmony_ci assert(!vb.is_user_buffer); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci pan_pack(buffers + i, BUFFER, cfg) { 886bf215546Sopenharmony_ci cfg.address = rsrc->image.data.bo->ptr.gpu + 887bf215546Sopenharmony_ci vb.buffer_offset; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci cfg.size = prsrc->width0 - vb.buffer_offset; 890bf215546Sopenharmony_ci } 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci return T.gpu; 894bf215546Sopenharmony_ci} 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci/** 897bf215546Sopenharmony_ci * Emit Valhall attribute descriptors and associated (vertex) buffer 898bf215546Sopenharmony_ci * descriptors at draw-time. The attribute descriptors are packed at draw time 899bf215546Sopenharmony_ci * except for the stride field. The buffer descriptors are packed here, though 900bf215546Sopenharmony_ci * that could be moved into panfrost_set_vertex_buffers if needed. 
901bf215546Sopenharmony_ci */ 902bf215546Sopenharmony_cistatic mali_ptr 903bf215546Sopenharmony_cipanfrost_emit_vertex_data(struct panfrost_batch *batch) 904bf215546Sopenharmony_ci{ 905bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 906bf215546Sopenharmony_ci struct panfrost_vertex_state *vtx = ctx->vertex; 907bf215546Sopenharmony_ci struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, 908bf215546Sopenharmony_ci vtx->num_elements, 909bf215546Sopenharmony_ci ATTRIBUTE); 910bf215546Sopenharmony_ci struct mali_attribute_packed *attributes = T.cpu; 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci for (unsigned i = 0; i < vtx->num_elements; ++i) { 913bf215546Sopenharmony_ci struct mali_attribute_packed packed; 914bf215546Sopenharmony_ci unsigned vbi = vtx->pipe[i].vertex_buffer_index; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci pan_pack(&packed, ATTRIBUTE, cfg) { 917bf215546Sopenharmony_ci cfg.stride = ctx->vertex_buffers[vbi].stride; 918bf215546Sopenharmony_ci } 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci pan_merge(packed, vtx->attributes[i], ATTRIBUTE); 921bf215546Sopenharmony_ci attributes[i] = packed; 922bf215546Sopenharmony_ci } 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci return T.gpu; 925bf215546Sopenharmony_ci} 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci/* 928bf215546Sopenharmony_ci * Emit Valhall descriptors for shader images. Unlike previous generations, 929bf215546Sopenharmony_ci * Valhall does not have a special descriptor for images. Standard texture 930bf215546Sopenharmony_ci * descriptors are used. The binding is different in Gallium, however, so we 931bf215546Sopenharmony_ci * translate. 
932bf215546Sopenharmony_ci */ 933bf215546Sopenharmony_cistatic struct pipe_sampler_view 934bf215546Sopenharmony_cipanfrost_pipe_image_to_sampler_view(struct pipe_image_view *v) 935bf215546Sopenharmony_ci{ 936bf215546Sopenharmony_ci struct pipe_sampler_view out = { 937bf215546Sopenharmony_ci .format = v->format, 938bf215546Sopenharmony_ci .texture = v->resource, 939bf215546Sopenharmony_ci .target = v->resource->target, 940bf215546Sopenharmony_ci .swizzle_r = PIPE_SWIZZLE_X, 941bf215546Sopenharmony_ci .swizzle_g = PIPE_SWIZZLE_Y, 942bf215546Sopenharmony_ci .swizzle_b = PIPE_SWIZZLE_Z, 943bf215546Sopenharmony_ci .swizzle_a = PIPE_SWIZZLE_W 944bf215546Sopenharmony_ci }; 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci if (out.target == PIPE_BUFFER) { 947bf215546Sopenharmony_ci out.u.buf.offset = v->u.buf.offset; 948bf215546Sopenharmony_ci out.u.buf.size = v->u.buf.size; 949bf215546Sopenharmony_ci } else { 950bf215546Sopenharmony_ci out.u.tex.first_layer = v->u.tex.first_layer; 951bf215546Sopenharmony_ci out.u.tex.last_layer = v->u.tex.last_layer; 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci /* Single level only */ 954bf215546Sopenharmony_ci out.u.tex.first_level = v->u.tex.level; 955bf215546Sopenharmony_ci out.u.tex.last_level = v->u.tex.level; 956bf215546Sopenharmony_ci } 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci return out; 959bf215546Sopenharmony_ci} 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_cistatic void 962bf215546Sopenharmony_cipanfrost_update_sampler_view(struct panfrost_sampler_view *view, 963bf215546Sopenharmony_ci struct pipe_context *pctx); 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_cistatic mali_ptr 966bf215546Sopenharmony_cipanfrost_emit_images(struct panfrost_batch *batch, enum pipe_shader_type stage) 967bf215546Sopenharmony_ci{ 968bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 969bf215546Sopenharmony_ci unsigned last_bit = util_last_bit(ctx->image_mask[stage]); 970bf215546Sopenharmony_ci 
971bf215546Sopenharmony_ci struct panfrost_ptr T = 972bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE); 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci struct mali_texture_packed *out = (struct mali_texture_packed *) T.cpu; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci for (int i = 0; i < last_bit; ++i) { 977bf215546Sopenharmony_ci struct pipe_image_view *image = &ctx->images[stage][i]; 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) { 980bf215546Sopenharmony_ci memset(&out[i], 0, sizeof(out[i])); 981bf215546Sopenharmony_ci continue; 982bf215546Sopenharmony_ci } 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci /* Construct a synthetic sampler view so we can use our usual 985bf215546Sopenharmony_ci * sampler view code for the actual descriptor packing. 986bf215546Sopenharmony_ci * 987bf215546Sopenharmony_ci * Use the batch pool for a transient allocation, rather than 988bf215546Sopenharmony_ci * allocating a long-lived descriptor. 989bf215546Sopenharmony_ci */ 990bf215546Sopenharmony_ci struct panfrost_sampler_view view = { 991bf215546Sopenharmony_ci .base = panfrost_pipe_image_to_sampler_view(image), 992bf215546Sopenharmony_ci .pool = &batch->pool 993bf215546Sopenharmony_ci }; 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci /* If we specify a cube map, the hardware internally treat it as 996bf215546Sopenharmony_ci * a 2D array. Since cube maps as images can confuse our common 997bf215546Sopenharmony_ci * texturing code, explicitly use a 2D array. 998bf215546Sopenharmony_ci * 999bf215546Sopenharmony_ci * Similar concerns apply to 3D textures. 
1000bf215546Sopenharmony_ci */ 1001bf215546Sopenharmony_ci if (view.base.target == PIPE_BUFFER) 1002bf215546Sopenharmony_ci view.base.target = PIPE_BUFFER; 1003bf215546Sopenharmony_ci else 1004bf215546Sopenharmony_ci view.base.target = PIPE_TEXTURE_2D_ARRAY; 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci panfrost_update_sampler_view(&view, &ctx->base); 1007bf215546Sopenharmony_ci out[i] = view.bifrost_descriptor; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci panfrost_track_image_access(batch, stage, image); 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci return T.gpu; 1013bf215546Sopenharmony_ci} 1014bf215546Sopenharmony_ci#endif 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_cistatic mali_ptr 1017bf215546Sopenharmony_cipanfrost_map_constant_buffer_gpu(struct panfrost_batch *batch, 1018bf215546Sopenharmony_ci enum pipe_shader_type st, 1019bf215546Sopenharmony_ci struct panfrost_constant_buffer *buf, 1020bf215546Sopenharmony_ci unsigned index) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci struct pipe_constant_buffer *cb = &buf->cb[index]; 1023bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(cb->buffer); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci if (rsrc) { 1026bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, st); 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci /* Alignment gauranteed by 1029bf215546Sopenharmony_ci * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ 1030bf215546Sopenharmony_ci return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset; 1031bf215546Sopenharmony_ci } else if (cb->user_buffer) { 1032bf215546Sopenharmony_ci return pan_pool_upload_aligned(&batch->pool.base, 1033bf215546Sopenharmony_ci cb->user_buffer + 1034bf215546Sopenharmony_ci cb->buffer_offset, 1035bf215546Sopenharmony_ci cb->buffer_size, 16); 1036bf215546Sopenharmony_ci } else { 1037bf215546Sopenharmony_ci unreachable("No constant buffer"); 
1038bf215546Sopenharmony_ci } 1039bf215546Sopenharmony_ci} 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_cistruct sysval_uniform { 1042bf215546Sopenharmony_ci union { 1043bf215546Sopenharmony_ci float f[4]; 1044bf215546Sopenharmony_ci int32_t i[4]; 1045bf215546Sopenharmony_ci uint32_t u[4]; 1046bf215546Sopenharmony_ci uint64_t du[2]; 1047bf215546Sopenharmony_ci }; 1048bf215546Sopenharmony_ci}; 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_cistatic void 1051bf215546Sopenharmony_cipanfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch, 1052bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1053bf215546Sopenharmony_ci{ 1054bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1055bf215546Sopenharmony_ci const struct pipe_viewport_state *vp = &ctx->pipe_viewport; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci uniform->f[0] = vp->scale[0]; 1058bf215546Sopenharmony_ci uniform->f[1] = vp->scale[1]; 1059bf215546Sopenharmony_ci uniform->f[2] = vp->scale[2]; 1060bf215546Sopenharmony_ci} 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_cistatic void 1063bf215546Sopenharmony_cipanfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch, 1064bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1067bf215546Sopenharmony_ci const struct pipe_viewport_state *vp = &ctx->pipe_viewport; 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci uniform->f[0] = vp->translate[0]; 1070bf215546Sopenharmony_ci uniform->f[1] = vp->translate[1]; 1071bf215546Sopenharmony_ci uniform->f[2] = vp->translate[2]; 1072bf215546Sopenharmony_ci} 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_cistatic void panfrost_upload_txs_sysval(struct panfrost_batch *batch, 1075bf215546Sopenharmony_ci enum pipe_shader_type st, 1076bf215546Sopenharmony_ci unsigned int sysvalid, 1077bf215546Sopenharmony_ci struct sysval_uniform *uniform) 
1078bf215546Sopenharmony_ci{ 1079bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1080bf215546Sopenharmony_ci unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); 1081bf215546Sopenharmony_ci unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); 1082bf215546Sopenharmony_ci bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); 1083bf215546Sopenharmony_ci struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base; 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci assert(dim); 1086bf215546Sopenharmony_ci 1087bf215546Sopenharmony_ci if (tex->target == PIPE_BUFFER) { 1088bf215546Sopenharmony_ci assert(dim == 1); 1089bf215546Sopenharmony_ci uniform->i[0] = 1090bf215546Sopenharmony_ci tex->u.buf.size / util_format_get_blocksize(tex->format); 1091bf215546Sopenharmony_ci return; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level); 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci if (dim > 1) 1097bf215546Sopenharmony_ci uniform->i[1] = u_minify(tex->texture->height0, 1098bf215546Sopenharmony_ci tex->u.tex.first_level); 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci if (dim > 2) 1101bf215546Sopenharmony_ci uniform->i[2] = u_minify(tex->texture->depth0, 1102bf215546Sopenharmony_ci tex->u.tex.first_level); 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci if (is_array) { 1105bf215546Sopenharmony_ci unsigned size = tex->texture->array_size; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci /* Internally, we store the number of 2D images (faces * array 1108bf215546Sopenharmony_ci * size). Externally, we report the array size in terms of 1109bf215546Sopenharmony_ci * complete cubes. So divide by the # of faces per cube. 
1110bf215546Sopenharmony_ci */ 1111bf215546Sopenharmony_ci if (tex->target == PIPE_TEXTURE_CUBE_ARRAY) 1112bf215546Sopenharmony_ci size /= 6; 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci uniform->i[dim] = size; 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci} 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_cistatic void panfrost_upload_image_size_sysval(struct panfrost_batch *batch, 1119bf215546Sopenharmony_ci enum pipe_shader_type st, 1120bf215546Sopenharmony_ci unsigned int sysvalid, 1121bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1122bf215546Sopenharmony_ci{ 1123bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1124bf215546Sopenharmony_ci unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); 1125bf215546Sopenharmony_ci unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); 1126bf215546Sopenharmony_ci unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci assert(dim && dim < 4); 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci struct pipe_image_view *image = &ctx->images[st][idx]; 1131bf215546Sopenharmony_ci 1132bf215546Sopenharmony_ci if (image->resource->target == PIPE_BUFFER) { 1133bf215546Sopenharmony_ci unsigned blocksize = util_format_get_blocksize(image->format); 1134bf215546Sopenharmony_ci uniform->i[0] = image->resource->width0 / blocksize; 1135bf215546Sopenharmony_ci return; 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci uniform->i[0] = u_minify(image->resource->width0, 1139bf215546Sopenharmony_ci image->u.tex.level); 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci if (dim > 1) 1142bf215546Sopenharmony_ci uniform->i[1] = u_minify(image->resource->height0, 1143bf215546Sopenharmony_ci image->u.tex.level); 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci if (dim > 2) 1146bf215546Sopenharmony_ci uniform->i[2] = u_minify(image->resource->depth0, 1147bf215546Sopenharmony_ci 
image->u.tex.level); 1148bf215546Sopenharmony_ci 1149bf215546Sopenharmony_ci if (is_array) 1150bf215546Sopenharmony_ci uniform->i[dim] = image->resource->array_size; 1151bf215546Sopenharmony_ci} 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_cistatic void 1154bf215546Sopenharmony_cipanfrost_upload_ssbo_sysval(struct panfrost_batch *batch, 1155bf215546Sopenharmony_ci enum pipe_shader_type st, 1156bf215546Sopenharmony_ci unsigned ssbo_id, 1157bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1158bf215546Sopenharmony_ci{ 1159bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci assert(ctx->ssbo_mask[st] & (1 << ssbo_id)); 1162bf215546Sopenharmony_ci struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id]; 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci /* Compute address */ 1165bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(sb.buffer); 1166bf215546Sopenharmony_ci struct panfrost_bo *bo = rsrc->image.data.bo; 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci panfrost_batch_write_rsrc(batch, rsrc, st); 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 1171bf215546Sopenharmony_ci sb.buffer_offset, sb.buffer_size); 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci /* Upload address and size as sysval */ 1174bf215546Sopenharmony_ci uniform->du[0] = bo->ptr.gpu + sb.buffer_offset; 1175bf215546Sopenharmony_ci uniform->u[2] = sb.buffer_size; 1176bf215546Sopenharmony_ci} 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_cistatic void 1179bf215546Sopenharmony_cipanfrost_upload_sampler_sysval(struct panfrost_batch *batch, 1180bf215546Sopenharmony_ci enum pipe_shader_type st, 1181bf215546Sopenharmony_ci unsigned samp_idx, 1182bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1183bf215546Sopenharmony_ci{ 1184bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 
1185bf215546Sopenharmony_ci struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base; 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci uniform->f[0] = sampl->min_lod; 1188bf215546Sopenharmony_ci uniform->f[1] = sampl->max_lod; 1189bf215546Sopenharmony_ci uniform->f[2] = sampl->lod_bias; 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci /* Even without any errata, Midgard represents "no mipmapping" as 1192bf215546Sopenharmony_ci * fixing the LOD with the clamps; keep behaviour consistent. c.f. 1193bf215546Sopenharmony_ci * panfrost_create_sampler_state which also explains our choice of 1194bf215546Sopenharmony_ci * epsilon value (again to keep behaviour consistent) */ 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) 1197bf215546Sopenharmony_ci uniform->f[1] = uniform->f[0] + (1.0/256.0); 1198bf215546Sopenharmony_ci} 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_cistatic void 1201bf215546Sopenharmony_cipanfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch, 1202bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1203bf215546Sopenharmony_ci{ 1204bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci uniform->u[0] = ctx->compute_grid->grid[0]; 1207bf215546Sopenharmony_ci uniform->u[1] = ctx->compute_grid->grid[1]; 1208bf215546Sopenharmony_ci uniform->u[2] = ctx->compute_grid->grid[2]; 1209bf215546Sopenharmony_ci} 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_cistatic void 1212bf215546Sopenharmony_cipanfrost_upload_local_group_size_sysval(struct panfrost_batch *batch, 1213bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1214bf215546Sopenharmony_ci{ 1215bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1216bf215546Sopenharmony_ci 1217bf215546Sopenharmony_ci uniform->u[0] = ctx->compute_grid->block[0]; 1218bf215546Sopenharmony_ci uniform->u[1] = 
ctx->compute_grid->block[1]; 1219bf215546Sopenharmony_ci uniform->u[2] = ctx->compute_grid->block[2]; 1220bf215546Sopenharmony_ci} 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_cistatic void 1223bf215546Sopenharmony_cipanfrost_upload_work_dim_sysval(struct panfrost_batch *batch, 1224bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1225bf215546Sopenharmony_ci{ 1226bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci uniform->u[0] = ctx->compute_grid->work_dim; 1229bf215546Sopenharmony_ci} 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci/* Sample positions are pushed in a Bifrost specific format on Bifrost. On 1232bf215546Sopenharmony_ci * Midgard, we emulate the Bifrost path with some extra arithmetic in the 1233bf215546Sopenharmony_ci * shader, to keep the code as unified as possible. */ 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_cistatic void 1236bf215546Sopenharmony_cipanfrost_upload_sample_positions_sysval(struct panfrost_batch *batch, 1237bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1238bf215546Sopenharmony_ci{ 1239bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1240bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(ctx->base.screen); 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_ci unsigned samples = util_framebuffer_get_num_samples(&batch->key); 1243bf215546Sopenharmony_ci uniform->du[0] = panfrost_sample_positions(dev, panfrost_sample_pattern(samples)); 1244bf215546Sopenharmony_ci} 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_cistatic void 1247bf215546Sopenharmony_cipanfrost_upload_multisampled_sysval(struct panfrost_batch *batch, 1248bf215546Sopenharmony_ci struct sysval_uniform *uniform) 1249bf215546Sopenharmony_ci{ 1250bf215546Sopenharmony_ci unsigned samples = util_framebuffer_get_num_samples(&batch->key); 1251bf215546Sopenharmony_ci uniform->u[0] = samples > 1; 1252bf215546Sopenharmony_ci} 
1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci#if PAN_ARCH >= 6 1255bf215546Sopenharmony_cistatic void 1256bf215546Sopenharmony_cipanfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, 1257bf215546Sopenharmony_ci unsigned size_and_rt, struct sysval_uniform *uniform) 1258bf215546Sopenharmony_ci{ 1259bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1260bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(ctx->base.screen); 1261bf215546Sopenharmony_ci unsigned rt = size_and_rt & 0xF; 1262bf215546Sopenharmony_ci unsigned size = size_and_rt >> 4; 1263bf215546Sopenharmony_ci 1264bf215546Sopenharmony_ci if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) { 1265bf215546Sopenharmony_ci enum pipe_format format = batch->key.cbufs[rt]->format; 1266bf215546Sopenharmony_ci uniform->u[0] = 1267bf215546Sopenharmony_ci GENX(pan_blend_get_internal_desc)(dev, format, rt, size, false) >> 32; 1268bf215546Sopenharmony_ci } else { 1269bf215546Sopenharmony_ci pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg) 1270bf215546Sopenharmony_ci cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw; 1271bf215546Sopenharmony_ci } 1272bf215546Sopenharmony_ci} 1273bf215546Sopenharmony_ci#endif 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_cistatic unsigned 1276bf215546Sopenharmony_cipanfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target) 1277bf215546Sopenharmony_ci{ 1278bf215546Sopenharmony_ci return target->buffer_offset + (pan_so_target(target)->offset * stride); 1279bf215546Sopenharmony_ci} 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_cistatic void 1282bf215546Sopenharmony_cipanfrost_upload_sysvals(struct panfrost_batch *batch, 1283bf215546Sopenharmony_ci const struct panfrost_ptr *ptr, 1284bf215546Sopenharmony_ci struct panfrost_shader_state *ss, 1285bf215546Sopenharmony_ci enum pipe_shader_type st) 1286bf215546Sopenharmony_ci{ 1287bf215546Sopenharmony_ci struct sysval_uniform *uniforms = ptr->cpu; 
1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { 1290bf215546Sopenharmony_ci int sysval = ss->info.sysvals.sysvals[i]; 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_ci switch (PAN_SYSVAL_TYPE(sysval)) { 1293bf215546Sopenharmony_ci case PAN_SYSVAL_VIEWPORT_SCALE: 1294bf215546Sopenharmony_ci panfrost_upload_viewport_scale_sysval(batch, 1295bf215546Sopenharmony_ci &uniforms[i]); 1296bf215546Sopenharmony_ci break; 1297bf215546Sopenharmony_ci case PAN_SYSVAL_VIEWPORT_OFFSET: 1298bf215546Sopenharmony_ci panfrost_upload_viewport_offset_sysval(batch, 1299bf215546Sopenharmony_ci &uniforms[i]); 1300bf215546Sopenharmony_ci break; 1301bf215546Sopenharmony_ci case PAN_SYSVAL_TEXTURE_SIZE: 1302bf215546Sopenharmony_ci panfrost_upload_txs_sysval(batch, st, 1303bf215546Sopenharmony_ci PAN_SYSVAL_ID(sysval), 1304bf215546Sopenharmony_ci &uniforms[i]); 1305bf215546Sopenharmony_ci break; 1306bf215546Sopenharmony_ci case PAN_SYSVAL_SSBO: 1307bf215546Sopenharmony_ci panfrost_upload_ssbo_sysval(batch, st, 1308bf215546Sopenharmony_ci PAN_SYSVAL_ID(sysval), 1309bf215546Sopenharmony_ci &uniforms[i]); 1310bf215546Sopenharmony_ci break; 1311bf215546Sopenharmony_ci 1312bf215546Sopenharmony_ci case PAN_SYSVAL_XFB: 1313bf215546Sopenharmony_ci { 1314bf215546Sopenharmony_ci unsigned buf = PAN_SYSVAL_ID(sysval); 1315bf215546Sopenharmony_ci struct panfrost_shader_state *vs = 1316bf215546Sopenharmony_ci panfrost_get_shader_state(batch->ctx, PIPE_SHADER_VERTEX); 1317bf215546Sopenharmony_ci struct pipe_stream_output_info *so = &vs->stream_output; 1318bf215546Sopenharmony_ci unsigned stride = so->stride[buf] * 4; 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_ci struct pipe_stream_output_target *target = NULL; 1321bf215546Sopenharmony_ci if (buf < batch->ctx->streamout.num_targets) 1322bf215546Sopenharmony_ci target = batch->ctx->streamout.targets[buf]; 1323bf215546Sopenharmony_ci 1324bf215546Sopenharmony_ci if (!target) { 
1325bf215546Sopenharmony_ci /* Memory sink */ 1326bf215546Sopenharmony_ci uniforms[i].du[0] = 0x8ull << 60; 1327bf215546Sopenharmony_ci break; 1328bf215546Sopenharmony_ci } 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(target->buffer); 1331bf215546Sopenharmony_ci unsigned offset = panfrost_xfb_offset(stride, target); 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 1334bf215546Sopenharmony_ci offset, target->buffer_size - offset); 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci uniforms[i].du[0] = rsrc->image.data.bo->ptr.gpu + offset; 1339bf215546Sopenharmony_ci break; 1340bf215546Sopenharmony_ci } 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci case PAN_SYSVAL_NUM_VERTICES: 1343bf215546Sopenharmony_ci uniforms[i].u[0] = batch->ctx->vertex_count; 1344bf215546Sopenharmony_ci break; 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci case PAN_SYSVAL_NUM_WORK_GROUPS: 1347bf215546Sopenharmony_ci for (unsigned j = 0; j < 3; j++) { 1348bf215546Sopenharmony_ci batch->num_wg_sysval[j] = 1349bf215546Sopenharmony_ci ptr->gpu + (i * sizeof(*uniforms)) + (j * 4); 1350bf215546Sopenharmony_ci } 1351bf215546Sopenharmony_ci panfrost_upload_num_work_groups_sysval(batch, 1352bf215546Sopenharmony_ci &uniforms[i]); 1353bf215546Sopenharmony_ci break; 1354bf215546Sopenharmony_ci case PAN_SYSVAL_LOCAL_GROUP_SIZE: 1355bf215546Sopenharmony_ci panfrost_upload_local_group_size_sysval(batch, 1356bf215546Sopenharmony_ci &uniforms[i]); 1357bf215546Sopenharmony_ci break; 1358bf215546Sopenharmony_ci case PAN_SYSVAL_WORK_DIM: 1359bf215546Sopenharmony_ci panfrost_upload_work_dim_sysval(batch, 1360bf215546Sopenharmony_ci &uniforms[i]); 1361bf215546Sopenharmony_ci break; 1362bf215546Sopenharmony_ci case PAN_SYSVAL_SAMPLER: 1363bf215546Sopenharmony_ci 
panfrost_upload_sampler_sysval(batch, st, 1364bf215546Sopenharmony_ci PAN_SYSVAL_ID(sysval), 1365bf215546Sopenharmony_ci &uniforms[i]); 1366bf215546Sopenharmony_ci break; 1367bf215546Sopenharmony_ci case PAN_SYSVAL_IMAGE_SIZE: 1368bf215546Sopenharmony_ci panfrost_upload_image_size_sysval(batch, st, 1369bf215546Sopenharmony_ci PAN_SYSVAL_ID(sysval), 1370bf215546Sopenharmony_ci &uniforms[i]); 1371bf215546Sopenharmony_ci break; 1372bf215546Sopenharmony_ci case PAN_SYSVAL_SAMPLE_POSITIONS: 1373bf215546Sopenharmony_ci panfrost_upload_sample_positions_sysval(batch, 1374bf215546Sopenharmony_ci &uniforms[i]); 1375bf215546Sopenharmony_ci break; 1376bf215546Sopenharmony_ci case PAN_SYSVAL_MULTISAMPLED: 1377bf215546Sopenharmony_ci panfrost_upload_multisampled_sysval(batch, 1378bf215546Sopenharmony_ci &uniforms[i]); 1379bf215546Sopenharmony_ci break; 1380bf215546Sopenharmony_ci#if PAN_ARCH >= 6 1381bf215546Sopenharmony_ci case PAN_SYSVAL_RT_CONVERSION: 1382bf215546Sopenharmony_ci panfrost_upload_rt_conversion_sysval(batch, 1383bf215546Sopenharmony_ci PAN_SYSVAL_ID(sysval), &uniforms[i]); 1384bf215546Sopenharmony_ci break; 1385bf215546Sopenharmony_ci#endif 1386bf215546Sopenharmony_ci case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: 1387bf215546Sopenharmony_ci batch->ctx->first_vertex_sysval_ptr = 1388bf215546Sopenharmony_ci ptr->gpu + (i * sizeof(*uniforms)); 1389bf215546Sopenharmony_ci batch->ctx->base_vertex_sysval_ptr = 1390bf215546Sopenharmony_ci batch->ctx->first_vertex_sysval_ptr + 4; 1391bf215546Sopenharmony_ci batch->ctx->base_instance_sysval_ptr = 1392bf215546Sopenharmony_ci batch->ctx->first_vertex_sysval_ptr + 8; 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci uniforms[i].u[0] = batch->ctx->offset_start; 1395bf215546Sopenharmony_ci uniforms[i].u[1] = batch->ctx->base_vertex; 1396bf215546Sopenharmony_ci uniforms[i].u[2] = batch->ctx->base_instance; 1397bf215546Sopenharmony_ci break; 1398bf215546Sopenharmony_ci case PAN_SYSVAL_DRAWID: 1399bf215546Sopenharmony_ci 
uniforms[i].u[0] = batch->ctx->drawid; 1400bf215546Sopenharmony_ci break; 1401bf215546Sopenharmony_ci default: 1402bf215546Sopenharmony_ci assert(0); 1403bf215546Sopenharmony_ci } 1404bf215546Sopenharmony_ci } 1405bf215546Sopenharmony_ci} 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_cistatic const void * 1408bf215546Sopenharmony_cipanfrost_map_constant_buffer_cpu(struct panfrost_context *ctx, 1409bf215546Sopenharmony_ci struct panfrost_constant_buffer *buf, 1410bf215546Sopenharmony_ci unsigned index) 1411bf215546Sopenharmony_ci{ 1412bf215546Sopenharmony_ci struct pipe_constant_buffer *cb = &buf->cb[index]; 1413bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(cb->buffer); 1414bf215546Sopenharmony_ci 1415bf215546Sopenharmony_ci if (rsrc) { 1416bf215546Sopenharmony_ci panfrost_bo_mmap(rsrc->image.data.bo); 1417bf215546Sopenharmony_ci panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping"); 1418bf215546Sopenharmony_ci panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false); 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset; 1421bf215546Sopenharmony_ci } else if (cb->user_buffer) { 1422bf215546Sopenharmony_ci return cb->user_buffer + cb->buffer_offset; 1423bf215546Sopenharmony_ci } else 1424bf215546Sopenharmony_ci unreachable("No constant buffer"); 1425bf215546Sopenharmony_ci} 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci/* Emit a single UBO record. On Valhall, UBOs are dumb buffers and are 1428bf215546Sopenharmony_ci * implemented with buffer descriptors in the resource table, sized in terms of 1429bf215546Sopenharmony_ci * bytes. On Bifrost and older, UBOs have special uniform buffer data 1430bf215546Sopenharmony_ci * structure, sized in terms of entries. 
1431bf215546Sopenharmony_ci */ 1432bf215546Sopenharmony_cistatic void 1433bf215546Sopenharmony_cipanfrost_emit_ubo(void *base, unsigned index, mali_ptr address, size_t size) 1434bf215546Sopenharmony_ci{ 1435bf215546Sopenharmony_ci#if PAN_ARCH >= 9 1436bf215546Sopenharmony_ci struct mali_buffer_packed *out = base; 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci pan_pack(out + index, BUFFER, cfg) { 1439bf215546Sopenharmony_ci cfg.size = size; 1440bf215546Sopenharmony_ci cfg.address = address; 1441bf215546Sopenharmony_ci } 1442bf215546Sopenharmony_ci#else 1443bf215546Sopenharmony_ci struct mali_uniform_buffer_packed *out = base; 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci /* Issue (57) for the ARB_uniform_buffer_object spec says that 1446bf215546Sopenharmony_ci * the buffer can be larger than the uniform data inside it, 1447bf215546Sopenharmony_ci * so clamp ubo size to what hardware supports. */ 1448bf215546Sopenharmony_ci 1449bf215546Sopenharmony_ci pan_pack(out + index, UNIFORM_BUFFER, cfg) { 1450bf215546Sopenharmony_ci cfg.entries = MIN2(DIV_ROUND_UP(size, 16), 1 << 12); 1451bf215546Sopenharmony_ci cfg.pointer = address; 1452bf215546Sopenharmony_ci } 1453bf215546Sopenharmony_ci#endif 1454bf215546Sopenharmony_ci} 1455bf215546Sopenharmony_ci 1456bf215546Sopenharmony_cistatic mali_ptr 1457bf215546Sopenharmony_cipanfrost_emit_const_buf(struct panfrost_batch *batch, 1458bf215546Sopenharmony_ci enum pipe_shader_type stage, 1459bf215546Sopenharmony_ci unsigned *buffer_count, 1460bf215546Sopenharmony_ci mali_ptr *push_constants, 1461bf215546Sopenharmony_ci unsigned *pushed_words) 1462bf215546Sopenharmony_ci{ 1463bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1464bf215546Sopenharmony_ci struct panfrost_shader_variants *all = ctx->shader[stage]; 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci if (!all) 1467bf215546Sopenharmony_ci return 0; 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci struct panfrost_constant_buffer 
*buf = &ctx->constant_buffer[stage]; 1470bf215546Sopenharmony_ci struct panfrost_shader_state *ss = &all->variants[all->active_variant]; 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci /* Allocate room for the sysval and the uniforms */ 1473bf215546Sopenharmony_ci size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count; 1474bf215546Sopenharmony_ci struct panfrost_ptr transfer = 1475bf215546Sopenharmony_ci pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci /* Upload sysvals requested by the shader */ 1478bf215546Sopenharmony_ci panfrost_upload_sysvals(batch, &transfer, ss, stage); 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_ci /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ 1481bf215546Sopenharmony_ci struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage); 1482bf215546Sopenharmony_ci unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0); 1483bf215546Sopenharmony_ci unsigned sysval_ubo = sys_size ? ubo_count : ~0; 1484bf215546Sopenharmony_ci struct panfrost_ptr ubos = { 0 }; 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci#if PAN_ARCH >= 9 1487bf215546Sopenharmony_ci ubos = pan_pool_alloc_desc_array(&batch->pool.base, 1488bf215546Sopenharmony_ci ubo_count + 1, 1489bf215546Sopenharmony_ci BUFFER); 1490bf215546Sopenharmony_ci#else 1491bf215546Sopenharmony_ci ubos = pan_pool_alloc_desc_array(&batch->pool.base, 1492bf215546Sopenharmony_ci ubo_count + 1, 1493bf215546Sopenharmony_ci UNIFORM_BUFFER); 1494bf215546Sopenharmony_ci#endif 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci if (buffer_count) 1497bf215546Sopenharmony_ci *buffer_count = ubo_count + (sys_size ? 
1 : 0); 1498bf215546Sopenharmony_ci 1499bf215546Sopenharmony_ci /* Upload sysval as a final UBO */ 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_ci if (sys_size) 1502bf215546Sopenharmony_ci panfrost_emit_ubo(ubos.cpu, ubo_count, transfer.gpu, sys_size); 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_ci /* The rest are honest-to-goodness UBOs */ 1505bf215546Sopenharmony_ci 1506bf215546Sopenharmony_ci u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) { 1507bf215546Sopenharmony_ci size_t usz = buf->cb[ubo].buffer_size; 1508bf215546Sopenharmony_ci mali_ptr address = 0; 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_ci if (usz > 0) { 1511bf215546Sopenharmony_ci address = panfrost_map_constant_buffer_gpu(batch, 1512bf215546Sopenharmony_ci stage, buf, ubo); 1513bf215546Sopenharmony_ci } 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci panfrost_emit_ubo(ubos.cpu, ubo, address, usz); 1516bf215546Sopenharmony_ci } 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci if (pushed_words) 1519bf215546Sopenharmony_ci *pushed_words = ss->info.push.count; 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (ss->info.push.count == 0) 1522bf215546Sopenharmony_ci return ubos.gpu; 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci /* Copy push constants required by the shader */ 1525bf215546Sopenharmony_ci struct panfrost_ptr push_transfer = 1526bf215546Sopenharmony_ci pan_pool_alloc_aligned(&batch->pool.base, 1527bf215546Sopenharmony_ci ss->info.push.count * 4, 16); 1528bf215546Sopenharmony_ci 1529bf215546Sopenharmony_ci uint32_t *push_cpu = (uint32_t *) push_transfer.cpu; 1530bf215546Sopenharmony_ci *push_constants = push_transfer.gpu; 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci for (unsigned i = 0; i < ss->info.push.count; ++i) { 1533bf215546Sopenharmony_ci struct panfrost_ubo_word src = ss->info.push.words[i]; 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci if (src.ubo == sysval_ubo) { 1536bf215546Sopenharmony_ci 
unsigned sysval_idx = src.offset / 16; 1537bf215546Sopenharmony_ci unsigned sysval_comp = (src.offset % 16) / 4; 1538bf215546Sopenharmony_ci unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]); 1539bf215546Sopenharmony_ci mali_ptr ptr = push_transfer.gpu + (4 * i); 1540bf215546Sopenharmony_ci 1541bf215546Sopenharmony_ci switch (sysval_type) { 1542bf215546Sopenharmony_ci case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: 1543bf215546Sopenharmony_ci switch (sysval_comp) { 1544bf215546Sopenharmony_ci case 0: 1545bf215546Sopenharmony_ci batch->ctx->first_vertex_sysval_ptr = ptr; 1546bf215546Sopenharmony_ci break; 1547bf215546Sopenharmony_ci case 1: 1548bf215546Sopenharmony_ci batch->ctx->base_vertex_sysval_ptr = ptr; 1549bf215546Sopenharmony_ci break; 1550bf215546Sopenharmony_ci case 2: 1551bf215546Sopenharmony_ci batch->ctx->base_instance_sysval_ptr = ptr; 1552bf215546Sopenharmony_ci break; 1553bf215546Sopenharmony_ci case 3: 1554bf215546Sopenharmony_ci /* Spurious (Midgard doesn't pack) */ 1555bf215546Sopenharmony_ci break; 1556bf215546Sopenharmony_ci default: 1557bf215546Sopenharmony_ci unreachable("Invalid vertex/instance offset component\n"); 1558bf215546Sopenharmony_ci } 1559bf215546Sopenharmony_ci break; 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci case PAN_SYSVAL_NUM_WORK_GROUPS: 1562bf215546Sopenharmony_ci batch->num_wg_sysval[sysval_comp] = ptr; 1563bf215546Sopenharmony_ci break; 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci default: 1566bf215546Sopenharmony_ci break; 1567bf215546Sopenharmony_ci } 1568bf215546Sopenharmony_ci } 1569bf215546Sopenharmony_ci /* Map the UBO, this should be cheap. However this is reading 1570bf215546Sopenharmony_ci * from write-combine memory which is _very_ slow. 
It might pay 1571bf215546Sopenharmony_ci * off to upload sysvals to a staging buffer on the CPU on the 1572bf215546Sopenharmony_ci * assumption sysvals will get pushed (TODO) */ 1573bf215546Sopenharmony_ci 1574bf215546Sopenharmony_ci const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu : 1575bf215546Sopenharmony_ci panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); 1576bf215546Sopenharmony_ci 1577bf215546Sopenharmony_ci /* TODO: Is there any benefit to combining ranges */ 1578bf215546Sopenharmony_ci memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4); 1579bf215546Sopenharmony_ci } 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci return ubos.gpu; 1582bf215546Sopenharmony_ci} 1583bf215546Sopenharmony_ci 1584bf215546Sopenharmony_cistatic mali_ptr 1585bf215546Sopenharmony_cipanfrost_emit_shared_memory(struct panfrost_batch *batch, 1586bf215546Sopenharmony_ci const struct pipe_grid_info *grid) 1587bf215546Sopenharmony_ci{ 1588bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1589bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(ctx->base.screen); 1590bf215546Sopenharmony_ci struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE]; 1591bf215546Sopenharmony_ci struct panfrost_shader_state *ss = &all->variants[all->active_variant]; 1592bf215546Sopenharmony_ci struct panfrost_ptr t = 1593bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci struct pan_tls_info info = { 1596bf215546Sopenharmony_ci .tls.size = ss->info.tls_size, 1597bf215546Sopenharmony_ci .wls.size = ss->info.wls_size, 1598bf215546Sopenharmony_ci .wls.dim.x = grid->grid[0], 1599bf215546Sopenharmony_ci .wls.dim.y = grid->grid[1], 1600bf215546Sopenharmony_ci .wls.dim.z = grid->grid[2], 1601bf215546Sopenharmony_ci }; 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_ci if (ss->info.tls_size) { 1604bf215546Sopenharmony_ci struct panfrost_bo *bo = 
1605bf215546Sopenharmony_ci panfrost_batch_get_scratchpad(batch, 1606bf215546Sopenharmony_ci ss->info.tls_size, 1607bf215546Sopenharmony_ci dev->thread_tls_alloc, 1608bf215546Sopenharmony_ci dev->core_id_range); 1609bf215546Sopenharmony_ci info.tls.ptr = bo->ptr.gpu; 1610bf215546Sopenharmony_ci } 1611bf215546Sopenharmony_ci 1612bf215546Sopenharmony_ci if (ss->info.wls_size) { 1613bf215546Sopenharmony_ci unsigned size = 1614bf215546Sopenharmony_ci pan_wls_adjust_size(info.wls.size) * 1615bf215546Sopenharmony_ci pan_wls_instances(&info.wls.dim) * 1616bf215546Sopenharmony_ci dev->core_id_range; 1617bf215546Sopenharmony_ci 1618bf215546Sopenharmony_ci struct panfrost_bo *bo = 1619bf215546Sopenharmony_ci panfrost_batch_get_shared_memory(batch, size, 1); 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci info.wls.ptr = bo->ptr.gpu; 1622bf215546Sopenharmony_ci } 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci GENX(pan_emit_tls)(&info, t.cpu); 1625bf215546Sopenharmony_ci return t.gpu; 1626bf215546Sopenharmony_ci} 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci#if PAN_ARCH <= 5 1629bf215546Sopenharmony_cistatic mali_ptr 1630bf215546Sopenharmony_cipanfrost_get_tex_desc(struct panfrost_batch *batch, 1631bf215546Sopenharmony_ci enum pipe_shader_type st, 1632bf215546Sopenharmony_ci struct panfrost_sampler_view *view) 1633bf215546Sopenharmony_ci{ 1634bf215546Sopenharmony_ci if (!view) 1635bf215546Sopenharmony_ci return (mali_ptr) 0; 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci struct pipe_sampler_view *pview = &view->base; 1638bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(pview->texture); 1639bf215546Sopenharmony_ci 1640bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, st); 1641bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, view->state.bo, st); 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci return view->state.gpu; 1644bf215546Sopenharmony_ci} 1645bf215546Sopenharmony_ci#endif 
1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_cistatic void 1648bf215546Sopenharmony_cipanfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, 1649bf215546Sopenharmony_ci struct pipe_context *pctx, 1650bf215546Sopenharmony_ci struct pipe_resource *texture) 1651bf215546Sopenharmony_ci{ 1652bf215546Sopenharmony_ci struct panfrost_device *device = pan_device(pctx->screen); 1653bf215546Sopenharmony_ci struct panfrost_context *ctx = pan_context(pctx); 1654bf215546Sopenharmony_ci struct panfrost_resource *prsrc = (struct panfrost_resource *)texture; 1655bf215546Sopenharmony_ci enum pipe_format format = so->base.format; 1656bf215546Sopenharmony_ci assert(prsrc->image.data.bo); 1657bf215546Sopenharmony_ci 1658bf215546Sopenharmony_ci /* Format to access the stencil/depth portion of a Z32_S8 texture */ 1659bf215546Sopenharmony_ci if (format == PIPE_FORMAT_X32_S8X24_UINT) { 1660bf215546Sopenharmony_ci assert(prsrc->separate_stencil); 1661bf215546Sopenharmony_ci texture = &prsrc->separate_stencil->base; 1662bf215546Sopenharmony_ci prsrc = (struct panfrost_resource *)texture; 1663bf215546Sopenharmony_ci format = texture->format; 1664bf215546Sopenharmony_ci } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { 1665bf215546Sopenharmony_ci format = PIPE_FORMAT_Z32_FLOAT; 1666bf215546Sopenharmony_ci } 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format); 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci bool fake_rgtc = !panfrost_supports_compressed_format(device, MALI_BC4_UNORM); 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC && fake_rgtc) { 1673bf215546Sopenharmony_ci if (desc->is_snorm) 1674bf215546Sopenharmony_ci format = PIPE_FORMAT_R8G8B8A8_SNORM; 1675bf215546Sopenharmony_ci else 1676bf215546Sopenharmony_ci format = PIPE_FORMAT_R8G8B8A8_UNORM; 1677bf215546Sopenharmony_ci desc = util_format_description(format); 
1678bf215546Sopenharmony_ci } 1679bf215546Sopenharmony_ci 1680bf215546Sopenharmony_ci so->texture_bo = prsrc->image.data.bo->ptr.gpu; 1681bf215546Sopenharmony_ci so->modifier = prsrc->image.layout.modifier; 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci /* MSAA only supported for 2D textures */ 1684bf215546Sopenharmony_ci 1685bf215546Sopenharmony_ci assert(texture->nr_samples <= 1 || 1686bf215546Sopenharmony_ci so->base.target == PIPE_TEXTURE_2D || 1687bf215546Sopenharmony_ci so->base.target == PIPE_TEXTURE_2D_ARRAY); 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci enum mali_texture_dimension type = 1690bf215546Sopenharmony_ci panfrost_translate_texture_dimension(so->base.target); 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_ci bool is_buffer = (so->base.target == PIPE_BUFFER); 1693bf215546Sopenharmony_ci 1694bf215546Sopenharmony_ci unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level; 1695bf215546Sopenharmony_ci unsigned last_level = is_buffer ? 0 : so->base.u.tex.last_level; 1696bf215546Sopenharmony_ci unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer; 1697bf215546Sopenharmony_ci unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer; 1698bf215546Sopenharmony_ci unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0; 1699bf215546Sopenharmony_ci unsigned buf_size = (is_buffer ? 
so->base.u.buf.size : 0) / 1700bf215546Sopenharmony_ci util_format_get_blocksize(format); 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_ci if (so->base.target == PIPE_TEXTURE_3D) { 1703bf215546Sopenharmony_ci first_layer /= prsrc->image.layout.depth; 1704bf215546Sopenharmony_ci last_layer /= prsrc->image.layout.depth; 1705bf215546Sopenharmony_ci assert(!first_layer && !last_layer); 1706bf215546Sopenharmony_ci } 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_ci struct pan_image_view iview = { 1709bf215546Sopenharmony_ci .format = format, 1710bf215546Sopenharmony_ci .dim = type, 1711bf215546Sopenharmony_ci .first_level = first_level, 1712bf215546Sopenharmony_ci .last_level = last_level, 1713bf215546Sopenharmony_ci .first_layer = first_layer, 1714bf215546Sopenharmony_ci .last_layer = last_layer, 1715bf215546Sopenharmony_ci .swizzle = { 1716bf215546Sopenharmony_ci so->base.swizzle_r, 1717bf215546Sopenharmony_ci so->base.swizzle_g, 1718bf215546Sopenharmony_ci so->base.swizzle_b, 1719bf215546Sopenharmony_ci so->base.swizzle_a, 1720bf215546Sopenharmony_ci }, 1721bf215546Sopenharmony_ci .image = &prsrc->image, 1722bf215546Sopenharmony_ci 1723bf215546Sopenharmony_ci .buf.offset = buf_offset, 1724bf215546Sopenharmony_ci .buf.size = buf_size, 1725bf215546Sopenharmony_ci }; 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci unsigned size = 1728bf215546Sopenharmony_ci (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) + 1729bf215546Sopenharmony_ci GENX(panfrost_estimate_texture_payload_size)(&iview); 1730bf215546Sopenharmony_ci 1731bf215546Sopenharmony_ci struct panfrost_pool *pool = so->pool ?: &ctx->descs; 1732bf215546Sopenharmony_ci struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64); 1733bf215546Sopenharmony_ci so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu); 1734bf215546Sopenharmony_ci 1735bf215546Sopenharmony_ci void *tex = (PAN_ARCH >= 6) ? 
&so->bifrost_descriptor : payload.cpu; 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci if (PAN_ARCH <= 5) { 1738bf215546Sopenharmony_ci payload.cpu += pan_size(TEXTURE); 1739bf215546Sopenharmony_ci payload.gpu += pan_size(TEXTURE); 1740bf215546Sopenharmony_ci } 1741bf215546Sopenharmony_ci 1742bf215546Sopenharmony_ci GENX(panfrost_new_texture)(device, &iview, tex, &payload); 1743bf215546Sopenharmony_ci} 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_cistatic void 1746bf215546Sopenharmony_cipanfrost_update_sampler_view(struct panfrost_sampler_view *view, 1747bf215546Sopenharmony_ci struct pipe_context *pctx) 1748bf215546Sopenharmony_ci{ 1749bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(view->base.texture); 1750bf215546Sopenharmony_ci if (view->texture_bo != rsrc->image.data.bo->ptr.gpu || 1751bf215546Sopenharmony_ci view->modifier != rsrc->image.layout.modifier) { 1752bf215546Sopenharmony_ci panfrost_bo_unreference(view->state.bo); 1753bf215546Sopenharmony_ci panfrost_create_sampler_view_bo(view, pctx, &rsrc->base); 1754bf215546Sopenharmony_ci } 1755bf215546Sopenharmony_ci} 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_cistatic mali_ptr 1758bf215546Sopenharmony_cipanfrost_emit_texture_descriptors(struct panfrost_batch *batch, 1759bf215546Sopenharmony_ci enum pipe_shader_type stage) 1760bf215546Sopenharmony_ci{ 1761bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci if (!ctx->sampler_view_count[stage]) 1764bf215546Sopenharmony_ci return 0; 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci#if PAN_ARCH >= 6 1767bf215546Sopenharmony_ci struct panfrost_ptr T = 1768bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, 1769bf215546Sopenharmony_ci ctx->sampler_view_count[stage], 1770bf215546Sopenharmony_ci TEXTURE); 1771bf215546Sopenharmony_ci struct mali_texture_packed *out = 1772bf215546Sopenharmony_ci (struct mali_texture_packed *) 
T.cpu; 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_ci for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { 1775bf215546Sopenharmony_ci struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci if (!view) { 1778bf215546Sopenharmony_ci memset(&out[i], 0, sizeof(out[i])); 1779bf215546Sopenharmony_ci continue; 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci struct pipe_sampler_view *pview = &view->base; 1783bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(pview->texture); 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci panfrost_update_sampler_view(view, &ctx->base); 1786bf215546Sopenharmony_ci out[i] = view->bifrost_descriptor; 1787bf215546Sopenharmony_ci 1788bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, stage); 1789bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, view->state.bo, stage); 1790bf215546Sopenharmony_ci } 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci return T.gpu; 1793bf215546Sopenharmony_ci#else 1794bf215546Sopenharmony_ci uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_ci for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { 1797bf215546Sopenharmony_ci struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; 1798bf215546Sopenharmony_ci 1799bf215546Sopenharmony_ci if (!view) { 1800bf215546Sopenharmony_ci trampolines[i] = 0; 1801bf215546Sopenharmony_ci continue; 1802bf215546Sopenharmony_ci } 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_ci panfrost_update_sampler_view(view, &ctx->base); 1805bf215546Sopenharmony_ci 1806bf215546Sopenharmony_ci trampolines[i] = panfrost_get_tex_desc(batch, stage, view); 1807bf215546Sopenharmony_ci } 1808bf215546Sopenharmony_ci 1809bf215546Sopenharmony_ci return pan_pool_upload_aligned(&batch->pool.base, trampolines, 1810bf215546Sopenharmony_ci sizeof(uint64_t) * 
1811bf215546Sopenharmony_ci ctx->sampler_view_count[stage], 1812bf215546Sopenharmony_ci sizeof(uint64_t)); 1813bf215546Sopenharmony_ci#endif 1814bf215546Sopenharmony_ci} 1815bf215546Sopenharmony_ci 1816bf215546Sopenharmony_cistatic mali_ptr 1817bf215546Sopenharmony_cipanfrost_emit_sampler_descriptors(struct panfrost_batch *batch, 1818bf215546Sopenharmony_ci enum pipe_shader_type stage) 1819bf215546Sopenharmony_ci{ 1820bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1821bf215546Sopenharmony_ci 1822bf215546Sopenharmony_ci if (!ctx->sampler_count[stage]) 1823bf215546Sopenharmony_ci return 0; 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci struct panfrost_ptr T = 1826bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, 1827bf215546Sopenharmony_ci ctx->sampler_count[stage], 1828bf215546Sopenharmony_ci SAMPLER); 1829bf215546Sopenharmony_ci struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu; 1830bf215546Sopenharmony_ci 1831bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) { 1832bf215546Sopenharmony_ci struct panfrost_sampler_state *st = ctx->samplers[stage][i]; 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci out[i] = st ? st->hw : (struct mali_sampler_packed){0}; 1835bf215546Sopenharmony_ci } 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci return T.gpu; 1838bf215546Sopenharmony_ci} 1839bf215546Sopenharmony_ci 1840bf215546Sopenharmony_ci#if PAN_ARCH <= 7 1841bf215546Sopenharmony_ci/* Packs all image attribute descs and attribute buffer descs. 1842bf215546Sopenharmony_ci * `first_image_buf_index` must be the index of the first image attribute buffer descriptor. 
1843bf215546Sopenharmony_ci */ 1844bf215546Sopenharmony_cistatic void 1845bf215546Sopenharmony_ciemit_image_attribs(struct panfrost_context *ctx, enum pipe_shader_type shader, 1846bf215546Sopenharmony_ci struct mali_attribute_packed *attribs, unsigned first_buf) 1847bf215546Sopenharmony_ci{ 1848bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(ctx->base.screen); 1849bf215546Sopenharmony_ci unsigned last_bit = util_last_bit(ctx->image_mask[shader]); 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci for (unsigned i = 0; i < last_bit; ++i) { 1852bf215546Sopenharmony_ci enum pipe_format format = ctx->images[shader][i].format; 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_ci pan_pack(attribs + i, ATTRIBUTE, cfg) { 1855bf215546Sopenharmony_ci /* Continuation record means 2 buffers per image */ 1856bf215546Sopenharmony_ci cfg.buffer_index = first_buf + (i * 2); 1857bf215546Sopenharmony_ci cfg.offset_enable = (PAN_ARCH <= 5); 1858bf215546Sopenharmony_ci cfg.format = dev->formats[format].hw; 1859bf215546Sopenharmony_ci } 1860bf215546Sopenharmony_ci } 1861bf215546Sopenharmony_ci} 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_cistatic enum mali_attribute_type 1864bf215546Sopenharmony_cipan_modifier_to_attr_type(uint64_t modifier) 1865bf215546Sopenharmony_ci{ 1866bf215546Sopenharmony_ci switch (modifier) { 1867bf215546Sopenharmony_ci case DRM_FORMAT_MOD_LINEAR: 1868bf215546Sopenharmony_ci return MALI_ATTRIBUTE_TYPE_3D_LINEAR; 1869bf215546Sopenharmony_ci case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: 1870bf215546Sopenharmony_ci return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED; 1871bf215546Sopenharmony_ci default: 1872bf215546Sopenharmony_ci unreachable("Invalid modifier for attribute record"); 1873bf215546Sopenharmony_ci } 1874bf215546Sopenharmony_ci} 1875bf215546Sopenharmony_ci 1876bf215546Sopenharmony_cistatic void 1877bf215546Sopenharmony_ciemit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, 1878bf215546Sopenharmony_ci 
struct mali_attribute_buffer_packed *bufs, 1879bf215546Sopenharmony_ci unsigned first_image_buf_index) 1880bf215546Sopenharmony_ci{ 1881bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1882bf215546Sopenharmony_ci unsigned last_bit = util_last_bit(ctx->image_mask[shader]); 1883bf215546Sopenharmony_ci 1884bf215546Sopenharmony_ci for (unsigned i = 0; i < last_bit; ++i) { 1885bf215546Sopenharmony_ci struct pipe_image_view *image = &ctx->images[shader][i]; 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci if (!(ctx->image_mask[shader] & (1 << i)) || 1888bf215546Sopenharmony_ci !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) { 1889bf215546Sopenharmony_ci /* Unused image bindings */ 1890bf215546Sopenharmony_ci pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg); 1891bf215546Sopenharmony_ci pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg); 1892bf215546Sopenharmony_ci continue; 1893bf215546Sopenharmony_ci } 1894bf215546Sopenharmony_ci 1895bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(image->resource); 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_ci /* TODO: MSAA */ 1898bf215546Sopenharmony_ci assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported"); 1899bf215546Sopenharmony_ci 1900bf215546Sopenharmony_ci bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D; 1901bf215546Sopenharmony_ci bool is_buffer = rsrc->base.target == PIPE_BUFFER; 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci unsigned offset = is_buffer ? image->u.buf.offset : 1904bf215546Sopenharmony_ci panfrost_texture_offset(&rsrc->image.layout, 1905bf215546Sopenharmony_ci image->u.tex.level, 1906bf215546Sopenharmony_ci is_3d ? 0 : image->u.tex.first_layer, 1907bf215546Sopenharmony_ci is_3d ? 
image->u.tex.first_layer : 0); 1908bf215546Sopenharmony_ci 1909bf215546Sopenharmony_ci panfrost_track_image_access(batch, shader, image); 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ci pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { 1912bf215546Sopenharmony_ci cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier); 1913bf215546Sopenharmony_ci cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset; 1914bf215546Sopenharmony_ci cfg.stride = util_format_get_blocksize(image->format); 1915bf215546Sopenharmony_ci cfg.size = rsrc->image.data.bo->size - offset; 1916bf215546Sopenharmony_ci } 1917bf215546Sopenharmony_ci 1918bf215546Sopenharmony_ci if (is_buffer) { 1919bf215546Sopenharmony_ci pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { 1920bf215546Sopenharmony_ci cfg.s_dimension = rsrc->base.width0 / 1921bf215546Sopenharmony_ci util_format_get_blocksize(image->format); 1922bf215546Sopenharmony_ci cfg.t_dimension = cfg.r_dimension = 1; 1923bf215546Sopenharmony_ci } 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci continue; 1926bf215546Sopenharmony_ci } 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { 1929bf215546Sopenharmony_ci unsigned level = image->u.tex.level; 1930bf215546Sopenharmony_ci 1931bf215546Sopenharmony_ci cfg.s_dimension = u_minify(rsrc->base.width0, level); 1932bf215546Sopenharmony_ci cfg.t_dimension = u_minify(rsrc->base.height0, level); 1933bf215546Sopenharmony_ci cfg.r_dimension = is_3d ? 
1934bf215546Sopenharmony_ci u_minify(rsrc->base.depth0, level) : 1935bf215546Sopenharmony_ci image->u.tex.last_layer - image->u.tex.first_layer + 1; 1936bf215546Sopenharmony_ci 1937bf215546Sopenharmony_ci cfg.row_stride = 1938bf215546Sopenharmony_ci rsrc->image.layout.slices[level].row_stride; 1939bf215546Sopenharmony_ci 1940bf215546Sopenharmony_ci if (rsrc->base.target != PIPE_TEXTURE_2D) { 1941bf215546Sopenharmony_ci cfg.slice_stride = 1942bf215546Sopenharmony_ci panfrost_get_layer_stride(&rsrc->image.layout, 1943bf215546Sopenharmony_ci level); 1944bf215546Sopenharmony_ci } 1945bf215546Sopenharmony_ci } 1946bf215546Sopenharmony_ci } 1947bf215546Sopenharmony_ci} 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_cistatic mali_ptr 1950bf215546Sopenharmony_cipanfrost_emit_image_attribs(struct panfrost_batch *batch, 1951bf215546Sopenharmony_ci mali_ptr *buffers, 1952bf215546Sopenharmony_ci enum pipe_shader_type type) 1953bf215546Sopenharmony_ci{ 1954bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1955bf215546Sopenharmony_ci struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type); 1956bf215546Sopenharmony_ci 1957bf215546Sopenharmony_ci if (!shader->info.attribute_count) { 1958bf215546Sopenharmony_ci *buffers = 0; 1959bf215546Sopenharmony_ci return 0; 1960bf215546Sopenharmony_ci } 1961bf215546Sopenharmony_ci 1962bf215546Sopenharmony_ci /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */ 1963bf215546Sopenharmony_ci unsigned attr_count = shader->info.attribute_count; 1964bf215546Sopenharmony_ci unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 
1 : 0); 1965bf215546Sopenharmony_ci 1966bf215546Sopenharmony_ci struct panfrost_ptr bufs = 1967bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER); 1968bf215546Sopenharmony_ci 1969bf215546Sopenharmony_ci struct panfrost_ptr attribs = 1970bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE); 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci emit_image_attribs(ctx, type, attribs.cpu, 0); 1973bf215546Sopenharmony_ci emit_image_bufs(batch, type, bufs.cpu, 0); 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci /* We need an empty attrib buf to stop the prefetching on Bifrost */ 1976bf215546Sopenharmony_ci#if PAN_ARCH >= 6 1977bf215546Sopenharmony_ci pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)), 1978bf215546Sopenharmony_ci ATTRIBUTE_BUFFER, cfg); 1979bf215546Sopenharmony_ci#endif 1980bf215546Sopenharmony_ci 1981bf215546Sopenharmony_ci *buffers = bufs.gpu; 1982bf215546Sopenharmony_ci return attribs.gpu; 1983bf215546Sopenharmony_ci} 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_cistatic mali_ptr 1986bf215546Sopenharmony_cipanfrost_emit_vertex_data(struct panfrost_batch *batch, 1987bf215546Sopenharmony_ci mali_ptr *buffers) 1988bf215546Sopenharmony_ci{ 1989bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 1990bf215546Sopenharmony_ci struct panfrost_vertex_state *so = ctx->vertex; 1991bf215546Sopenharmony_ci struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 1992bf215546Sopenharmony_ci bool instanced = ctx->indirect_draw || ctx->instance_count > 1; 1993bf215546Sopenharmony_ci uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX]; 1994bf215546Sopenharmony_ci unsigned nr_images = util_last_bit(image_mask); 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci /* Worst case: everything is NPOT, which is only possible if instancing 1997bf215546Sopenharmony_ci * is enabled. 
Otherwise single record is gauranteed. 1998bf215546Sopenharmony_ci * Also, we allocate more memory than what's needed here if either instancing 1999bf215546Sopenharmony_ci * is enabled or images are present, this can be improved. */ 2000bf215546Sopenharmony_ci unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; 2001bf215546Sopenharmony_ci unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) + 2002bf215546Sopenharmony_ci (PAN_ARCH >= 6 ? 1 : 0); 2003bf215546Sopenharmony_ci 2004bf215546Sopenharmony_ci unsigned count = vs->info.attribute_count; 2005bf215546Sopenharmony_ci 2006bf215546Sopenharmony_ci if (vs->xfb) 2007bf215546Sopenharmony_ci count = MAX2(count, vs->xfb->info.attribute_count); 2008bf215546Sopenharmony_ci 2009bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2010bf215546Sopenharmony_ci /* Midgard needs vertexid/instanceid handled specially */ 2011bf215546Sopenharmony_ci bool special_vbufs = count >= PAN_VERTEX_ID; 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_ci if (special_vbufs) 2014bf215546Sopenharmony_ci nr_bufs += 2; 2015bf215546Sopenharmony_ci#endif 2016bf215546Sopenharmony_ci 2017bf215546Sopenharmony_ci if (!nr_bufs) { 2018bf215546Sopenharmony_ci *buffers = 0; 2019bf215546Sopenharmony_ci return 0; 2020bf215546Sopenharmony_ci } 2021bf215546Sopenharmony_ci 2022bf215546Sopenharmony_ci struct panfrost_ptr S = 2023bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs, 2024bf215546Sopenharmony_ci ATTRIBUTE_BUFFER); 2025bf215546Sopenharmony_ci struct panfrost_ptr T = 2026bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, count, 2027bf215546Sopenharmony_ci ATTRIBUTE); 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci struct mali_attribute_buffer_packed *bufs = 2030bf215546Sopenharmony_ci (struct mali_attribute_buffer_packed *) S.cpu; 2031bf215546Sopenharmony_ci 2032bf215546Sopenharmony_ci struct mali_attribute_packed *out = 2033bf215546Sopenharmony_ci (struct mali_attribute_packed *) 
T.cpu; 2034bf215546Sopenharmony_ci 2035bf215546Sopenharmony_ci unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 }; 2036bf215546Sopenharmony_ci unsigned k = 0; 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_ci for (unsigned i = 0; i < so->nr_bufs; ++i) { 2039bf215546Sopenharmony_ci unsigned vbi = so->buffers[i].vbi; 2040bf215546Sopenharmony_ci unsigned divisor = so->buffers[i].divisor; 2041bf215546Sopenharmony_ci attrib_to_buffer[i] = k; 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci if (!(ctx->vb_mask & (1 << vbi))) 2044bf215546Sopenharmony_ci continue; 2045bf215546Sopenharmony_ci 2046bf215546Sopenharmony_ci struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; 2047bf215546Sopenharmony_ci struct panfrost_resource *rsrc; 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci rsrc = pan_resource(buf->buffer.resource); 2050bf215546Sopenharmony_ci if (!rsrc) 2051bf215546Sopenharmony_ci continue; 2052bf215546Sopenharmony_ci 2053bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); 2054bf215546Sopenharmony_ci 2055bf215546Sopenharmony_ci /* Mask off lower bits, see offset fixup below */ 2056bf215546Sopenharmony_ci mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset; 2057bf215546Sopenharmony_ci mali_ptr addr = raw_addr & ~63; 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci /* Since we advanced the base pointer, we shrink the buffer 2060bf215546Sopenharmony_ci * size, but add the offset we subtracted */ 2061bf215546Sopenharmony_ci unsigned size = rsrc->base.width0 + (raw_addr - addr) 2062bf215546Sopenharmony_ci - buf->buffer_offset; 2063bf215546Sopenharmony_ci 2064bf215546Sopenharmony_ci /* When there is a divisor, the hardware-level divisor is 2065bf215546Sopenharmony_ci * the product of the instance divisor and the padded count */ 2066bf215546Sopenharmony_ci unsigned stride = buf->stride; 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci if (ctx->indirect_draw) { 
2069bf215546Sopenharmony_ci /* We allocated 2 records for each attribute buffer */ 2070bf215546Sopenharmony_ci assert((k & 1) == 0); 2071bf215546Sopenharmony_ci 2072bf215546Sopenharmony_ci /* With indirect draws we can't guess the vertex_count. 2073bf215546Sopenharmony_ci * Pre-set the address, stride and size fields, the 2074bf215546Sopenharmony_ci * compute shader do the rest. 2075bf215546Sopenharmony_ci */ 2076bf215546Sopenharmony_ci pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { 2077bf215546Sopenharmony_ci cfg.type = MALI_ATTRIBUTE_TYPE_1D; 2078bf215546Sopenharmony_ci cfg.pointer = addr; 2079bf215546Sopenharmony_ci cfg.stride = stride; 2080bf215546Sopenharmony_ci cfg.size = size; 2081bf215546Sopenharmony_ci } 2082bf215546Sopenharmony_ci 2083bf215546Sopenharmony_ci /* We store the unmodified divisor in the continuation 2084bf215546Sopenharmony_ci * slot so the compute shader can retrieve it. 2085bf215546Sopenharmony_ci */ 2086bf215546Sopenharmony_ci pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { 2087bf215546Sopenharmony_ci cfg.divisor = divisor; 2088bf215546Sopenharmony_ci } 2089bf215546Sopenharmony_ci 2090bf215546Sopenharmony_ci k += 2; 2091bf215546Sopenharmony_ci continue; 2092bf215546Sopenharmony_ci } 2093bf215546Sopenharmony_ci 2094bf215546Sopenharmony_ci unsigned hw_divisor = ctx->padded_count * divisor; 2095bf215546Sopenharmony_ci 2096bf215546Sopenharmony_ci if (ctx->instance_count <= 1) { 2097bf215546Sopenharmony_ci /* Per-instance would be every attribute equal */ 2098bf215546Sopenharmony_ci if (divisor) 2099bf215546Sopenharmony_ci stride = 0; 2100bf215546Sopenharmony_ci 2101bf215546Sopenharmony_ci pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { 2102bf215546Sopenharmony_ci cfg.pointer = addr; 2103bf215546Sopenharmony_ci cfg.stride = stride; 2104bf215546Sopenharmony_ci cfg.size = size; 2105bf215546Sopenharmony_ci } 2106bf215546Sopenharmony_ci } else if (!divisor) { 2107bf215546Sopenharmony_ci pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { 
2108bf215546Sopenharmony_ci cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; 2109bf215546Sopenharmony_ci cfg.pointer = addr; 2110bf215546Sopenharmony_ci cfg.stride = stride; 2111bf215546Sopenharmony_ci cfg.size = size; 2112bf215546Sopenharmony_ci cfg.divisor = ctx->padded_count; 2113bf215546Sopenharmony_ci } 2114bf215546Sopenharmony_ci } else if (util_is_power_of_two_or_zero(hw_divisor)) { 2115bf215546Sopenharmony_ci pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { 2116bf215546Sopenharmony_ci cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; 2117bf215546Sopenharmony_ci cfg.pointer = addr; 2118bf215546Sopenharmony_ci cfg.stride = stride; 2119bf215546Sopenharmony_ci cfg.size = size; 2120bf215546Sopenharmony_ci cfg.divisor_r = __builtin_ctz(hw_divisor); 2121bf215546Sopenharmony_ci } 2122bf215546Sopenharmony_ci 2123bf215546Sopenharmony_ci } else { 2124bf215546Sopenharmony_ci unsigned shift = 0, extra_flags = 0; 2125bf215546Sopenharmony_ci 2126bf215546Sopenharmony_ci unsigned magic_divisor = 2127bf215546Sopenharmony_ci panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags); 2128bf215546Sopenharmony_ci 2129bf215546Sopenharmony_ci /* Records with continuations must be aligned */ 2130bf215546Sopenharmony_ci k = ALIGN_POT(k, 2); 2131bf215546Sopenharmony_ci attrib_to_buffer[i] = k; 2132bf215546Sopenharmony_ci 2133bf215546Sopenharmony_ci pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { 2134bf215546Sopenharmony_ci cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; 2135bf215546Sopenharmony_ci cfg.pointer = addr; 2136bf215546Sopenharmony_ci cfg.stride = stride; 2137bf215546Sopenharmony_ci cfg.size = size; 2138bf215546Sopenharmony_ci 2139bf215546Sopenharmony_ci cfg.divisor_r = shift; 2140bf215546Sopenharmony_ci cfg.divisor_e = extra_flags; 2141bf215546Sopenharmony_ci } 2142bf215546Sopenharmony_ci 2143bf215546Sopenharmony_ci pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { 2144bf215546Sopenharmony_ci cfg.divisor_numerator = magic_divisor; 2145bf215546Sopenharmony_ci 
cfg.divisor = divisor; 2146bf215546Sopenharmony_ci } 2147bf215546Sopenharmony_ci 2148bf215546Sopenharmony_ci ++k; 2149bf215546Sopenharmony_ci } 2150bf215546Sopenharmony_ci 2151bf215546Sopenharmony_ci ++k; 2152bf215546Sopenharmony_ci } 2153bf215546Sopenharmony_ci 2154bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2155bf215546Sopenharmony_ci /* Add special gl_VertexID/gl_InstanceID buffers */ 2156bf215546Sopenharmony_ci if (special_vbufs) { 2157bf215546Sopenharmony_ci panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); 2158bf215546Sopenharmony_ci 2159bf215546Sopenharmony_ci pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) { 2160bf215546Sopenharmony_ci cfg.buffer_index = k++; 2161bf215546Sopenharmony_ci cfg.format = so->formats[PAN_VERTEX_ID]; 2162bf215546Sopenharmony_ci } 2163bf215546Sopenharmony_ci 2164bf215546Sopenharmony_ci panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); 2165bf215546Sopenharmony_ci 2166bf215546Sopenharmony_ci pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) { 2167bf215546Sopenharmony_ci cfg.buffer_index = k++; 2168bf215546Sopenharmony_ci cfg.format = so->formats[PAN_INSTANCE_ID]; 2169bf215546Sopenharmony_ci } 2170bf215546Sopenharmony_ci } 2171bf215546Sopenharmony_ci#endif 2172bf215546Sopenharmony_ci 2173bf215546Sopenharmony_ci if (nr_images) { 2174bf215546Sopenharmony_ci k = ALIGN_POT(k, 2); 2175bf215546Sopenharmony_ci emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k); 2176bf215546Sopenharmony_ci emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k); 2177bf215546Sopenharmony_ci k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2); 2178bf215546Sopenharmony_ci } 2179bf215546Sopenharmony_ci 2180bf215546Sopenharmony_ci#if PAN_ARCH >= 6 2181bf215546Sopenharmony_ci /* We need an empty attrib buf to stop the prefetching on Bifrost */ 2182bf215546Sopenharmony_ci pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg); 2183bf215546Sopenharmony_ci#endif 2184bf215546Sopenharmony_ci 
2185bf215546Sopenharmony_ci /* Attribute addresses require 64-byte alignment, so let: 2186bf215546Sopenharmony_ci * 2187bf215546Sopenharmony_ci * base' = base & ~63 = base - (base & 63) 2188bf215546Sopenharmony_ci * offset' = offset + (base & 63) 2189bf215546Sopenharmony_ci * 2190bf215546Sopenharmony_ci * Since base' + offset' = base + offset, these are equivalent 2191bf215546Sopenharmony_ci * addressing modes and now base is 64 aligned. 2192bf215546Sopenharmony_ci */ 2193bf215546Sopenharmony_ci 2194bf215546Sopenharmony_ci /* While these are usually equal, they are not required to be. In some 2195bf215546Sopenharmony_ci * cases, u_blitter passes too high a value for num_elements. 2196bf215546Sopenharmony_ci */ 2197bf215546Sopenharmony_ci assert(vs->info.attributes_read_count <= so->num_elements); 2198bf215546Sopenharmony_ci 2199bf215546Sopenharmony_ci for (unsigned i = 0; i < vs->info.attributes_read_count; ++i) { 2200bf215546Sopenharmony_ci unsigned vbi = so->pipe[i].vertex_buffer_index; 2201bf215546Sopenharmony_ci struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; 2202bf215546Sopenharmony_ci 2203bf215546Sopenharmony_ci /* BOs are aligned; just fixup for buffer_offset */ 2204bf215546Sopenharmony_ci signed src_offset = so->pipe[i].src_offset; 2205bf215546Sopenharmony_ci src_offset += (buf->buffer_offset & 63); 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_ci /* Base instance offset */ 2208bf215546Sopenharmony_ci if (ctx->base_instance && so->pipe[i].instance_divisor) { 2209bf215546Sopenharmony_ci src_offset += (ctx->base_instance * buf->stride) / 2210bf215546Sopenharmony_ci so->pipe[i].instance_divisor; 2211bf215546Sopenharmony_ci } 2212bf215546Sopenharmony_ci 2213bf215546Sopenharmony_ci /* Also, somewhat obscurely per-instance data needs to be 2214bf215546Sopenharmony_ci * offset in response to a delayed start in an indexed draw */ 2215bf215546Sopenharmony_ci 2216bf215546Sopenharmony_ci if (so->pipe[i].instance_divisor && ctx->instance_count > 1) 
2217bf215546Sopenharmony_ci src_offset -= buf->stride * ctx->offset_start; 2218bf215546Sopenharmony_ci 2219bf215546Sopenharmony_ci pan_pack(out + i, ATTRIBUTE, cfg) { 2220bf215546Sopenharmony_ci cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]]; 2221bf215546Sopenharmony_ci cfg.format = so->formats[i]; 2222bf215546Sopenharmony_ci cfg.offset = src_offset; 2223bf215546Sopenharmony_ci } 2224bf215546Sopenharmony_ci } 2225bf215546Sopenharmony_ci 2226bf215546Sopenharmony_ci *buffers = S.gpu; 2227bf215546Sopenharmony_ci return T.gpu; 2228bf215546Sopenharmony_ci} 2229bf215546Sopenharmony_ci 2230bf215546Sopenharmony_cistatic mali_ptr 2231bf215546Sopenharmony_cipanfrost_emit_varyings(struct panfrost_batch *batch, 2232bf215546Sopenharmony_ci struct mali_attribute_buffer_packed *slot, 2233bf215546Sopenharmony_ci unsigned stride, unsigned count) 2234bf215546Sopenharmony_ci{ 2235bf215546Sopenharmony_ci unsigned size = stride * count; 2236bf215546Sopenharmony_ci mali_ptr ptr = 2237bf215546Sopenharmony_ci batch->ctx->indirect_draw ? 
0 : 2238bf215546Sopenharmony_ci pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu; 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci pan_pack(slot, ATTRIBUTE_BUFFER, cfg) { 2241bf215546Sopenharmony_ci cfg.stride = stride; 2242bf215546Sopenharmony_ci cfg.size = size; 2243bf215546Sopenharmony_ci cfg.pointer = ptr; 2244bf215546Sopenharmony_ci } 2245bf215546Sopenharmony_ci 2246bf215546Sopenharmony_ci return ptr; 2247bf215546Sopenharmony_ci} 2248bf215546Sopenharmony_ci 2249bf215546Sopenharmony_ci/* Given a varying, figure out which index it corresponds to */ 2250bf215546Sopenharmony_ci 2251bf215546Sopenharmony_cistatic inline unsigned 2252bf215546Sopenharmony_cipan_varying_index(unsigned present, enum pan_special_varying v) 2253bf215546Sopenharmony_ci{ 2254bf215546Sopenharmony_ci return util_bitcount(present & BITFIELD_MASK(v)); 2255bf215546Sopenharmony_ci} 2256bf215546Sopenharmony_ci 2257bf215546Sopenharmony_ci/* Determines which varying buffers are required */ 2258bf215546Sopenharmony_ci 2259bf215546Sopenharmony_cistatic inline unsigned 2260bf215546Sopenharmony_cipan_varying_present(const struct panfrost_device *dev, 2261bf215546Sopenharmony_ci struct pan_shader_info *producer, 2262bf215546Sopenharmony_ci struct pan_shader_info *consumer, 2263bf215546Sopenharmony_ci uint16_t point_coord_mask) 2264bf215546Sopenharmony_ci{ 2265bf215546Sopenharmony_ci /* At the moment we always emit general and position buffers. 
Not 2266bf215546Sopenharmony_ci * strictly necessary but usually harmless */ 2267bf215546Sopenharmony_ci 2268bf215546Sopenharmony_ci unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION); 2269bf215546Sopenharmony_ci 2270bf215546Sopenharmony_ci /* Enable special buffers by the shader info */ 2271bf215546Sopenharmony_ci 2272bf215546Sopenharmony_ci if (producer->vs.writes_point_size) 2273bf215546Sopenharmony_ci present |= BITFIELD_BIT(PAN_VARY_PSIZ); 2274bf215546Sopenharmony_ci 2275bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2276bf215546Sopenharmony_ci /* On Midgard, these exist as real varyings. Later architectures use 2277bf215546Sopenharmony_ci * LD_VAR_SPECIAL reads instead. */ 2278bf215546Sopenharmony_ci 2279bf215546Sopenharmony_ci if (consumer->fs.reads_point_coord) 2280bf215546Sopenharmony_ci present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); 2281bf215546Sopenharmony_ci 2282bf215546Sopenharmony_ci if (consumer->fs.reads_face) 2283bf215546Sopenharmony_ci present |= BITFIELD_BIT(PAN_VARY_FACE); 2284bf215546Sopenharmony_ci 2285bf215546Sopenharmony_ci if (consumer->fs.reads_frag_coord) 2286bf215546Sopenharmony_ci present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD); 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci /* Also, if we have a point sprite, we need a point coord buffer */ 2289bf215546Sopenharmony_ci 2290bf215546Sopenharmony_ci for (unsigned i = 0; i < consumer->varyings.input_count; i++) { 2291bf215546Sopenharmony_ci gl_varying_slot loc = consumer->varyings.input[i].location; 2292bf215546Sopenharmony_ci 2293bf215546Sopenharmony_ci if (util_varying_is_point_coord(loc, point_coord_mask)) 2294bf215546Sopenharmony_ci present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); 2295bf215546Sopenharmony_ci } 2296bf215546Sopenharmony_ci#endif 2297bf215546Sopenharmony_ci 2298bf215546Sopenharmony_ci return present; 2299bf215546Sopenharmony_ci} 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_ci/* Emitters for varying records */ 2302bf215546Sopenharmony_ci 
2303bf215546Sopenharmony_cistatic void 2304bf215546Sopenharmony_cipan_emit_vary(const struct panfrost_device *dev, 2305bf215546Sopenharmony_ci struct mali_attribute_packed *out, 2306bf215546Sopenharmony_ci unsigned buffer_index, 2307bf215546Sopenharmony_ci mali_pixel_format format, unsigned offset) 2308bf215546Sopenharmony_ci{ 2309bf215546Sopenharmony_ci pan_pack(out, ATTRIBUTE, cfg) { 2310bf215546Sopenharmony_ci cfg.buffer_index = buffer_index; 2311bf215546Sopenharmony_ci cfg.offset_enable = (PAN_ARCH <= 5); 2312bf215546Sopenharmony_ci cfg.format = format; 2313bf215546Sopenharmony_ci cfg.offset = offset; 2314bf215546Sopenharmony_ci } 2315bf215546Sopenharmony_ci} 2316bf215546Sopenharmony_ci 2317bf215546Sopenharmony_ci/* Special records */ 2318bf215546Sopenharmony_ci 2319bf215546Sopenharmony_cistatic const struct { 2320bf215546Sopenharmony_ci unsigned components; 2321bf215546Sopenharmony_ci enum mali_format format; 2322bf215546Sopenharmony_ci} pan_varying_formats[PAN_VARY_MAX] = { 2323bf215546Sopenharmony_ci [PAN_VARY_POSITION] = { 4, MALI_SNAP_4 }, 2324bf215546Sopenharmony_ci [PAN_VARY_PSIZ] = { 1, MALI_R16F }, 2325bf215546Sopenharmony_ci [PAN_VARY_PNTCOORD] = { 1, MALI_R16F }, 2326bf215546Sopenharmony_ci [PAN_VARY_FACE] = { 1, MALI_R32I }, 2327bf215546Sopenharmony_ci [PAN_VARY_FRAGCOORD] = { 4, MALI_RGBA32F }, 2328bf215546Sopenharmony_ci}; 2329bf215546Sopenharmony_ci 2330bf215546Sopenharmony_cistatic mali_pixel_format 2331bf215546Sopenharmony_cipan_special_format(const struct panfrost_device *dev, 2332bf215546Sopenharmony_ci enum pan_special_varying buf) 2333bf215546Sopenharmony_ci{ 2334bf215546Sopenharmony_ci assert(buf < PAN_VARY_MAX); 2335bf215546Sopenharmony_ci mali_pixel_format format = (pan_varying_formats[buf].format << 12); 2336bf215546Sopenharmony_ci 2337bf215546Sopenharmony_ci#if PAN_ARCH <= 6 2338bf215546Sopenharmony_ci unsigned nr = pan_varying_formats[buf].components; 2339bf215546Sopenharmony_ci format |= panfrost_get_default_swizzle(nr); 
2340bf215546Sopenharmony_ci#endif 2341bf215546Sopenharmony_ci 2342bf215546Sopenharmony_ci return format; 2343bf215546Sopenharmony_ci} 2344bf215546Sopenharmony_ci 2345bf215546Sopenharmony_cistatic void 2346bf215546Sopenharmony_cipan_emit_vary_special(const struct panfrost_device *dev, 2347bf215546Sopenharmony_ci struct mali_attribute_packed *out, 2348bf215546Sopenharmony_ci unsigned present, enum pan_special_varying buf) 2349bf215546Sopenharmony_ci{ 2350bf215546Sopenharmony_ci pan_emit_vary(dev, out, pan_varying_index(present, buf), 2351bf215546Sopenharmony_ci pan_special_format(dev, buf), 0); 2352bf215546Sopenharmony_ci} 2353bf215546Sopenharmony_ci 2354bf215546Sopenharmony_ci/* Negative indicates a varying is not found */ 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_cistatic signed 2357bf215546Sopenharmony_cipan_find_vary(const struct pan_shader_varying *vary, 2358bf215546Sopenharmony_ci unsigned vary_count, unsigned loc) 2359bf215546Sopenharmony_ci{ 2360bf215546Sopenharmony_ci for (unsigned i = 0; i < vary_count; ++i) { 2361bf215546Sopenharmony_ci if (vary[i].location == loc) 2362bf215546Sopenharmony_ci return i; 2363bf215546Sopenharmony_ci } 2364bf215546Sopenharmony_ci 2365bf215546Sopenharmony_ci return -1; 2366bf215546Sopenharmony_ci} 2367bf215546Sopenharmony_ci 2368bf215546Sopenharmony_ci/* Assign varying locations for the general buffer. Returns the calculated 2369bf215546Sopenharmony_ci * per-vertex stride, and outputs offsets into the passed array. Negative 2370bf215546Sopenharmony_ci * offset indicates a varying is not used. 
*/ 2371bf215546Sopenharmony_ci 2372bf215546Sopenharmony_cistatic unsigned 2373bf215546Sopenharmony_cipan_assign_varyings(const struct panfrost_device *dev, 2374bf215546Sopenharmony_ci struct pan_shader_info *producer, 2375bf215546Sopenharmony_ci struct pan_shader_info *consumer, 2376bf215546Sopenharmony_ci signed *offsets) 2377bf215546Sopenharmony_ci{ 2378bf215546Sopenharmony_ci unsigned producer_count = producer->varyings.output_count; 2379bf215546Sopenharmony_ci unsigned consumer_count = consumer->varyings.input_count; 2380bf215546Sopenharmony_ci 2381bf215546Sopenharmony_ci const struct pan_shader_varying *producer_vars = producer->varyings.output; 2382bf215546Sopenharmony_ci const struct pan_shader_varying *consumer_vars = consumer->varyings.input; 2383bf215546Sopenharmony_ci 2384bf215546Sopenharmony_ci unsigned stride = 0; 2385bf215546Sopenharmony_ci 2386bf215546Sopenharmony_ci for (unsigned i = 0; i < producer_count; ++i) { 2387bf215546Sopenharmony_ci signed loc = pan_find_vary(consumer_vars, consumer_count, 2388bf215546Sopenharmony_ci producer_vars[i].location); 2389bf215546Sopenharmony_ci 2390bf215546Sopenharmony_ci if (loc >= 0) { 2391bf215546Sopenharmony_ci offsets[i] = stride; 2392bf215546Sopenharmony_ci 2393bf215546Sopenharmony_ci enum pipe_format format = consumer_vars[loc].format; 2394bf215546Sopenharmony_ci stride += util_format_get_blocksize(format); 2395bf215546Sopenharmony_ci } else { 2396bf215546Sopenharmony_ci offsets[i] = -1; 2397bf215546Sopenharmony_ci } 2398bf215546Sopenharmony_ci } 2399bf215546Sopenharmony_ci 2400bf215546Sopenharmony_ci return stride; 2401bf215546Sopenharmony_ci} 2402bf215546Sopenharmony_ci 2403bf215546Sopenharmony_ci/* Emitter for a single varying (attribute) descriptor */ 2404bf215546Sopenharmony_ci 2405bf215546Sopenharmony_cistatic void 2406bf215546Sopenharmony_cipanfrost_emit_varying(const struct panfrost_device *dev, 2407bf215546Sopenharmony_ci struct mali_attribute_packed *out, 2408bf215546Sopenharmony_ci const struct 
pan_shader_varying varying, 2409bf215546Sopenharmony_ci enum pipe_format pipe_format, 2410bf215546Sopenharmony_ci unsigned present, 2411bf215546Sopenharmony_ci uint16_t point_sprite_mask, 2412bf215546Sopenharmony_ci signed offset, 2413bf215546Sopenharmony_ci enum pan_special_varying pos_varying) 2414bf215546Sopenharmony_ci{ 2415bf215546Sopenharmony_ci /* Note: varying.format != pipe_format in some obscure cases due to a 2416bf215546Sopenharmony_ci * limitation of the NIR linker. This should be fixed in the future to 2417bf215546Sopenharmony_ci * eliminate the additional lookups. See: 2418bf215546Sopenharmony_ci * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex 2419bf215546Sopenharmony_ci */ 2420bf215546Sopenharmony_ci gl_varying_slot loc = varying.location; 2421bf215546Sopenharmony_ci mali_pixel_format format = dev->formats[pipe_format].hw; 2422bf215546Sopenharmony_ci 2423bf215546Sopenharmony_ci if (util_varying_is_point_coord(loc, point_sprite_mask)) { 2424bf215546Sopenharmony_ci pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD); 2425bf215546Sopenharmony_ci } else if (loc == VARYING_SLOT_POS) { 2426bf215546Sopenharmony_ci pan_emit_vary_special(dev, out, present, pos_varying); 2427bf215546Sopenharmony_ci } else if (loc == VARYING_SLOT_PSIZ) { 2428bf215546Sopenharmony_ci pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ); 2429bf215546Sopenharmony_ci } else if (loc == VARYING_SLOT_FACE) { 2430bf215546Sopenharmony_ci pan_emit_vary_special(dev, out, present, PAN_VARY_FACE); 2431bf215546Sopenharmony_ci } else if (offset < 0) { 2432bf215546Sopenharmony_ci pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0); 2433bf215546Sopenharmony_ci } else { 2434bf215546Sopenharmony_ci STATIC_ASSERT(PAN_VARY_GENERAL == 0); 2435bf215546Sopenharmony_ci pan_emit_vary(dev, out, 0, format, offset); 2436bf215546Sopenharmony_ci } 2437bf215546Sopenharmony_ci} 2438bf215546Sopenharmony_ci 2439bf215546Sopenharmony_ci/* Links varyings and uploads ATTRIBUTE 
descriptors. Can execute at link time, 2440bf215546Sopenharmony_ci * rather than draw time (under good conditions). */ 2441bf215546Sopenharmony_ci 2442bf215546Sopenharmony_cistatic void 2443bf215546Sopenharmony_cipanfrost_emit_varying_descs( 2444bf215546Sopenharmony_ci struct panfrost_pool *pool, 2445bf215546Sopenharmony_ci struct panfrost_shader_state *producer, 2446bf215546Sopenharmony_ci struct panfrost_shader_state *consumer, 2447bf215546Sopenharmony_ci uint16_t point_coord_mask, 2448bf215546Sopenharmony_ci struct pan_linkage *out) 2449bf215546Sopenharmony_ci{ 2450bf215546Sopenharmony_ci struct panfrost_device *dev = pool->base.dev; 2451bf215546Sopenharmony_ci unsigned producer_count = producer->info.varyings.output_count; 2452bf215546Sopenharmony_ci unsigned consumer_count = consumer->info.varyings.input_count; 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_ci /* Offsets within the general varying buffer, indexed by location */ 2455bf215546Sopenharmony_ci signed offsets[PAN_MAX_VARYINGS]; 2456bf215546Sopenharmony_ci assert(producer_count <= ARRAY_SIZE(offsets)); 2457bf215546Sopenharmony_ci assert(consumer_count <= ARRAY_SIZE(offsets)); 2458bf215546Sopenharmony_ci 2459bf215546Sopenharmony_ci /* Allocate enough descriptors for both shader stages */ 2460bf215546Sopenharmony_ci struct panfrost_ptr T = 2461bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&pool->base, 2462bf215546Sopenharmony_ci producer_count + consumer_count, 2463bf215546Sopenharmony_ci ATTRIBUTE); 2464bf215546Sopenharmony_ci 2465bf215546Sopenharmony_ci /* Take a reference if we're being put on the CSO */ 2466bf215546Sopenharmony_ci if (!pool->owned) { 2467bf215546Sopenharmony_ci out->bo = pool->transient_bo; 2468bf215546Sopenharmony_ci panfrost_bo_reference(out->bo); 2469bf215546Sopenharmony_ci } 2470bf215546Sopenharmony_ci 2471bf215546Sopenharmony_ci struct mali_attribute_packed *descs = T.cpu; 2472bf215546Sopenharmony_ci out->producer = producer_count ? 
T.gpu : 0; 2473bf215546Sopenharmony_ci out->consumer = consumer_count ? T.gpu + 2474bf215546Sopenharmony_ci (pan_size(ATTRIBUTE) * producer_count) : 0; 2475bf215546Sopenharmony_ci 2476bf215546Sopenharmony_ci /* Lay out the varyings. Must use producer to lay out, in order to 2477bf215546Sopenharmony_ci * respect transform feedback precisions. */ 2478bf215546Sopenharmony_ci out->present = pan_varying_present(dev, &producer->info, 2479bf215546Sopenharmony_ci &consumer->info, point_coord_mask); 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_ci out->stride = pan_assign_varyings(dev, &producer->info, 2482bf215546Sopenharmony_ci &consumer->info, offsets); 2483bf215546Sopenharmony_ci 2484bf215546Sopenharmony_ci for (unsigned i = 0; i < producer_count; ++i) { 2485bf215546Sopenharmony_ci signed j = pan_find_vary(consumer->info.varyings.input, 2486bf215546Sopenharmony_ci consumer->info.varyings.input_count, 2487bf215546Sopenharmony_ci producer->info.varyings.output[i].location); 2488bf215546Sopenharmony_ci 2489bf215546Sopenharmony_ci enum pipe_format format = (j >= 0) ? 2490bf215546Sopenharmony_ci consumer->info.varyings.input[j].format : 2491bf215546Sopenharmony_ci producer->info.varyings.output[i].format; 2492bf215546Sopenharmony_ci 2493bf215546Sopenharmony_ci panfrost_emit_varying(dev, descs + i, 2494bf215546Sopenharmony_ci producer->info.varyings.output[i], format, 2495bf215546Sopenharmony_ci out->present, 0, offsets[i], PAN_VARY_POSITION); 2496bf215546Sopenharmony_ci } 2497bf215546Sopenharmony_ci 2498bf215546Sopenharmony_ci for (unsigned i = 0; i < consumer_count; ++i) { 2499bf215546Sopenharmony_ci signed j = pan_find_vary(producer->info.varyings.output, 2500bf215546Sopenharmony_ci producer->info.varyings.output_count, 2501bf215546Sopenharmony_ci consumer->info.varyings.input[i].location); 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci signed offset = (j >= 0) ? 
offsets[j] : -1; 2504bf215546Sopenharmony_ci 2505bf215546Sopenharmony_ci panfrost_emit_varying(dev, descs + producer_count + i, 2506bf215546Sopenharmony_ci consumer->info.varyings.input[i], 2507bf215546Sopenharmony_ci consumer->info.varyings.input[i].format, 2508bf215546Sopenharmony_ci out->present, point_coord_mask, 2509bf215546Sopenharmony_ci offset, PAN_VARY_FRAGCOORD); 2510bf215546Sopenharmony_ci } 2511bf215546Sopenharmony_ci} 2512bf215546Sopenharmony_ci 2513bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2514bf215546Sopenharmony_cistatic void 2515bf215546Sopenharmony_cipan_emit_special_input(struct mali_attribute_buffer_packed *out, 2516bf215546Sopenharmony_ci unsigned present, 2517bf215546Sopenharmony_ci enum pan_special_varying v, 2518bf215546Sopenharmony_ci unsigned special) 2519bf215546Sopenharmony_ci{ 2520bf215546Sopenharmony_ci if (present & BITFIELD_BIT(v)) { 2521bf215546Sopenharmony_ci unsigned idx = pan_varying_index(present, v); 2522bf215546Sopenharmony_ci 2523bf215546Sopenharmony_ci pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) { 2524bf215546Sopenharmony_ci cfg.special = special; 2525bf215546Sopenharmony_ci cfg.type = 0; 2526bf215546Sopenharmony_ci } 2527bf215546Sopenharmony_ci } 2528bf215546Sopenharmony_ci} 2529bf215546Sopenharmony_ci#endif 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_cistatic void 2532bf215546Sopenharmony_cipanfrost_emit_varying_descriptor(struct panfrost_batch *batch, 2533bf215546Sopenharmony_ci unsigned vertex_count, 2534bf215546Sopenharmony_ci mali_ptr *vs_attribs, 2535bf215546Sopenharmony_ci mali_ptr *fs_attribs, 2536bf215546Sopenharmony_ci mali_ptr *buffers, 2537bf215546Sopenharmony_ci unsigned *buffer_count, 2538bf215546Sopenharmony_ci mali_ptr *position, 2539bf215546Sopenharmony_ci mali_ptr *psiz, 2540bf215546Sopenharmony_ci bool point_coord_replace) 2541bf215546Sopenharmony_ci{ 2542bf215546Sopenharmony_ci /* Load the shaders */ 2543bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 2544bf215546Sopenharmony_ci 
struct panfrost_shader_state *vs, *fs; 2545bf215546Sopenharmony_ci 2546bf215546Sopenharmony_ci vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 2547bf215546Sopenharmony_ci fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 2548bf215546Sopenharmony_ci 2549bf215546Sopenharmony_ci uint16_t point_coord_mask = 0; 2550bf215546Sopenharmony_ci 2551bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2552bf215546Sopenharmony_ci /* Point sprites are lowered on Bifrost and newer */ 2553bf215546Sopenharmony_ci if (point_coord_replace) 2554bf215546Sopenharmony_ci point_coord_mask = ctx->rasterizer->base.sprite_coord_enable; 2555bf215546Sopenharmony_ci#endif 2556bf215546Sopenharmony_ci 2557bf215546Sopenharmony_ci /* In good conditions, we only need to link varyings once */ 2558bf215546Sopenharmony_ci bool prelink = 2559bf215546Sopenharmony_ci (point_coord_mask == 0) && 2560bf215546Sopenharmony_ci !vs->info.separable && 2561bf215546Sopenharmony_ci !fs->info.separable; 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci /* Try to reduce copies */ 2564bf215546Sopenharmony_ci struct pan_linkage _linkage; 2565bf215546Sopenharmony_ci struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage; 2566bf215546Sopenharmony_ci 2567bf215546Sopenharmony_ci /* Emit ATTRIBUTE descriptors if needed */ 2568bf215546Sopenharmony_ci if (!prelink || vs->linkage.bo == NULL) { 2569bf215546Sopenharmony_ci struct panfrost_pool *pool = 2570bf215546Sopenharmony_ci prelink ? 
&ctx->descs : &batch->pool; 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci panfrost_emit_varying_descs(pool, vs, fs, point_coord_mask, linkage); 2573bf215546Sopenharmony_ci } 2574bf215546Sopenharmony_ci 2575bf215546Sopenharmony_ci unsigned present = linkage->present, stride = linkage->stride; 2576bf215546Sopenharmony_ci unsigned count = util_bitcount(present); 2577bf215546Sopenharmony_ci struct panfrost_ptr T = 2578bf215546Sopenharmony_ci pan_pool_alloc_desc_array(&batch->pool.base, 2579bf215546Sopenharmony_ci count + 1, 2580bf215546Sopenharmony_ci ATTRIBUTE_BUFFER); 2581bf215546Sopenharmony_ci struct mali_attribute_buffer_packed *varyings = 2582bf215546Sopenharmony_ci (struct mali_attribute_buffer_packed *) T.cpu; 2583bf215546Sopenharmony_ci 2584bf215546Sopenharmony_ci if (buffer_count) 2585bf215546Sopenharmony_ci *buffer_count = count; 2586bf215546Sopenharmony_ci 2587bf215546Sopenharmony_ci#if PAN_ARCH >= 6 2588bf215546Sopenharmony_ci /* Suppress prefetch on Bifrost */ 2589bf215546Sopenharmony_ci memset(varyings + count, 0, sizeof(*varyings)); 2590bf215546Sopenharmony_ci#endif 2591bf215546Sopenharmony_ci 2592bf215546Sopenharmony_ci if (stride) { 2593bf215546Sopenharmony_ci panfrost_emit_varyings(batch, 2594bf215546Sopenharmony_ci &varyings[pan_varying_index(present, PAN_VARY_GENERAL)], 2595bf215546Sopenharmony_ci stride, vertex_count); 2596bf215546Sopenharmony_ci } else { 2597bf215546Sopenharmony_ci /* The indirect draw code reads the stride field, make sure 2598bf215546Sopenharmony_ci * that it is initialised */ 2599bf215546Sopenharmony_ci memset(varyings + pan_varying_index(present, PAN_VARY_GENERAL), 0, 2600bf215546Sopenharmony_ci sizeof(*varyings)); 2601bf215546Sopenharmony_ci } 2602bf215546Sopenharmony_ci 2603bf215546Sopenharmony_ci /* fp32 vec4 gl_Position */ 2604bf215546Sopenharmony_ci *position = panfrost_emit_varyings(batch, 2605bf215546Sopenharmony_ci &varyings[pan_varying_index(present, PAN_VARY_POSITION)], 2606bf215546Sopenharmony_ci 
sizeof(float) * 4, vertex_count); 2607bf215546Sopenharmony_ci 2608bf215546Sopenharmony_ci if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) { 2609bf215546Sopenharmony_ci *psiz = panfrost_emit_varyings(batch, 2610bf215546Sopenharmony_ci &varyings[pan_varying_index(present, PAN_VARY_PSIZ)], 2611bf215546Sopenharmony_ci 2, vertex_count); 2612bf215546Sopenharmony_ci } 2613bf215546Sopenharmony_ci 2614bf215546Sopenharmony_ci#if PAN_ARCH <= 5 2615bf215546Sopenharmony_ci pan_emit_special_input(varyings, present, 2616bf215546Sopenharmony_ci PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD); 2617bf215546Sopenharmony_ci pan_emit_special_input(varyings, present, PAN_VARY_FACE, 2618bf215546Sopenharmony_ci MALI_ATTRIBUTE_SPECIAL_FRONT_FACING); 2619bf215546Sopenharmony_ci pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, 2620bf215546Sopenharmony_ci MALI_ATTRIBUTE_SPECIAL_FRAG_COORD); 2621bf215546Sopenharmony_ci#endif 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci *buffers = T.gpu; 2624bf215546Sopenharmony_ci *vs_attribs = linkage->producer; 2625bf215546Sopenharmony_ci *fs_attribs = linkage->consumer; 2626bf215546Sopenharmony_ci} 2627bf215546Sopenharmony_ci 2628bf215546Sopenharmony_ci/* 2629bf215546Sopenharmony_ci * Emit jobs required for the rasterization pipeline. If there are side effects 2630bf215546Sopenharmony_ci * from the vertex shader, these are handled ahead-of-time with a compute 2631bf215546Sopenharmony_ci * shader. This function should not be called if rasterization is skipped. 
2632bf215546Sopenharmony_ci */ 2633bf215546Sopenharmony_cistatic void 2634bf215546Sopenharmony_cipanfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, 2635bf215546Sopenharmony_ci const struct panfrost_ptr *vertex_job, 2636bf215546Sopenharmony_ci const struct panfrost_ptr *tiler_job) 2637bf215546Sopenharmony_ci{ 2638bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 2639bf215546Sopenharmony_ci 2640bf215546Sopenharmony_ci /* XXX - set job_barrier in case buffers get ping-ponged and we need to 2641bf215546Sopenharmony_ci * enforce ordering, this has a perf hit! See 2642bf215546Sopenharmony_ci * KHR-GLES31.core.vertex_attrib_binding.advanced-iterations 2643bf215546Sopenharmony_ci */ 2644bf215546Sopenharmony_ci unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard, 2645bf215546Sopenharmony_ci MALI_JOB_TYPE_VERTEX, true, false, 2646bf215546Sopenharmony_ci ctx->indirect_draw ? 2647bf215546Sopenharmony_ci batch->indirect_draw_job_id : 0, 2648bf215546Sopenharmony_ci 0, vertex_job, false); 2649bf215546Sopenharmony_ci 2650bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, 2651bf215546Sopenharmony_ci MALI_JOB_TYPE_TILER, false, false, 2652bf215546Sopenharmony_ci vertex, 0, tiler_job, false); 2653bf215546Sopenharmony_ci} 2654bf215546Sopenharmony_ci#endif 2655bf215546Sopenharmony_ci 2656bf215546Sopenharmony_cistatic void 2657bf215546Sopenharmony_ciemit_tls(struct panfrost_batch *batch) 2658bf215546Sopenharmony_ci{ 2659bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(batch->ctx->base.screen); 2660bf215546Sopenharmony_ci 2661bf215546Sopenharmony_ci /* Emitted with the FB descriptor on Midgard. */ 2662bf215546Sopenharmony_ci if (PAN_ARCH <= 5 && batch->framebuffer.gpu) 2663bf215546Sopenharmony_ci return; 2664bf215546Sopenharmony_ci 2665bf215546Sopenharmony_ci struct panfrost_bo *tls_bo = 2666bf215546Sopenharmony_ci batch->stack_size ? 
2667bf215546Sopenharmony_ci panfrost_batch_get_scratchpad(batch, 2668bf215546Sopenharmony_ci batch->stack_size, 2669bf215546Sopenharmony_ci dev->thread_tls_alloc, 2670bf215546Sopenharmony_ci dev->core_id_range): 2671bf215546Sopenharmony_ci NULL; 2672bf215546Sopenharmony_ci struct pan_tls_info tls = { 2673bf215546Sopenharmony_ci .tls = { 2674bf215546Sopenharmony_ci .ptr = tls_bo ? tls_bo->ptr.gpu : 0, 2675bf215546Sopenharmony_ci .size = batch->stack_size, 2676bf215546Sopenharmony_ci }, 2677bf215546Sopenharmony_ci }; 2678bf215546Sopenharmony_ci 2679bf215546Sopenharmony_ci assert(batch->tls.cpu); 2680bf215546Sopenharmony_ci GENX(pan_emit_tls)(&tls, batch->tls.cpu); 2681bf215546Sopenharmony_ci} 2682bf215546Sopenharmony_ci 2683bf215546Sopenharmony_cistatic void 2684bf215546Sopenharmony_ciemit_fbd(struct panfrost_batch *batch, const struct pan_fb_info *fb) 2685bf215546Sopenharmony_ci{ 2686bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(batch->ctx->base.screen); 2687bf215546Sopenharmony_ci struct panfrost_bo *tls_bo = 2688bf215546Sopenharmony_ci batch->stack_size ? 2689bf215546Sopenharmony_ci panfrost_batch_get_scratchpad(batch, 2690bf215546Sopenharmony_ci batch->stack_size, 2691bf215546Sopenharmony_ci dev->thread_tls_alloc, 2692bf215546Sopenharmony_ci dev->core_id_range): 2693bf215546Sopenharmony_ci NULL; 2694bf215546Sopenharmony_ci struct pan_tls_info tls = { 2695bf215546Sopenharmony_ci .tls = { 2696bf215546Sopenharmony_ci .ptr = tls_bo ? 
tls_bo->ptr.gpu : 0, 2697bf215546Sopenharmony_ci .size = batch->stack_size, 2698bf215546Sopenharmony_ci }, 2699bf215546Sopenharmony_ci }; 2700bf215546Sopenharmony_ci 2701bf215546Sopenharmony_ci batch->framebuffer.gpu |= 2702bf215546Sopenharmony_ci GENX(pan_emit_fbd)(dev, fb, &tls, &batch->tiler_ctx, 2703bf215546Sopenharmony_ci batch->framebuffer.cpu); 2704bf215546Sopenharmony_ci} 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ci/* Mark a surface as written */ 2707bf215546Sopenharmony_ci 2708bf215546Sopenharmony_cistatic void 2709bf215546Sopenharmony_cipanfrost_initialize_surface(struct panfrost_batch *batch, 2710bf215546Sopenharmony_ci struct pipe_surface *surf) 2711bf215546Sopenharmony_ci{ 2712bf215546Sopenharmony_ci if (surf) { 2713bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(surf->texture); 2714bf215546Sopenharmony_ci BITSET_SET(rsrc->valid.data, surf->u.tex.level); 2715bf215546Sopenharmony_ci } 2716bf215546Sopenharmony_ci} 2717bf215546Sopenharmony_ci 2718bf215546Sopenharmony_ci/* Generate a fragment job. This should be called once per frame. (Usually, 2719bf215546Sopenharmony_ci * this corresponds to eglSwapBuffers or one of glFlush, glFinish) 2720bf215546Sopenharmony_ci */ 2721bf215546Sopenharmony_cistatic mali_ptr 2722bf215546Sopenharmony_ciemit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb) 2723bf215546Sopenharmony_ci{ 2724bf215546Sopenharmony_ci /* Mark the affected buffers as initialized, since we're writing to it. 
2725bf215546Sopenharmony_ci * Also, add the surfaces we're writing to to the batch */ 2726bf215546Sopenharmony_ci 2727bf215546Sopenharmony_ci struct pipe_framebuffer_state *fb = &batch->key; 2728bf215546Sopenharmony_ci 2729bf215546Sopenharmony_ci for (unsigned i = 0; i < fb->nr_cbufs; ++i) 2730bf215546Sopenharmony_ci panfrost_initialize_surface(batch, fb->cbufs[i]); 2731bf215546Sopenharmony_ci 2732bf215546Sopenharmony_ci panfrost_initialize_surface(batch, fb->zsbuf); 2733bf215546Sopenharmony_ci 2734bf215546Sopenharmony_ci /* The passed tile coords can be out of range in some cases, so we need 2735bf215546Sopenharmony_ci * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT. 2736bf215546Sopenharmony_ci * Theoretically we also need to clamp the coordinates positive, but we 2737bf215546Sopenharmony_ci * avoid that edge case as all four values are unsigned. Also, 2738bf215546Sopenharmony_ci * theoretically we could clamp the minima, but if that has to happen 2739bf215546Sopenharmony_ci * the asserts would fail anyway (since the maxima would get clamped 2740bf215546Sopenharmony_ci * and then be smaller than the minima). An edge case of sorts occurs 2741bf215546Sopenharmony_ci * when no scissors are added to draw, so by default min=~0 and max=0. 2742bf215546Sopenharmony_ci * But that can't happen if any actual drawing occurs (beyond a 2743bf215546Sopenharmony_ci * wallpaper reload), so this is again irrelevant in practice. */ 2744bf215546Sopenharmony_ci 2745bf215546Sopenharmony_ci batch->maxx = MIN2(batch->maxx, fb->width); 2746bf215546Sopenharmony_ci batch->maxy = MIN2(batch->maxy, fb->height); 2747bf215546Sopenharmony_ci 2748bf215546Sopenharmony_ci /* Rendering region must be at least 1x1; otherwise, there is nothing 2749bf215546Sopenharmony_ci * to do and the whole job chain should have been discarded. 
*/ 2750bf215546Sopenharmony_ci 2751bf215546Sopenharmony_ci assert(batch->maxx > batch->minx); 2752bf215546Sopenharmony_ci assert(batch->maxy > batch->miny); 2753bf215546Sopenharmony_ci 2754bf215546Sopenharmony_ci struct panfrost_ptr transfer = 2755bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB); 2756bf215546Sopenharmony_ci 2757bf215546Sopenharmony_ci GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu, 2758bf215546Sopenharmony_ci transfer.cpu); 2759bf215546Sopenharmony_ci 2760bf215546Sopenharmony_ci return transfer.gpu; 2761bf215546Sopenharmony_ci} 2762bf215546Sopenharmony_ci 2763bf215546Sopenharmony_ci#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c; 2764bf215546Sopenharmony_ci 2765bf215546Sopenharmony_cistatic uint8_t 2766bf215546Sopenharmony_cipan_draw_mode(enum pipe_prim_type mode) 2767bf215546Sopenharmony_ci{ 2768bf215546Sopenharmony_ci switch (mode) { 2769bf215546Sopenharmony_ci DEFINE_CASE(POINTS); 2770bf215546Sopenharmony_ci DEFINE_CASE(LINES); 2771bf215546Sopenharmony_ci DEFINE_CASE(LINE_LOOP); 2772bf215546Sopenharmony_ci DEFINE_CASE(LINE_STRIP); 2773bf215546Sopenharmony_ci DEFINE_CASE(TRIANGLES); 2774bf215546Sopenharmony_ci DEFINE_CASE(TRIANGLE_STRIP); 2775bf215546Sopenharmony_ci DEFINE_CASE(TRIANGLE_FAN); 2776bf215546Sopenharmony_ci DEFINE_CASE(QUADS); 2777bf215546Sopenharmony_ci DEFINE_CASE(POLYGON); 2778bf215546Sopenharmony_ci#if PAN_ARCH <= 6 2779bf215546Sopenharmony_ci DEFINE_CASE(QUAD_STRIP); 2780bf215546Sopenharmony_ci#endif 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci default: 2783bf215546Sopenharmony_ci unreachable("Invalid draw mode"); 2784bf215546Sopenharmony_ci } 2785bf215546Sopenharmony_ci} 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_ci#undef DEFINE_CASE 2788bf215546Sopenharmony_ci 2789bf215546Sopenharmony_ci/* Count generated primitives (when there is no geom/tess shaders) for 2790bf215546Sopenharmony_ci * transform feedback */ 2791bf215546Sopenharmony_ci 
2792bf215546Sopenharmony_cistatic void 2793bf215546Sopenharmony_cipanfrost_statistics_record( 2794bf215546Sopenharmony_ci struct panfrost_context *ctx, 2795bf215546Sopenharmony_ci const struct pipe_draw_info *info, 2796bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) 2797bf215546Sopenharmony_ci{ 2798bf215546Sopenharmony_ci if (!ctx->active_queries) 2799bf215546Sopenharmony_ci return; 2800bf215546Sopenharmony_ci 2801bf215546Sopenharmony_ci uint32_t prims = u_prims_for_vertices(info->mode, draw->count); 2802bf215546Sopenharmony_ci ctx->prims_generated += prims; 2803bf215546Sopenharmony_ci 2804bf215546Sopenharmony_ci if (!ctx->streamout.num_targets) 2805bf215546Sopenharmony_ci return; 2806bf215546Sopenharmony_ci 2807bf215546Sopenharmony_ci ctx->tf_prims_generated += prims; 2808bf215546Sopenharmony_ci ctx->dirty |= PAN_DIRTY_SO; 2809bf215546Sopenharmony_ci} 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_cistatic void 2812bf215546Sopenharmony_cipanfrost_update_streamout_offsets(struct panfrost_context *ctx) 2813bf215546Sopenharmony_ci{ 2814bf215546Sopenharmony_ci unsigned count = u_stream_outputs_for_vertices(ctx->active_prim, 2815bf215546Sopenharmony_ci ctx->vertex_count); 2816bf215546Sopenharmony_ci 2817bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) { 2818bf215546Sopenharmony_ci if (!ctx->streamout.targets[i]) 2819bf215546Sopenharmony_ci continue; 2820bf215546Sopenharmony_ci 2821bf215546Sopenharmony_ci pan_so_target(ctx->streamout.targets[i])->offset += count; 2822bf215546Sopenharmony_ci } 2823bf215546Sopenharmony_ci} 2824bf215546Sopenharmony_ci 2825bf215546Sopenharmony_cistatic inline enum mali_index_type 2826bf215546Sopenharmony_cipanfrost_translate_index_size(unsigned size) 2827bf215546Sopenharmony_ci{ 2828bf215546Sopenharmony_ci STATIC_ASSERT(MALI_INDEX_TYPE_NONE == 0); 2829bf215546Sopenharmony_ci STATIC_ASSERT(MALI_INDEX_TYPE_UINT8 == 1); 2830bf215546Sopenharmony_ci 
STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2); 2831bf215546Sopenharmony_ci 2832bf215546Sopenharmony_ci return (size == 4) ? MALI_INDEX_TYPE_UINT32 : size; 2833bf215546Sopenharmony_ci} 2834bf215546Sopenharmony_ci 2835bf215546Sopenharmony_ci#if PAN_ARCH <= 7 2836bf215546Sopenharmony_cistatic inline void 2837bf215546Sopenharmony_cipan_emit_draw_descs(struct panfrost_batch *batch, 2838bf215546Sopenharmony_ci struct MALI_DRAW *d, enum pipe_shader_type st) 2839bf215546Sopenharmony_ci{ 2840bf215546Sopenharmony_ci d->offset_start = batch->ctx->offset_start; 2841bf215546Sopenharmony_ci d->instance_size = batch->ctx->instance_count > 1 ? 2842bf215546Sopenharmony_ci batch->ctx->padded_count : 1; 2843bf215546Sopenharmony_ci 2844bf215546Sopenharmony_ci d->uniform_buffers = batch->uniform_buffers[st]; 2845bf215546Sopenharmony_ci d->push_uniforms = batch->push_uniforms[st]; 2846bf215546Sopenharmony_ci d->textures = batch->textures[st]; 2847bf215546Sopenharmony_ci d->samplers = batch->samplers[st]; 2848bf215546Sopenharmony_ci} 2849bf215546Sopenharmony_ci 2850bf215546Sopenharmony_cistatic void 2851bf215546Sopenharmony_cipanfrost_draw_emit_vertex_section(struct panfrost_batch *batch, 2852bf215546Sopenharmony_ci mali_ptr vs_vary, mali_ptr varyings, 2853bf215546Sopenharmony_ci mali_ptr attribs, mali_ptr attrib_bufs, 2854bf215546Sopenharmony_ci void *section) 2855bf215546Sopenharmony_ci{ 2856bf215546Sopenharmony_ci pan_pack(section, DRAW, cfg) { 2857bf215546Sopenharmony_ci cfg.state = batch->rsd[PIPE_SHADER_VERTEX]; 2858bf215546Sopenharmony_ci cfg.attributes = attribs; 2859bf215546Sopenharmony_ci cfg.attribute_buffers = attrib_bufs; 2860bf215546Sopenharmony_ci cfg.varyings = vs_vary; 2861bf215546Sopenharmony_ci cfg.varying_buffers = vs_vary ? 
varyings : 0; 2862bf215546Sopenharmony_ci cfg.thread_storage = batch->tls.gpu; 2863bf215546Sopenharmony_ci pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX); 2864bf215546Sopenharmony_ci } 2865bf215546Sopenharmony_ci} 2866bf215546Sopenharmony_ci 2867bf215546Sopenharmony_cistatic void 2868bf215546Sopenharmony_cipanfrost_draw_emit_vertex(struct panfrost_batch *batch, 2869bf215546Sopenharmony_ci const struct pipe_draw_info *info, 2870bf215546Sopenharmony_ci void *invocation_template, 2871bf215546Sopenharmony_ci mali_ptr vs_vary, mali_ptr varyings, 2872bf215546Sopenharmony_ci mali_ptr attribs, mali_ptr attrib_bufs, 2873bf215546Sopenharmony_ci void *job) 2874bf215546Sopenharmony_ci{ 2875bf215546Sopenharmony_ci void *section = 2876bf215546Sopenharmony_ci pan_section_ptr(job, COMPUTE_JOB, INVOCATION); 2877bf215546Sopenharmony_ci memcpy(section, invocation_template, pan_size(INVOCATION)); 2878bf215546Sopenharmony_ci 2879bf215546Sopenharmony_ci pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { 2880bf215546Sopenharmony_ci cfg.job_task_split = 5; 2881bf215546Sopenharmony_ci } 2882bf215546Sopenharmony_ci 2883bf215546Sopenharmony_ci section = pan_section_ptr(job, COMPUTE_JOB, DRAW); 2884bf215546Sopenharmony_ci panfrost_draw_emit_vertex_section(batch, vs_vary, varyings, 2885bf215546Sopenharmony_ci attribs, attrib_bufs, section); 2886bf215546Sopenharmony_ci} 2887bf215546Sopenharmony_ci#endif 2888bf215546Sopenharmony_ci 2889bf215546Sopenharmony_cistatic void 2890bf215546Sopenharmony_cipanfrost_emit_primitive_size(struct panfrost_context *ctx, 2891bf215546Sopenharmony_ci bool points, mali_ptr size_array, 2892bf215546Sopenharmony_ci void *prim_size) 2893bf215546Sopenharmony_ci{ 2894bf215546Sopenharmony_ci struct panfrost_rasterizer *rast = ctx->rasterizer; 2895bf215546Sopenharmony_ci 2896bf215546Sopenharmony_ci pan_pack(prim_size, PRIMITIVE_SIZE, cfg) { 2897bf215546Sopenharmony_ci if (panfrost_writes_point_size(ctx)) { 2898bf215546Sopenharmony_ci cfg.size_array = size_array; 
2899bf215546Sopenharmony_ci } else { 2900bf215546Sopenharmony_ci cfg.constant = points ? 2901bf215546Sopenharmony_ci rast->base.point_size : 2902bf215546Sopenharmony_ci rast->base.line_width; 2903bf215546Sopenharmony_ci } 2904bf215546Sopenharmony_ci } 2905bf215546Sopenharmony_ci} 2906bf215546Sopenharmony_ci 2907bf215546Sopenharmony_cistatic bool 2908bf215546Sopenharmony_cipanfrost_is_implicit_prim_restart(const struct pipe_draw_info *info) 2909bf215546Sopenharmony_ci{ 2910bf215546Sopenharmony_ci /* As a reminder primitive_restart should always be checked before any 2911bf215546Sopenharmony_ci access to restart_index. */ 2912bf215546Sopenharmony_ci return info->primitive_restart && 2913bf215546Sopenharmony_ci info->restart_index == (unsigned)BITFIELD_MASK(info->index_size * 8); 2914bf215546Sopenharmony_ci} 2915bf215546Sopenharmony_ci 2916bf215546Sopenharmony_ci/* On Bifrost and older, the Renderer State Descriptor aggregates many pieces of 2917bf215546Sopenharmony_ci * 3D state. In particular, it groups the fragment shader descriptor with 2918bf215546Sopenharmony_ci * depth/stencil, blend, polygon offset, and multisampling state. These pieces 2919bf215546Sopenharmony_ci * of state are dirty tracked independently for the benefit of newer GPUs that 2920bf215546Sopenharmony_ci * separate the descriptors. FRAGMENT_RSD_DIRTY_MASK contains the list of 3D 2921bf215546Sopenharmony_ci * dirty flags that trigger re-emits of the fragment RSD. 2922bf215546Sopenharmony_ci * 2923bf215546Sopenharmony_ci * Obscurely, occlusion queries are included. Occlusion query state is nominally 2924bf215546Sopenharmony_ci * specified in the draw call descriptor, but must be considered when determing 2925bf215546Sopenharmony_ci * early-Z state which is part of the RSD. 
2926bf215546Sopenharmony_ci */ 2927bf215546Sopenharmony_ci#define FRAGMENT_RSD_DIRTY_MASK ( \ 2928bf215546Sopenharmony_ci PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | \ 2929bf215546Sopenharmony_ci PAN_DIRTY_RASTERIZER | PAN_DIRTY_OQ) 2930bf215546Sopenharmony_ci 2931bf215546Sopenharmony_cistatic inline void 2932bf215546Sopenharmony_cipanfrost_update_shader_state(struct panfrost_batch *batch, 2933bf215546Sopenharmony_ci enum pipe_shader_type st) 2934bf215546Sopenharmony_ci{ 2935bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 2936bf215546Sopenharmony_ci struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st); 2937bf215546Sopenharmony_ci 2938bf215546Sopenharmony_ci bool frag = (st == PIPE_SHADER_FRAGMENT); 2939bf215546Sopenharmony_ci unsigned dirty_3d = ctx->dirty; 2940bf215546Sopenharmony_ci unsigned dirty = ctx->dirty_shader[st]; 2941bf215546Sopenharmony_ci 2942bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_STAGE_TEXTURE) { 2943bf215546Sopenharmony_ci batch->textures[st] = 2944bf215546Sopenharmony_ci panfrost_emit_texture_descriptors(batch, st); 2945bf215546Sopenharmony_ci } 2946bf215546Sopenharmony_ci 2947bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_STAGE_SAMPLER) { 2948bf215546Sopenharmony_ci batch->samplers[st] = 2949bf215546Sopenharmony_ci panfrost_emit_sampler_descriptors(batch, st); 2950bf215546Sopenharmony_ci } 2951bf215546Sopenharmony_ci 2952bf215546Sopenharmony_ci /* On Bifrost and older, the fragment shader descriptor is fused 2953bf215546Sopenharmony_ci * together with the renderer state; the combined renderer state 2954bf215546Sopenharmony_ci * descriptor is emitted below. Otherwise, the shader descriptor is 2955bf215546Sopenharmony_ci * standalone and is emitted here. 
2956bf215546Sopenharmony_ci */ 2957bf215546Sopenharmony_ci if ((dirty & PAN_DIRTY_STAGE_SHADER) && !((PAN_ARCH <= 7) && frag)) { 2958bf215546Sopenharmony_ci batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st); 2959bf215546Sopenharmony_ci } 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci#if PAN_ARCH >= 9 2962bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_STAGE_IMAGE) 2963bf215546Sopenharmony_ci batch->images[st] = panfrost_emit_images(batch, st); 2964bf215546Sopenharmony_ci#endif 2965bf215546Sopenharmony_ci 2966bf215546Sopenharmony_ci if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) { 2967bf215546Sopenharmony_ci batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st, 2968bf215546Sopenharmony_ci NULL, &batch->push_uniforms[st], NULL); 2969bf215546Sopenharmony_ci } 2970bf215546Sopenharmony_ci 2971bf215546Sopenharmony_ci#if PAN_ARCH <= 7 2972bf215546Sopenharmony_ci /* On Bifrost and older, if the fragment shader changes OR any renderer 2973bf215546Sopenharmony_ci * state specified with the fragment shader, the whole renderer state 2974bf215546Sopenharmony_ci * descriptor is dirtied and must be reemited. 
2975bf215546Sopenharmony_ci */ 2976bf215546Sopenharmony_ci if (frag && ((dirty & PAN_DIRTY_STAGE_SHADER) || 2977bf215546Sopenharmony_ci (dirty_3d & FRAGMENT_RSD_DIRTY_MASK))) { 2978bf215546Sopenharmony_ci 2979bf215546Sopenharmony_ci batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); 2980bf215546Sopenharmony_ci } 2981bf215546Sopenharmony_ci 2982bf215546Sopenharmony_ci if (frag && (dirty & PAN_DIRTY_STAGE_IMAGE)) { 2983bf215546Sopenharmony_ci batch->attribs[st] = panfrost_emit_image_attribs(batch, 2984bf215546Sopenharmony_ci &batch->attrib_bufs[st], st); 2985bf215546Sopenharmony_ci } 2986bf215546Sopenharmony_ci#endif 2987bf215546Sopenharmony_ci} 2988bf215546Sopenharmony_ci 2989bf215546Sopenharmony_cistatic inline void 2990bf215546Sopenharmony_cipanfrost_update_state_3d(struct panfrost_batch *batch) 2991bf215546Sopenharmony_ci{ 2992bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 2993bf215546Sopenharmony_ci unsigned dirty = ctx->dirty; 2994bf215546Sopenharmony_ci 2995bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_TLS_SIZE) 2996bf215546Sopenharmony_ci panfrost_batch_adjust_stack_size(batch); 2997bf215546Sopenharmony_ci 2998bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_BLEND) 2999bf215546Sopenharmony_ci panfrost_set_batch_masks_blend(batch); 3000bf215546Sopenharmony_ci 3001bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_ZS) 3002bf215546Sopenharmony_ci panfrost_set_batch_masks_zs(batch); 3003bf215546Sopenharmony_ci 3004bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3005bf215546Sopenharmony_ci if ((dirty & (PAN_DIRTY_ZS | PAN_DIRTY_RASTERIZER)) || 3006bf215546Sopenharmony_ci (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & PAN_DIRTY_STAGE_SHADER)) 3007bf215546Sopenharmony_ci batch->depth_stencil = panfrost_emit_depth_stencil(batch); 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_ci if (dirty & PAN_DIRTY_BLEND) 3010bf215546Sopenharmony_ci batch->blend = panfrost_emit_blend_valhall(batch); 3011bf215546Sopenharmony_ci 3012bf215546Sopenharmony_ci if (dirty & 
PAN_DIRTY_VERTEX) { 3013bf215546Sopenharmony_ci batch->attribs[PIPE_SHADER_VERTEX] = 3014bf215546Sopenharmony_ci panfrost_emit_vertex_data(batch); 3015bf215546Sopenharmony_ci 3016bf215546Sopenharmony_ci batch->attrib_bufs[PIPE_SHADER_VERTEX] = 3017bf215546Sopenharmony_ci panfrost_emit_vertex_buffers(batch); 3018bf215546Sopenharmony_ci } 3019bf215546Sopenharmony_ci#endif 3020bf215546Sopenharmony_ci} 3021bf215546Sopenharmony_ci 3022bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3023bf215546Sopenharmony_cistatic mali_ptr 3024bf215546Sopenharmony_cipanfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count) 3025bf215546Sopenharmony_ci{ 3026bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(batch->ctx->base.screen); 3027bf215546Sopenharmony_ci 3028bf215546Sopenharmony_ci if (!vertex_count) 3029bf215546Sopenharmony_ci return 0; 3030bf215546Sopenharmony_ci 3031bf215546Sopenharmony_ci if (batch->tiler_ctx.bifrost) 3032bf215546Sopenharmony_ci return batch->tiler_ctx.bifrost; 3033bf215546Sopenharmony_ci 3034bf215546Sopenharmony_ci struct panfrost_ptr t = 3035bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP); 3036bf215546Sopenharmony_ci 3037bf215546Sopenharmony_ci GENX(pan_emit_tiler_heap)(dev, t.cpu); 3038bf215546Sopenharmony_ci 3039bf215546Sopenharmony_ci mali_ptr heap = t.gpu; 3040bf215546Sopenharmony_ci 3041bf215546Sopenharmony_ci t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT); 3042bf215546Sopenharmony_ci GENX(pan_emit_tiler_ctx)(dev, batch->key.width, batch->key.height, 3043bf215546Sopenharmony_ci util_framebuffer_get_num_samples(&batch->key), 3044bf215546Sopenharmony_ci pan_tristate_get(batch->first_provoking_vertex), 3045bf215546Sopenharmony_ci heap, t.cpu); 3046bf215546Sopenharmony_ci 3047bf215546Sopenharmony_ci batch->tiler_ctx.bifrost = t.gpu; 3048bf215546Sopenharmony_ci return batch->tiler_ctx.bifrost; 3049bf215546Sopenharmony_ci} 3050bf215546Sopenharmony_ci#endif 3051bf215546Sopenharmony_ci 
3052bf215546Sopenharmony_ci/* Packs a primitive descriptor, mostly common between Midgard/Bifrost tiler 3053bf215546Sopenharmony_ci * jobs and Valhall IDVS jobs 3054bf215546Sopenharmony_ci */ 3055bf215546Sopenharmony_cistatic void 3056bf215546Sopenharmony_cipanfrost_emit_primitive(struct panfrost_context *ctx, 3057bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3058bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw, 3059bf215546Sopenharmony_ci mali_ptr indices, bool secondary_shader, void *out) 3060bf215546Sopenharmony_ci{ 3061bf215546Sopenharmony_ci UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; 3062bf215546Sopenharmony_ci 3063bf215546Sopenharmony_ci bool lines = (info->mode == PIPE_PRIM_LINES || 3064bf215546Sopenharmony_ci info->mode == PIPE_PRIM_LINE_LOOP || 3065bf215546Sopenharmony_ci info->mode == PIPE_PRIM_LINE_STRIP); 3066bf215546Sopenharmony_ci 3067bf215546Sopenharmony_ci pan_pack(out, PRIMITIVE, cfg) { 3068bf215546Sopenharmony_ci cfg.draw_mode = pan_draw_mode(info->mode); 3069bf215546Sopenharmony_ci if (panfrost_writes_point_size(ctx)) 3070bf215546Sopenharmony_ci cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; 3071bf215546Sopenharmony_ci 3072bf215546Sopenharmony_ci#if PAN_ARCH <= 8 3073bf215546Sopenharmony_ci /* For line primitives, PRIMITIVE.first_provoking_vertex must 3074bf215546Sopenharmony_ci * be set to true and the provoking vertex is selected with 3075bf215546Sopenharmony_ci * DRAW.flat_shading_vertex. 
3076bf215546Sopenharmony_ci */ 3077bf215546Sopenharmony_ci if (lines) 3078bf215546Sopenharmony_ci cfg.first_provoking_vertex = true; 3079bf215546Sopenharmony_ci else 3080bf215546Sopenharmony_ci cfg.first_provoking_vertex = rast->flatshade_first; 3081bf215546Sopenharmony_ci 3082bf215546Sopenharmony_ci if (panfrost_is_implicit_prim_restart(info)) { 3083bf215546Sopenharmony_ci cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; 3084bf215546Sopenharmony_ci } else if (info->primitive_restart) { 3085bf215546Sopenharmony_ci cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT; 3086bf215546Sopenharmony_ci cfg.primitive_restart_index = info->restart_index; 3087bf215546Sopenharmony_ci } 3088bf215546Sopenharmony_ci 3089bf215546Sopenharmony_ci cfg.job_task_split = 6; 3090bf215546Sopenharmony_ci#else 3091bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 3092bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 3093bf215546Sopenharmony_ci 3094bf215546Sopenharmony_ci cfg.allow_rotating_primitives = !(lines || fs->info.bifrost.uses_flat_shading); 3095bf215546Sopenharmony_ci cfg.primitive_restart = info->primitive_restart; 3096bf215546Sopenharmony_ci 3097bf215546Sopenharmony_ci /* Non-fixed restart indices should have been lowered */ 3098bf215546Sopenharmony_ci assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info)); 3099bf215546Sopenharmony_ci#endif 3100bf215546Sopenharmony_ci 3101bf215546Sopenharmony_ci cfg.index_count = ctx->indirect_draw ? 1 : draw->count; 3102bf215546Sopenharmony_ci cfg.index_type = panfrost_translate_index_size(info->index_size); 3103bf215546Sopenharmony_ci 3104bf215546Sopenharmony_ci 3105bf215546Sopenharmony_ci if (PAN_ARCH >= 9) { 3106bf215546Sopenharmony_ci /* Base vertex offset on Valhall is used for both 3107bf215546Sopenharmony_ci * indexed and non-indexed draws, in a simple way for 3108bf215546Sopenharmony_ci * either. Handle both cases. 
3109bf215546Sopenharmony_ci */ 3110bf215546Sopenharmony_ci if (cfg.index_type) 3111bf215546Sopenharmony_ci cfg.base_vertex_offset = draw->index_bias; 3112bf215546Sopenharmony_ci else 3113bf215546Sopenharmony_ci cfg.base_vertex_offset = draw->start; 3114bf215546Sopenharmony_ci 3115bf215546Sopenharmony_ci /* Indices are moved outside the primitive descriptor 3116bf215546Sopenharmony_ci * on Valhall, so we don't need to set that here 3117bf215546Sopenharmony_ci */ 3118bf215546Sopenharmony_ci } else if (cfg.index_type) { 3119bf215546Sopenharmony_ci cfg.base_vertex_offset = draw->index_bias - ctx->offset_start; 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci#if PAN_ARCH <= 7 3122bf215546Sopenharmony_ci cfg.indices = indices; 3123bf215546Sopenharmony_ci#endif 3124bf215546Sopenharmony_ci } 3125bf215546Sopenharmony_ci 3126bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3127bf215546Sopenharmony_ci cfg.secondary_shader = secondary_shader; 3128bf215546Sopenharmony_ci#endif 3129bf215546Sopenharmony_ci } 3130bf215546Sopenharmony_ci} 3131bf215546Sopenharmony_ci 3132bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3133bf215546Sopenharmony_cistatic mali_ptr 3134bf215546Sopenharmony_cipanfrost_emit_resources(struct panfrost_batch *batch, 3135bf215546Sopenharmony_ci enum pipe_shader_type stage, 3136bf215546Sopenharmony_ci mali_ptr ubos, unsigned ubo_count) 3137bf215546Sopenharmony_ci{ 3138bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3139bf215546Sopenharmony_ci struct panfrost_ptr T; 3140bf215546Sopenharmony_ci unsigned nr_tables = 12; 3141bf215546Sopenharmony_ci 3142bf215546Sopenharmony_ci /* Although individual resources need only 16 byte alignment, the 3143bf215546Sopenharmony_ci * resource table as a whole must be 64-byte aligned. 
3144bf215546Sopenharmony_ci */ 3145bf215546Sopenharmony_ci T = pan_pool_alloc_aligned(&batch->pool.base, nr_tables * pan_size(RESOURCE), 64); 3146bf215546Sopenharmony_ci memset(T.cpu, 0, nr_tables * pan_size(RESOURCE)); 3147bf215546Sopenharmony_ci 3148bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_UBO, ubos, ubo_count); 3149bf215546Sopenharmony_ci 3150bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, 3151bf215546Sopenharmony_ci batch->textures[stage], 3152bf215546Sopenharmony_ci ctx->sampler_view_count[stage]); 3153bf215546Sopenharmony_ci 3154bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, 3155bf215546Sopenharmony_ci batch->samplers[stage], 3156bf215546Sopenharmony_ci ctx->sampler_count[stage]); 3157bf215546Sopenharmony_ci 3158bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_IMAGE, 3159bf215546Sopenharmony_ci batch->images[stage], 3160bf215546Sopenharmony_ci util_last_bit(ctx->image_mask[stage])); 3161bf215546Sopenharmony_ci 3162bf215546Sopenharmony_ci if (stage == PIPE_SHADER_VERTEX) { 3163bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, 3164bf215546Sopenharmony_ci batch->attribs[stage], 3165bf215546Sopenharmony_ci ctx->vertex->num_elements); 3166bf215546Sopenharmony_ci 3167bf215546Sopenharmony_ci panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, 3168bf215546Sopenharmony_ci batch->attrib_bufs[stage], 3169bf215546Sopenharmony_ci util_last_bit(ctx->vb_mask)); 3170bf215546Sopenharmony_ci } 3171bf215546Sopenharmony_ci 3172bf215546Sopenharmony_ci return T.gpu | nr_tables; 3173bf215546Sopenharmony_ci} 3174bf215546Sopenharmony_ci 3175bf215546Sopenharmony_cistatic void 3176bf215546Sopenharmony_cipanfrost_emit_shader(struct panfrost_batch *batch, 3177bf215546Sopenharmony_ci struct MALI_SHADER_ENVIRONMENT *cfg, 3178bf215546Sopenharmony_ci enum pipe_shader_type stage, 3179bf215546Sopenharmony_ci mali_ptr shader_ptr, 3180bf215546Sopenharmony_ci 
mali_ptr thread_storage) 3181bf215546Sopenharmony_ci{ 3182bf215546Sopenharmony_ci unsigned fau_words = 0, ubo_count = 0; 3183bf215546Sopenharmony_ci mali_ptr ubos, resources; 3184bf215546Sopenharmony_ci 3185bf215546Sopenharmony_ci ubos = panfrost_emit_const_buf(batch, stage, &ubo_count, &cfg->fau, 3186bf215546Sopenharmony_ci &fau_words); 3187bf215546Sopenharmony_ci 3188bf215546Sopenharmony_ci resources = panfrost_emit_resources(batch, stage, ubos, ubo_count); 3189bf215546Sopenharmony_ci 3190bf215546Sopenharmony_ci cfg->thread_storage = thread_storage; 3191bf215546Sopenharmony_ci cfg->shader = shader_ptr; 3192bf215546Sopenharmony_ci cfg->resources = resources; 3193bf215546Sopenharmony_ci 3194bf215546Sopenharmony_ci /* Each entry of FAU is 64-bits */ 3195bf215546Sopenharmony_ci cfg->fau_count = DIV_ROUND_UP(fau_words, 2); 3196bf215546Sopenharmony_ci} 3197bf215546Sopenharmony_ci#endif 3198bf215546Sopenharmony_ci 3199bf215546Sopenharmony_cistatic void 3200bf215546Sopenharmony_cipanfrost_emit_draw(void *out, 3201bf215546Sopenharmony_ci struct panfrost_batch *batch, 3202bf215546Sopenharmony_ci bool fs_required, 3203bf215546Sopenharmony_ci enum pipe_prim_type prim, 3204bf215546Sopenharmony_ci mali_ptr pos, mali_ptr fs_vary, mali_ptr varyings) 3205bf215546Sopenharmony_ci{ 3206bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3207bf215546Sopenharmony_ci struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; 3208bf215546Sopenharmony_ci bool polygon = (prim == PIPE_PRIM_TRIANGLES); 3209bf215546Sopenharmony_ci 3210bf215546Sopenharmony_ci pan_pack(out, DRAW, cfg) { 3211bf215546Sopenharmony_ci /* 3212bf215546Sopenharmony_ci * From the Gallium documentation, 3213bf215546Sopenharmony_ci * pipe_rasterizer_state::cull_face "indicates which faces of 3214bf215546Sopenharmony_ci * polygons to cull". Points and lines are not considered 3215bf215546Sopenharmony_ci * polygons and should be drawn even if all faces are culled. 
3216bf215546Sopenharmony_ci * The hardware does not take primitive type into account when 3217bf215546Sopenharmony_ci * culling, so we need to do that check ourselves. 3218bf215546Sopenharmony_ci */ 3219bf215546Sopenharmony_ci cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT); 3220bf215546Sopenharmony_ci cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); 3221bf215546Sopenharmony_ci cfg.front_face_ccw = rast->front_ccw; 3222bf215546Sopenharmony_ci 3223bf215546Sopenharmony_ci if (ctx->occlusion_query && ctx->active_queries) { 3224bf215546Sopenharmony_ci if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER) 3225bf215546Sopenharmony_ci cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER; 3226bf215546Sopenharmony_ci else 3227bf215546Sopenharmony_ci cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; 3228bf215546Sopenharmony_ci 3229bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc); 3230bf215546Sopenharmony_ci cfg.occlusion = rsrc->image.data.bo->ptr.gpu; 3231bf215546Sopenharmony_ci panfrost_batch_write_rsrc(ctx->batch, rsrc, 3232bf215546Sopenharmony_ci PIPE_SHADER_FRAGMENT); 3233bf215546Sopenharmony_ci } 3234bf215546Sopenharmony_ci 3235bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3236bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 3237bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_ci cfg.multisample_enable = rast->multisample; 3240bf215546Sopenharmony_ci cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF; 3241bf215546Sopenharmony_ci 3242bf215546Sopenharmony_ci /* Use per-sample shading if required by API Also use it when a 3243bf215546Sopenharmony_ci * blend shader is used with multisampling, as this is handled 3244bf215546Sopenharmony_ci * by a single ST_TILE in the blend shader with the current 3245bf215546Sopenharmony_ci * sample ID, requiring per-sample shading. 
3246bf215546Sopenharmony_ci */ 3247bf215546Sopenharmony_ci cfg.evaluate_per_sample = 3248bf215546Sopenharmony_ci (rast->multisample && 3249bf215546Sopenharmony_ci ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader)); 3250bf215546Sopenharmony_ci 3251bf215546Sopenharmony_ci cfg.single_sampled_lines = !rast->multisample; 3252bf215546Sopenharmony_ci 3253bf215546Sopenharmony_ci cfg.vertex_array.packet = true; 3254bf215546Sopenharmony_ci 3255bf215546Sopenharmony_ci cfg.minimum_z = batch->minimum_z; 3256bf215546Sopenharmony_ci cfg.maximum_z = batch->maximum_z; 3257bf215546Sopenharmony_ci 3258bf215546Sopenharmony_ci cfg.depth_stencil = batch->depth_stencil; 3259bf215546Sopenharmony_ci 3260bf215546Sopenharmony_ci if (fs_required) { 3261bf215546Sopenharmony_ci bool has_oq = ctx->occlusion_query && ctx->active_queries; 3262bf215546Sopenharmony_ci 3263bf215546Sopenharmony_ci struct pan_earlyzs_state earlyzs = 3264bf215546Sopenharmony_ci pan_earlyzs_get(fs->earlyzs, 3265bf215546Sopenharmony_ci ctx->depth_stencil->writes_zs || has_oq, 3266bf215546Sopenharmony_ci ctx->blend->base.alpha_to_coverage, 3267bf215546Sopenharmony_ci ctx->depth_stencil->zs_always_passes); 3268bf215546Sopenharmony_ci 3269bf215546Sopenharmony_ci cfg.pixel_kill_operation = earlyzs.kill; 3270bf215546Sopenharmony_ci cfg.zs_update_operation = earlyzs.update; 3271bf215546Sopenharmony_ci 3272bf215546Sopenharmony_ci cfg.allow_forward_pixel_to_kill = pan_allow_forward_pixel_to_kill(ctx, fs); 3273bf215546Sopenharmony_ci cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global; 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_ci /* Mask of render targets that may be written. A render 3276bf215546Sopenharmony_ci * target may be written if the fragment shader writes 3277bf215546Sopenharmony_ci * to it AND it actually exists. If the render target 3278bf215546Sopenharmony_ci * doesn't actually exist, the blend descriptor will be 3279bf215546Sopenharmony_ci * OFF so it may be omitted from the mask. 
3280bf215546Sopenharmony_ci * 3281bf215546Sopenharmony_ci * Only set when there is a fragment shader, since 3282bf215546Sopenharmony_ci * otherwise no colour updates are possible. 3283bf215546Sopenharmony_ci */ 3284bf215546Sopenharmony_ci cfg.render_target_mask = 3285bf215546Sopenharmony_ci (fs->info.outputs_written >> FRAG_RESULT_DATA0) & 3286bf215546Sopenharmony_ci ctx->fb_rt_mask; 3287bf215546Sopenharmony_ci 3288bf215546Sopenharmony_ci /* Also use per-sample shading if required by the shader 3289bf215546Sopenharmony_ci */ 3290bf215546Sopenharmony_ci cfg.evaluate_per_sample |= fs->info.fs.sample_shading; 3291bf215546Sopenharmony_ci 3292bf215546Sopenharmony_ci /* Unlike Bifrost, alpha-to-coverage must be included in 3293bf215546Sopenharmony_ci * this identically-named flag. Confusing, isn't it? 3294bf215546Sopenharmony_ci */ 3295bf215546Sopenharmony_ci cfg.shader_modifies_coverage = fs->info.fs.writes_coverage || 3296bf215546Sopenharmony_ci fs->info.fs.can_discard || 3297bf215546Sopenharmony_ci ctx->blend->base.alpha_to_coverage; 3298bf215546Sopenharmony_ci 3299bf215546Sopenharmony_ci /* Blend descriptors are only accessed by a BLEND 3300bf215546Sopenharmony_ci * instruction on Valhall. It follows that if the 3301bf215546Sopenharmony_ci * fragment shader is omitted, we may also emit the 3302bf215546Sopenharmony_ci * blend descriptors. 
3303bf215546Sopenharmony_ci */ 3304bf215546Sopenharmony_ci cfg.blend = batch->blend; 3305bf215546Sopenharmony_ci cfg.blend_count = MAX2(batch->key.nr_cbufs, 1); 3306bf215546Sopenharmony_ci cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage; 3307bf215546Sopenharmony_ci 3308bf215546Sopenharmony_ci cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); 3309bf215546Sopenharmony_ci cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); 3310bf215546Sopenharmony_ci 3311bf215546Sopenharmony_ci panfrost_emit_shader(batch, &cfg.shader, PIPE_SHADER_FRAGMENT, 3312bf215546Sopenharmony_ci batch->rsd[PIPE_SHADER_FRAGMENT], 3313bf215546Sopenharmony_ci batch->tls.gpu); 3314bf215546Sopenharmony_ci } else { 3315bf215546Sopenharmony_ci /* These operations need to be FORCE to benefit from the 3316bf215546Sopenharmony_ci * depth-only pass optimizations. 3317bf215546Sopenharmony_ci */ 3318bf215546Sopenharmony_ci cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; 3319bf215546Sopenharmony_ci cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY; 3320bf215546Sopenharmony_ci 3321bf215546Sopenharmony_ci /* No shader and no blend => no shader or blend 3322bf215546Sopenharmony_ci * reasons to disable FPK. The only FPK-related state 3323bf215546Sopenharmony_ci * not covered is alpha-to-coverage which we don't set 3324bf215546Sopenharmony_ci * without blend. 
3325bf215546Sopenharmony_ci */ 3326bf215546Sopenharmony_ci cfg.allow_forward_pixel_to_kill = true; 3327bf215546Sopenharmony_ci 3328bf215546Sopenharmony_ci /* No shader => no shader side effects */ 3329bf215546Sopenharmony_ci cfg.allow_forward_pixel_to_be_killed = true; 3330bf215546Sopenharmony_ci 3331bf215546Sopenharmony_ci /* Alpha isn't written so these are vacuous */ 3332bf215546Sopenharmony_ci cfg.overdraw_alpha0 = true; 3333bf215546Sopenharmony_ci cfg.overdraw_alpha1 = true; 3334bf215546Sopenharmony_ci } 3335bf215546Sopenharmony_ci#else 3336bf215546Sopenharmony_ci cfg.position = pos; 3337bf215546Sopenharmony_ci cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT]; 3338bf215546Sopenharmony_ci cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT]; 3339bf215546Sopenharmony_ci cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT]; 3340bf215546Sopenharmony_ci cfg.viewport = batch->viewport; 3341bf215546Sopenharmony_ci cfg.varyings = fs_vary; 3342bf215546Sopenharmony_ci cfg.varying_buffers = fs_vary ? varyings : 0; 3343bf215546Sopenharmony_ci cfg.thread_storage = batch->tls.gpu; 3344bf215546Sopenharmony_ci 3345bf215546Sopenharmony_ci /* For all primitives but lines DRAW.flat_shading_vertex must 3346bf215546Sopenharmony_ci * be set to 0 and the provoking vertex is selected with the 3347bf215546Sopenharmony_ci * PRIMITIVE.first_provoking_vertex field. 3348bf215546Sopenharmony_ci */ 3349bf215546Sopenharmony_ci if (prim == PIPE_PRIM_LINES) { 3350bf215546Sopenharmony_ci /* The logic is inverted across arches. 
*/ 3351bf215546Sopenharmony_ci cfg.flat_shading_vertex = rast->flatshade_first 3352bf215546Sopenharmony_ci ^ (PAN_ARCH <= 5); 3353bf215546Sopenharmony_ci } 3354bf215546Sopenharmony_ci 3355bf215546Sopenharmony_ci pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT); 3356bf215546Sopenharmony_ci#endif 3357bf215546Sopenharmony_ci } 3358bf215546Sopenharmony_ci} 3359bf215546Sopenharmony_ci 3360bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3361bf215546Sopenharmony_cistatic void 3362bf215546Sopenharmony_cipanfrost_emit_malloc_vertex(struct panfrost_batch *batch, 3363bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3364bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw, 3365bf215546Sopenharmony_ci mali_ptr indices, bool secondary_shader, 3366bf215546Sopenharmony_ci void *job) 3367bf215546Sopenharmony_ci{ 3368bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3369bf215546Sopenharmony_ci 3370bf215546Sopenharmony_ci struct panfrost_shader_state *vs = 3371bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 3372bf215546Sopenharmony_ci 3373bf215546Sopenharmony_ci struct panfrost_shader_state *fs = 3374bf215546Sopenharmony_ci panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); 3375bf215546Sopenharmony_ci 3376bf215546Sopenharmony_ci bool fs_required = panfrost_fs_required(fs, ctx->blend, 3377bf215546Sopenharmony_ci &ctx->pipe_framebuffer, 3378bf215546Sopenharmony_ci ctx->depth_stencil); 3379bf215546Sopenharmony_ci 3380bf215546Sopenharmony_ci /* Varying shaders only feed data to the fragment shader, so if we omit 3381bf215546Sopenharmony_ci * the fragment shader, we should omit the varying shader too. 
3382bf215546Sopenharmony_ci */ 3383bf215546Sopenharmony_ci secondary_shader &= fs_required; 3384bf215546Sopenharmony_ci 3385bf215546Sopenharmony_ci panfrost_emit_primitive(ctx, info, draw, 0, secondary_shader, 3386bf215546Sopenharmony_ci pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE)); 3387bf215546Sopenharmony_ci 3388bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) { 3389bf215546Sopenharmony_ci cfg.count = info->instance_count; 3390bf215546Sopenharmony_ci } 3391bf215546Sopenharmony_ci 3392bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) { 3393bf215546Sopenharmony_ci if (secondary_shader) { 3394bf215546Sopenharmony_ci unsigned v = vs->info.varyings.output_count; 3395bf215546Sopenharmony_ci unsigned f = fs->info.varyings.input_count; 3396bf215546Sopenharmony_ci unsigned slots = MAX2(v, f); 3397bf215546Sopenharmony_ci slots += util_bitcount(fs->key.fixed_varying_mask); 3398bf215546Sopenharmony_ci unsigned size = slots * 16; 3399bf215546Sopenharmony_ci 3400bf215546Sopenharmony_ci /* Assumes 16 byte slots. We could do better. 
*/ 3401bf215546Sopenharmony_ci cfg.vertex_packet_stride = size + 16; 3402bf215546Sopenharmony_ci cfg.vertex_attribute_stride = size; 3403bf215546Sopenharmony_ci } else { 3404bf215546Sopenharmony_ci /* Hardware requirement for "no varyings" */ 3405bf215546Sopenharmony_ci cfg.vertex_packet_stride = 16; 3406bf215546Sopenharmony_ci cfg.vertex_attribute_stride = 0; 3407bf215546Sopenharmony_ci } 3408bf215546Sopenharmony_ci } 3409bf215546Sopenharmony_ci 3410bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) { 3411bf215546Sopenharmony_ci cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); 3412bf215546Sopenharmony_ci } 3413bf215546Sopenharmony_ci 3414bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); 3415bf215546Sopenharmony_ci memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR), 3416bf215546Sopenharmony_ci &batch->scissor, pan_size(SCISSOR)); 3417bf215546Sopenharmony_ci 3418bf215546Sopenharmony_ci panfrost_emit_primitive_size(ctx, info->mode == PIPE_PRIM_POINTS, 0, 3419bf215546Sopenharmony_ci pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE)); 3420bf215546Sopenharmony_ci 3421bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) { 3422bf215546Sopenharmony_ci cfg.address = indices; 3423bf215546Sopenharmony_ci } 3424bf215546Sopenharmony_ci 3425bf215546Sopenharmony_ci panfrost_emit_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), 3426bf215546Sopenharmony_ci batch, fs_required, u_reduced_prim(info->mode), 0, 0, 0); 3427bf215546Sopenharmony_ci 3428bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) { 3429bf215546Sopenharmony_ci /* IDVS/points vertex shader */ 3430bf215546Sopenharmony_ci mali_ptr vs_ptr = batch->rsd[PIPE_SHADER_VERTEX]; 3431bf215546Sopenharmony_ci 3432bf215546Sopenharmony_ci /* IDVS/triangle vertex shader */ 3433bf215546Sopenharmony_ci if (vs_ptr && info->mode != PIPE_PRIM_POINTS) 3434bf215546Sopenharmony_ci vs_ptr += 
pan_size(SHADER_PROGRAM); 3435bf215546Sopenharmony_ci 3436bf215546Sopenharmony_ci panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, vs_ptr, 3437bf215546Sopenharmony_ci batch->tls.gpu); 3438bf215546Sopenharmony_ci } 3439bf215546Sopenharmony_ci 3440bf215546Sopenharmony_ci pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) { 3441bf215546Sopenharmony_ci /* If a varying shader is used, we configure it with the same 3442bf215546Sopenharmony_ci * state as the position shader for backwards compatible 3443bf215546Sopenharmony_ci * behaviour with Bifrost. This could be optimized. 3444bf215546Sopenharmony_ci */ 3445bf215546Sopenharmony_ci if (!secondary_shader) continue; 3446bf215546Sopenharmony_ci 3447bf215546Sopenharmony_ci mali_ptr ptr = batch->rsd[PIPE_SHADER_VERTEX] + 3448bf215546Sopenharmony_ci (2 * pan_size(SHADER_PROGRAM)); 3449bf215546Sopenharmony_ci 3450bf215546Sopenharmony_ci panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, 3451bf215546Sopenharmony_ci ptr, batch->tls.gpu); 3452bf215546Sopenharmony_ci } 3453bf215546Sopenharmony_ci} 3454bf215546Sopenharmony_ci#endif 3455bf215546Sopenharmony_ci 3456bf215546Sopenharmony_ci#if PAN_ARCH <= 7 3457bf215546Sopenharmony_cistatic void 3458bf215546Sopenharmony_cipanfrost_draw_emit_tiler(struct panfrost_batch *batch, 3459bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3460bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw, 3461bf215546Sopenharmony_ci void *invocation_template, 3462bf215546Sopenharmony_ci mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings, 3463bf215546Sopenharmony_ci mali_ptr pos, mali_ptr psiz, bool secondary_shader, 3464bf215546Sopenharmony_ci void *job) 3465bf215546Sopenharmony_ci{ 3466bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3467bf215546Sopenharmony_ci 3468bf215546Sopenharmony_ci void *section = pan_section_ptr(job, TILER_JOB, INVOCATION); 3469bf215546Sopenharmony_ci memcpy(section, invocation_template, pan_size(INVOCATION)); 
3470bf215546Sopenharmony_ci 3471bf215546Sopenharmony_ci panfrost_emit_primitive(ctx, info, draw, indices, secondary_shader, 3472bf215546Sopenharmony_ci pan_section_ptr(job, TILER_JOB, PRIMITIVE)); 3473bf215546Sopenharmony_ci 3474bf215546Sopenharmony_ci void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); 3475bf215546Sopenharmony_ci enum pipe_prim_type prim = u_reduced_prim(info->mode); 3476bf215546Sopenharmony_ci 3477bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3478bf215546Sopenharmony_ci pan_section_pack(job, TILER_JOB, TILER, cfg) { 3479bf215546Sopenharmony_ci cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); 3480bf215546Sopenharmony_ci } 3481bf215546Sopenharmony_ci 3482bf215546Sopenharmony_ci pan_section_pack(job, TILER_JOB, PADDING, cfg); 3483bf215546Sopenharmony_ci#endif 3484bf215546Sopenharmony_ci 3485bf215546Sopenharmony_ci panfrost_emit_draw(pan_section_ptr(job, TILER_JOB, DRAW), 3486bf215546Sopenharmony_ci batch, true, prim, pos, fs_vary, varyings); 3487bf215546Sopenharmony_ci 3488bf215546Sopenharmony_ci panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size); 3489bf215546Sopenharmony_ci} 3490bf215546Sopenharmony_ci#endif 3491bf215546Sopenharmony_ci 3492bf215546Sopenharmony_cistatic void 3493bf215546Sopenharmony_cipanfrost_launch_xfb(struct panfrost_batch *batch, 3494bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3495bf215546Sopenharmony_ci mali_ptr attribs, mali_ptr attrib_bufs, 3496bf215546Sopenharmony_ci unsigned count) 3497bf215546Sopenharmony_ci{ 3498bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3499bf215546Sopenharmony_ci 3500bf215546Sopenharmony_ci struct panfrost_ptr t = 3501bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); 3502bf215546Sopenharmony_ci 3503bf215546Sopenharmony_ci /* Nothing to do */ 3504bf215546Sopenharmony_ci if (batch->ctx->streamout.num_targets == 0) 3505bf215546Sopenharmony_ci return; 3506bf215546Sopenharmony_ci 
3507bf215546Sopenharmony_ci /* TODO: XFB with index buffers */ 3508bf215546Sopenharmony_ci //assert(info->index_size == 0); 3509bf215546Sopenharmony_ci u_trim_pipe_prim(info->mode, &count); 3510bf215546Sopenharmony_ci 3511bf215546Sopenharmony_ci if (count == 0) 3512bf215546Sopenharmony_ci return; 3513bf215546Sopenharmony_ci 3514bf215546Sopenharmony_ci struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 3515bf215546Sopenharmony_ci struct panfrost_shader_variants v = { .variants = vs->xfb }; 3516bf215546Sopenharmony_ci 3517bf215546Sopenharmony_ci vs->xfb->stream_output = vs->stream_output; 3518bf215546Sopenharmony_ci 3519bf215546Sopenharmony_ci struct panfrost_shader_variants *saved_vs = ctx->shader[PIPE_SHADER_VERTEX]; 3520bf215546Sopenharmony_ci mali_ptr saved_rsd = batch->rsd[PIPE_SHADER_VERTEX]; 3521bf215546Sopenharmony_ci mali_ptr saved_ubo = batch->uniform_buffers[PIPE_SHADER_VERTEX]; 3522bf215546Sopenharmony_ci mali_ptr saved_push = batch->push_uniforms[PIPE_SHADER_VERTEX]; 3523bf215546Sopenharmony_ci 3524bf215546Sopenharmony_ci ctx->shader[PIPE_SHADER_VERTEX] = &v; 3525bf215546Sopenharmony_ci batch->rsd[PIPE_SHADER_VERTEX] = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX); 3526bf215546Sopenharmony_ci 3527bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3528bf215546Sopenharmony_ci pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { 3529bf215546Sopenharmony_ci cfg.workgroup_size_x = 1; 3530bf215546Sopenharmony_ci cfg.workgroup_size_y = 1; 3531bf215546Sopenharmony_ci cfg.workgroup_size_z = 1; 3532bf215546Sopenharmony_ci 3533bf215546Sopenharmony_ci cfg.workgroup_count_x = count; 3534bf215546Sopenharmony_ci cfg.workgroup_count_y = info->instance_count; 3535bf215546Sopenharmony_ci cfg.workgroup_count_z = 1; 3536bf215546Sopenharmony_ci 3537bf215546Sopenharmony_ci panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_VERTEX, 3538bf215546Sopenharmony_ci batch->rsd[PIPE_SHADER_VERTEX], 3539bf215546Sopenharmony_ci 
batch->tls.gpu); 3540bf215546Sopenharmony_ci 3541bf215546Sopenharmony_ci /* TODO: Indexing. Also, this is a legacy feature... */ 3542bf215546Sopenharmony_ci cfg.compute.attribute_offset = batch->ctx->offset_start; 3543bf215546Sopenharmony_ci 3544bf215546Sopenharmony_ci /* Transform feedback shaders do not use barriers or shared 3545bf215546Sopenharmony_ci * memory, so we may merge workgroups. 3546bf215546Sopenharmony_ci */ 3547bf215546Sopenharmony_ci cfg.allow_merging_workgroups = true; 3548bf215546Sopenharmony_ci cfg.task_increment = 1; 3549bf215546Sopenharmony_ci cfg.task_axis = MALI_TASK_AXIS_Z; 3550bf215546Sopenharmony_ci } 3551bf215546Sopenharmony_ci#else 3552bf215546Sopenharmony_ci struct mali_invocation_packed invocation; 3553bf215546Sopenharmony_ci 3554bf215546Sopenharmony_ci panfrost_pack_work_groups_compute(&invocation, 3555bf215546Sopenharmony_ci 1, count, info->instance_count, 3556bf215546Sopenharmony_ci 1, 1, 1, PAN_ARCH <= 5, false); 3557bf215546Sopenharmony_ci 3558bf215546Sopenharmony_ci batch->uniform_buffers[PIPE_SHADER_VERTEX] = 3559bf215546Sopenharmony_ci panfrost_emit_const_buf(batch, PIPE_SHADER_VERTEX, NULL, 3560bf215546Sopenharmony_ci &batch->push_uniforms[PIPE_SHADER_VERTEX], NULL); 3561bf215546Sopenharmony_ci 3562bf215546Sopenharmony_ci panfrost_draw_emit_vertex(batch, info, &invocation, 0, 0, 3563bf215546Sopenharmony_ci attribs, attrib_bufs, t.cpu); 3564bf215546Sopenharmony_ci#endif 3565bf215546Sopenharmony_ci enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE; 3566bf215546Sopenharmony_ci#if PAN_ARCH <= 5 3567bf215546Sopenharmony_ci job_type = MALI_JOB_TYPE_VERTEX; 3568bf215546Sopenharmony_ci#endif 3569bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, job_type, 3570bf215546Sopenharmony_ci true, false, 0, 0, &t, false); 3571bf215546Sopenharmony_ci 3572bf215546Sopenharmony_ci ctx->shader[PIPE_SHADER_VERTEX] = saved_vs; 3573bf215546Sopenharmony_ci batch->rsd[PIPE_SHADER_VERTEX] = saved_rsd; 
3574bf215546Sopenharmony_ci batch->uniform_buffers[PIPE_SHADER_VERTEX] = saved_ubo; 3575bf215546Sopenharmony_ci batch->push_uniforms[PIPE_SHADER_VERTEX] = saved_push; 3576bf215546Sopenharmony_ci} 3577bf215546Sopenharmony_ci 3578bf215546Sopenharmony_cistatic void 3579bf215546Sopenharmony_cipanfrost_direct_draw(struct panfrost_batch *batch, 3580bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3581bf215546Sopenharmony_ci unsigned drawid_offset, 3582bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) 3583bf215546Sopenharmony_ci{ 3584bf215546Sopenharmony_ci if (!draw->count || !info->instance_count) 3585bf215546Sopenharmony_ci return; 3586bf215546Sopenharmony_ci 3587bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3588bf215546Sopenharmony_ci 3589bf215546Sopenharmony_ci /* If we change whether we're drawing points, or whether point sprites 3590bf215546Sopenharmony_ci * are enabled (specified in the rasterizer), we may need to rebind 3591bf215546Sopenharmony_ci * shaders accordingly. This implicitly covers the case of rebinding 3592bf215546Sopenharmony_ci * framebuffers, because all dirty flags are set there. 3593bf215546Sopenharmony_ci */ 3594bf215546Sopenharmony_ci if ((ctx->dirty & PAN_DIRTY_RASTERIZER) || 3595bf215546Sopenharmony_ci ((ctx->active_prim == PIPE_PRIM_POINTS) ^ 3596bf215546Sopenharmony_ci (info->mode == PIPE_PRIM_POINTS))) { 3597bf215546Sopenharmony_ci 3598bf215546Sopenharmony_ci ctx->active_prim = info->mode; 3599bf215546Sopenharmony_ci panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT); 3600bf215546Sopenharmony_ci } 3601bf215546Sopenharmony_ci 3602bf215546Sopenharmony_ci /* Take into account a negative bias */ 3603bf215546Sopenharmony_ci ctx->indirect_draw = false; 3604bf215546Sopenharmony_ci ctx->vertex_count = draw->count + (info->index_size ? 
abs(draw->index_bias) : 0); 3605bf215546Sopenharmony_ci ctx->instance_count = info->instance_count; 3606bf215546Sopenharmony_ci ctx->base_vertex = info->index_size ? draw->index_bias : 0; 3607bf215546Sopenharmony_ci ctx->base_instance = info->start_instance; 3608bf215546Sopenharmony_ci ctx->active_prim = info->mode; 3609bf215546Sopenharmony_ci ctx->drawid = drawid_offset; 3610bf215546Sopenharmony_ci 3611bf215546Sopenharmony_ci struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 3612bf215546Sopenharmony_ci 3613bf215546Sopenharmony_ci bool idvs = vs->info.vs.idvs; 3614bf215546Sopenharmony_ci bool secondary_shader = vs->info.vs.secondary_enable; 3615bf215546Sopenharmony_ci 3616bf215546Sopenharmony_ci UNUSED struct panfrost_ptr tiler, vertex; 3617bf215546Sopenharmony_ci 3618bf215546Sopenharmony_ci if (idvs) { 3619bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3620bf215546Sopenharmony_ci tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB); 3621bf215546Sopenharmony_ci#elif PAN_ARCH >= 6 3622bf215546Sopenharmony_ci tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB); 3623bf215546Sopenharmony_ci#else 3624bf215546Sopenharmony_ci unreachable("IDVS is unsupported on Midgard"); 3625bf215546Sopenharmony_ci#endif 3626bf215546Sopenharmony_ci } else { 3627bf215546Sopenharmony_ci vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); 3628bf215546Sopenharmony_ci tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB); 3629bf215546Sopenharmony_ci } 3630bf215546Sopenharmony_ci 3631bf215546Sopenharmony_ci unsigned vertex_count = ctx->vertex_count; 3632bf215546Sopenharmony_ci 3633bf215546Sopenharmony_ci unsigned min_index = 0, max_index = 0; 3634bf215546Sopenharmony_ci mali_ptr indices = 0; 3635bf215546Sopenharmony_ci 3636bf215546Sopenharmony_ci if (info->index_size && PAN_ARCH >= 9) { 3637bf215546Sopenharmony_ci indices = panfrost_get_index_buffer(batch, info, draw); 3638bf215546Sopenharmony_ci } else if 
(info->index_size) { 3639bf215546Sopenharmony_ci indices = panfrost_get_index_buffer_bounded(batch, info, draw, 3640bf215546Sopenharmony_ci &min_index, 3641bf215546Sopenharmony_ci &max_index); 3642bf215546Sopenharmony_ci 3643bf215546Sopenharmony_ci /* Use the corresponding values */ 3644bf215546Sopenharmony_ci vertex_count = max_index - min_index + 1; 3645bf215546Sopenharmony_ci ctx->offset_start = min_index + draw->index_bias; 3646bf215546Sopenharmony_ci } else { 3647bf215546Sopenharmony_ci ctx->offset_start = draw->start; 3648bf215546Sopenharmony_ci } 3649bf215546Sopenharmony_ci 3650bf215546Sopenharmony_ci if (info->instance_count > 1) { 3651bf215546Sopenharmony_ci unsigned count = vertex_count; 3652bf215546Sopenharmony_ci 3653bf215546Sopenharmony_ci /* Index-Driven Vertex Shading requires different instances to 3654bf215546Sopenharmony_ci * have different cache lines for position results. Each vertex 3655bf215546Sopenharmony_ci * position is 16 bytes and the Mali cache line is 64 bytes, so 3656bf215546Sopenharmony_ci * the instance count must be aligned to 4 vertices. 
3657bf215546Sopenharmony_ci */ 3658bf215546Sopenharmony_ci if (idvs) 3659bf215546Sopenharmony_ci count = ALIGN_POT(count, 4); 3660bf215546Sopenharmony_ci 3661bf215546Sopenharmony_ci ctx->padded_count = panfrost_padded_vertex_count(count); 3662bf215546Sopenharmony_ci } else 3663bf215546Sopenharmony_ci ctx->padded_count = vertex_count; 3664bf215546Sopenharmony_ci 3665bf215546Sopenharmony_ci panfrost_statistics_record(ctx, info, draw); 3666bf215546Sopenharmony_ci 3667bf215546Sopenharmony_ci#if PAN_ARCH <= 7 3668bf215546Sopenharmony_ci struct mali_invocation_packed invocation; 3669bf215546Sopenharmony_ci if (info->instance_count > 1) { 3670bf215546Sopenharmony_ci panfrost_pack_work_groups_compute(&invocation, 3671bf215546Sopenharmony_ci 1, vertex_count, info->instance_count, 3672bf215546Sopenharmony_ci 1, 1, 1, true, false); 3673bf215546Sopenharmony_ci } else { 3674bf215546Sopenharmony_ci pan_pack(&invocation, INVOCATION, cfg) { 3675bf215546Sopenharmony_ci cfg.invocations = MALI_POSITIVE(vertex_count); 3676bf215546Sopenharmony_ci cfg.size_y_shift = 0; 3677bf215546Sopenharmony_ci cfg.size_z_shift = 0; 3678bf215546Sopenharmony_ci cfg.workgroups_x_shift = 0; 3679bf215546Sopenharmony_ci cfg.workgroups_y_shift = 0; 3680bf215546Sopenharmony_ci cfg.workgroups_z_shift = 32; 3681bf215546Sopenharmony_ci cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT; 3682bf215546Sopenharmony_ci } 3683bf215546Sopenharmony_ci } 3684bf215546Sopenharmony_ci 3685bf215546Sopenharmony_ci /* Emit all sort of descriptors. 
*/ 3686bf215546Sopenharmony_ci mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; 3687bf215546Sopenharmony_ci 3688bf215546Sopenharmony_ci panfrost_emit_varying_descriptor(batch, 3689bf215546Sopenharmony_ci ctx->padded_count * 3690bf215546Sopenharmony_ci ctx->instance_count, 3691bf215546Sopenharmony_ci &vs_vary, &fs_vary, &varyings, 3692bf215546Sopenharmony_ci NULL, &pos, &psiz, 3693bf215546Sopenharmony_ci info->mode == PIPE_PRIM_POINTS); 3694bf215546Sopenharmony_ci 3695bf215546Sopenharmony_ci mali_ptr attribs, attrib_bufs; 3696bf215546Sopenharmony_ci attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); 3697bf215546Sopenharmony_ci#endif 3698bf215546Sopenharmony_ci 3699bf215546Sopenharmony_ci panfrost_update_state_3d(batch); 3700bf215546Sopenharmony_ci panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); 3701bf215546Sopenharmony_ci panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); 3702bf215546Sopenharmony_ci panfrost_clean_state_3d(ctx); 3703bf215546Sopenharmony_ci 3704bf215546Sopenharmony_ci if (vs->xfb) { 3705bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3706bf215546Sopenharmony_ci mali_ptr attribs = 0, attrib_bufs = 0; 3707bf215546Sopenharmony_ci#endif 3708bf215546Sopenharmony_ci panfrost_launch_xfb(batch, info, attribs, attrib_bufs, draw->count); 3709bf215546Sopenharmony_ci } 3710bf215546Sopenharmony_ci 3711bf215546Sopenharmony_ci /* Increment transform feedback offsets */ 3712bf215546Sopenharmony_ci panfrost_update_streamout_offsets(ctx); 3713bf215546Sopenharmony_ci 3714bf215546Sopenharmony_ci /* Any side effects must be handled by the XFB shader, so we only need 3715bf215546Sopenharmony_ci * to run vertex shaders if we need rasterization. 
3716bf215546Sopenharmony_ci */ 3717bf215546Sopenharmony_ci if (panfrost_batch_skip_rasterization(batch)) 3718bf215546Sopenharmony_ci return; 3719bf215546Sopenharmony_ci 3720bf215546Sopenharmony_ci#if PAN_ARCH >= 9 3721bf215546Sopenharmony_ci assert(idvs && "Memory allocated IDVS required on Valhall"); 3722bf215546Sopenharmony_ci 3723bf215546Sopenharmony_ci panfrost_emit_malloc_vertex(batch, info, draw, indices, 3724bf215546Sopenharmony_ci secondary_shader, tiler.cpu); 3725bf215546Sopenharmony_ci 3726bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, 3727bf215546Sopenharmony_ci MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0, 3728bf215546Sopenharmony_ci 0, &tiler, false); 3729bf215546Sopenharmony_ci#else 3730bf215546Sopenharmony_ci /* Fire off the draw itself */ 3731bf215546Sopenharmony_ci panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices, 3732bf215546Sopenharmony_ci fs_vary, varyings, pos, psiz, secondary_shader, 3733bf215546Sopenharmony_ci tiler.cpu); 3734bf215546Sopenharmony_ci if (idvs) { 3735bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3736bf215546Sopenharmony_ci panfrost_draw_emit_vertex_section(batch, 3737bf215546Sopenharmony_ci vs_vary, varyings, 3738bf215546Sopenharmony_ci attribs, attrib_bufs, 3739bf215546Sopenharmony_ci pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); 3740bf215546Sopenharmony_ci 3741bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, 3742bf215546Sopenharmony_ci MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 3743bf215546Sopenharmony_ci 0, 0, &tiler, false); 3744bf215546Sopenharmony_ci#endif 3745bf215546Sopenharmony_ci } else { 3746bf215546Sopenharmony_ci panfrost_draw_emit_vertex(batch, info, &invocation, 3747bf215546Sopenharmony_ci vs_vary, varyings, attribs, attrib_bufs, vertex.cpu); 3748bf215546Sopenharmony_ci panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); 3749bf215546Sopenharmony_ci } 3750bf215546Sopenharmony_ci#endif 3751bf215546Sopenharmony_ci} 
3752bf215546Sopenharmony_ci 3753bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS 3754bf215546Sopenharmony_cistatic void 3755bf215546Sopenharmony_cipanfrost_indirect_draw(struct panfrost_batch *batch, 3756bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3757bf215546Sopenharmony_ci unsigned drawid_offset, 3758bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 3759bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) 3760bf215546Sopenharmony_ci{ 3761bf215546Sopenharmony_ci /* Indirect draw count and multi-draw not supported. */ 3762bf215546Sopenharmony_ci assert(indirect->draw_count == 1 && !indirect->indirect_draw_count); 3763bf215546Sopenharmony_ci 3764bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3765bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(ctx->base.screen); 3766bf215546Sopenharmony_ci 3767bf215546Sopenharmony_ci /* TODO: update statistics (see panfrost_statistics_record()) */ 3768bf215546Sopenharmony_ci /* TODO: Increment transform feedback offsets */ 3769bf215546Sopenharmony_ci assert(ctx->streamout.num_targets == 0); 3770bf215546Sopenharmony_ci 3771bf215546Sopenharmony_ci ctx->active_prim = info->mode; 3772bf215546Sopenharmony_ci ctx->drawid = drawid_offset; 3773bf215546Sopenharmony_ci ctx->indirect_draw = true; 3774bf215546Sopenharmony_ci 3775bf215546Sopenharmony_ci struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); 3776bf215546Sopenharmony_ci 3777bf215546Sopenharmony_ci bool idvs = vs->info.vs.idvs; 3778bf215546Sopenharmony_ci bool secondary_shader = vs->info.vs.secondary_enable; 3779bf215546Sopenharmony_ci 3780bf215546Sopenharmony_ci struct panfrost_ptr tiler = { 0 }, vertex = { 0 }; 3781bf215546Sopenharmony_ci 3782bf215546Sopenharmony_ci if (idvs) { 3783bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3784bf215546Sopenharmony_ci tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB); 3785bf215546Sopenharmony_ci#else 
3786bf215546Sopenharmony_ci unreachable("IDVS is unsupported on Midgard"); 3787bf215546Sopenharmony_ci#endif 3788bf215546Sopenharmony_ci } else { 3789bf215546Sopenharmony_ci vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); 3790bf215546Sopenharmony_ci tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB); 3791bf215546Sopenharmony_ci } 3792bf215546Sopenharmony_ci 3793bf215546Sopenharmony_ci struct panfrost_bo *index_buf = NULL; 3794bf215546Sopenharmony_ci 3795bf215546Sopenharmony_ci if (info->index_size) { 3796bf215546Sopenharmony_ci assert(!info->has_user_indices); 3797bf215546Sopenharmony_ci struct panfrost_resource *rsrc = pan_resource(info->index.resource); 3798bf215546Sopenharmony_ci index_buf = rsrc->image.data.bo; 3799bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); 3800bf215546Sopenharmony_ci } 3801bf215546Sopenharmony_ci 3802bf215546Sopenharmony_ci mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; 3803bf215546Sopenharmony_ci unsigned varying_buf_count; 3804bf215546Sopenharmony_ci 3805bf215546Sopenharmony_ci /* We want to create templates, set all count fields to 0 to reflect 3806bf215546Sopenharmony_ci * that. 3807bf215546Sopenharmony_ci */ 3808bf215546Sopenharmony_ci ctx->instance_count = ctx->vertex_count = ctx->padded_count = 0; 3809bf215546Sopenharmony_ci ctx->offset_start = 0; 3810bf215546Sopenharmony_ci 3811bf215546Sopenharmony_ci /* Set the {first,base}_vertex sysvals to NULL. Will be updated if the 3812bf215546Sopenharmony_ci * vertex shader uses gl_VertexID or gl_BaseVertex. 
3813bf215546Sopenharmony_ci */ 3814bf215546Sopenharmony_ci ctx->first_vertex_sysval_ptr = 0; 3815bf215546Sopenharmony_ci ctx->base_vertex_sysval_ptr = 0; 3816bf215546Sopenharmony_ci ctx->base_instance_sysval_ptr = 0; 3817bf215546Sopenharmony_ci 3818bf215546Sopenharmony_ci panfrost_update_state_3d(batch); 3819bf215546Sopenharmony_ci panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); 3820bf215546Sopenharmony_ci panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); 3821bf215546Sopenharmony_ci panfrost_clean_state_3d(ctx); 3822bf215546Sopenharmony_ci 3823bf215546Sopenharmony_ci bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS); 3824bf215546Sopenharmony_ci 3825bf215546Sopenharmony_ci panfrost_emit_varying_descriptor(batch, 0, 3826bf215546Sopenharmony_ci &vs_vary, &fs_vary, &varyings, 3827bf215546Sopenharmony_ci &varying_buf_count, &pos, &psiz, 3828bf215546Sopenharmony_ci point_coord_replace); 3829bf215546Sopenharmony_ci 3830bf215546Sopenharmony_ci mali_ptr attribs, attrib_bufs; 3831bf215546Sopenharmony_ci attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); 3832bf215546Sopenharmony_ci 3833bf215546Sopenharmony_ci /* Zero-ed invocation, the compute job will update it. */ 3834bf215546Sopenharmony_ci static struct mali_invocation_packed invocation; 3835bf215546Sopenharmony_ci 3836bf215546Sopenharmony_ci /* Fire off the draw itself */ 3837bf215546Sopenharmony_ci panfrost_draw_emit_tiler(batch, info, draw, &invocation, 3838bf215546Sopenharmony_ci index_buf ? 
index_buf->ptr.gpu : 0, 3839bf215546Sopenharmony_ci fs_vary, varyings, pos, psiz, secondary_shader, 3840bf215546Sopenharmony_ci tiler.cpu); 3841bf215546Sopenharmony_ci if (idvs) { 3842bf215546Sopenharmony_ci#if PAN_ARCH >= 6 3843bf215546Sopenharmony_ci panfrost_draw_emit_vertex_section(batch, 3844bf215546Sopenharmony_ci vs_vary, varyings, 3845bf215546Sopenharmony_ci attribs, attrib_bufs, 3846bf215546Sopenharmony_ci pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); 3847bf215546Sopenharmony_ci#endif 3848bf215546Sopenharmony_ci } else { 3849bf215546Sopenharmony_ci panfrost_draw_emit_vertex(batch, info, &invocation, 3850bf215546Sopenharmony_ci vs_vary, varyings, attribs, attrib_bufs, vertex.cpu); 3851bf215546Sopenharmony_ci } 3852bf215546Sopenharmony_ci 3853bf215546Sopenharmony_ci /* Add the varying heap BO to the batch if we're allocating varyings. */ 3854bf215546Sopenharmony_ci if (varyings) { 3855bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, 3856bf215546Sopenharmony_ci dev->indirect_draw_shaders.varying_heap, 3857bf215546Sopenharmony_ci PIPE_SHADER_VERTEX); 3858bf215546Sopenharmony_ci } 3859bf215546Sopenharmony_ci 3860bf215546Sopenharmony_ci assert(indirect->buffer); 3861bf215546Sopenharmony_ci 3862bf215546Sopenharmony_ci struct panfrost_resource *draw_buf = pan_resource(indirect->buffer); 3863bf215546Sopenharmony_ci 3864bf215546Sopenharmony_ci /* Don't count images: those attributes don't need to be patched. 
*/ 3865bf215546Sopenharmony_ci unsigned attrib_count = 3866bf215546Sopenharmony_ci vs->info.attribute_count - 3867bf215546Sopenharmony_ci util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]); 3868bf215546Sopenharmony_ci 3869bf215546Sopenharmony_ci panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX); 3870bf215546Sopenharmony_ci 3871bf215546Sopenharmony_ci struct pan_indirect_draw_info draw_info = { 3872bf215546Sopenharmony_ci .last_indirect_draw = batch->indirect_draw_job_id, 3873bf215546Sopenharmony_ci .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset, 3874bf215546Sopenharmony_ci .index_buf = index_buf ? index_buf->ptr.gpu : 0, 3875bf215546Sopenharmony_ci .first_vertex_sysval = ctx->first_vertex_sysval_ptr, 3876bf215546Sopenharmony_ci .base_vertex_sysval = ctx->base_vertex_sysval_ptr, 3877bf215546Sopenharmony_ci .base_instance_sysval = ctx->base_instance_sysval_ptr, 3878bf215546Sopenharmony_ci .vertex_job = vertex.gpu, 3879bf215546Sopenharmony_ci .tiler_job = tiler.gpu, 3880bf215546Sopenharmony_ci .attrib_bufs = attrib_bufs, 3881bf215546Sopenharmony_ci .attribs = attribs, 3882bf215546Sopenharmony_ci .attrib_count = attrib_count, 3883bf215546Sopenharmony_ci .varying_bufs = varyings, 3884bf215546Sopenharmony_ci .index_size = info->index_size, 3885bf215546Sopenharmony_ci }; 3886bf215546Sopenharmony_ci 3887bf215546Sopenharmony_ci if (panfrost_writes_point_size(ctx)) 3888bf215546Sopenharmony_ci draw_info.flags |= PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE; 3889bf215546Sopenharmony_ci 3890bf215546Sopenharmony_ci if (vs->info.vs.writes_point_size) 3891bf215546Sopenharmony_ci draw_info.flags |= PAN_INDIRECT_DRAW_HAS_PSIZ; 3892bf215546Sopenharmony_ci 3893bf215546Sopenharmony_ci if (idvs) 3894bf215546Sopenharmony_ci draw_info.flags |= PAN_INDIRECT_DRAW_IDVS; 3895bf215546Sopenharmony_ci 3896bf215546Sopenharmony_ci if (info->primitive_restart) { 3897bf215546Sopenharmony_ci draw_info.restart_index = info->restart_index; 3898bf215546Sopenharmony_ci draw_info.flags 
|= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART; 3899bf215546Sopenharmony_ci } 3900bf215546Sopenharmony_ci 3901bf215546Sopenharmony_ci batch->indirect_draw_job_id = 3902bf215546Sopenharmony_ci GENX(panfrost_emit_indirect_draw)(&batch->pool.base, 3903bf215546Sopenharmony_ci &batch->scoreboard, 3904bf215546Sopenharmony_ci &draw_info, 3905bf215546Sopenharmony_ci &batch->indirect_draw_ctx); 3906bf215546Sopenharmony_ci 3907bf215546Sopenharmony_ci if (idvs) { 3908bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, 3909bf215546Sopenharmony_ci MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 3910bf215546Sopenharmony_ci 0, 0, &tiler, false); 3911bf215546Sopenharmony_ci } else { 3912bf215546Sopenharmony_ci panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); 3913bf215546Sopenharmony_ci } 3914bf215546Sopenharmony_ci} 3915bf215546Sopenharmony_ci#endif 3916bf215546Sopenharmony_ci 3917bf215546Sopenharmony_cistatic bool 3918bf215546Sopenharmony_cipanfrost_compatible_batch_state(struct panfrost_batch *batch, 3919bf215546Sopenharmony_ci bool points) 3920bf215546Sopenharmony_ci{ 3921bf215546Sopenharmony_ci /* Only applies on Valhall */ 3922bf215546Sopenharmony_ci if (PAN_ARCH < 9) 3923bf215546Sopenharmony_ci return true; 3924bf215546Sopenharmony_ci 3925bf215546Sopenharmony_ci struct panfrost_context *ctx = batch->ctx; 3926bf215546Sopenharmony_ci struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; 3927bf215546Sopenharmony_ci 3928bf215546Sopenharmony_ci bool coord = (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); 3929bf215546Sopenharmony_ci bool first = rast->flatshade_first; 3930bf215546Sopenharmony_ci 3931bf215546Sopenharmony_ci /* gl_PointCoord orientation only matters when drawing points, but 3932bf215546Sopenharmony_ci * provoking vertex doesn't matter for points. 
3933bf215546Sopenharmony_ci */ 3934bf215546Sopenharmony_ci if (points) 3935bf215546Sopenharmony_ci return pan_tristate_set(&batch->sprite_coord_origin, coord); 3936bf215546Sopenharmony_ci else 3937bf215546Sopenharmony_ci return pan_tristate_set(&batch->first_provoking_vertex, first); 3938bf215546Sopenharmony_ci} 3939bf215546Sopenharmony_ci 3940bf215546Sopenharmony_cistatic void 3941bf215546Sopenharmony_cipanfrost_draw_vbo(struct pipe_context *pipe, 3942bf215546Sopenharmony_ci const struct pipe_draw_info *info, 3943bf215546Sopenharmony_ci unsigned drawid_offset, 3944bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 3945bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 3946bf215546Sopenharmony_ci unsigned num_draws) 3947bf215546Sopenharmony_ci{ 3948bf215546Sopenharmony_ci struct panfrost_context *ctx = pan_context(pipe); 3949bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(pipe->screen); 3950bf215546Sopenharmony_ci 3951bf215546Sopenharmony_ci if (!panfrost_render_condition_check(ctx)) 3952bf215546Sopenharmony_ci return; 3953bf215546Sopenharmony_ci 3954bf215546Sopenharmony_ci /* Emulate indirect draws unless we're using the experimental path */ 3955bf215546Sopenharmony_ci if ((!(dev->debug & PAN_DBG_INDIRECT) || !PAN_GPU_INDIRECTS) && indirect && indirect->buffer) { 3956bf215546Sopenharmony_ci assert(num_draws == 1); 3957bf215546Sopenharmony_ci util_draw_indirect(pipe, info, indirect); 3958bf215546Sopenharmony_ci return; 3959bf215546Sopenharmony_ci } 3960bf215546Sopenharmony_ci 3961bf215546Sopenharmony_ci /* Do some common setup */ 3962bf215546Sopenharmony_ci struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); 3963bf215546Sopenharmony_ci 3964bf215546Sopenharmony_ci /* Don't add too many jobs to a single batch. Hardware has a hard limit 3965bf215546Sopenharmony_ci * of 65536 jobs, but we choose a smaller soft limit (arbitrary) to 3966bf215546Sopenharmony_ci * avoid the risk of timeouts. 
This might not be a good idea. */ 3967bf215546Sopenharmony_ci if (unlikely(batch->scoreboard.job_index > 10000)) 3968bf215546Sopenharmony_ci batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws"); 3969bf215546Sopenharmony_ci 3970bf215546Sopenharmony_ci bool points = (info->mode == PIPE_PRIM_POINTS); 3971bf215546Sopenharmony_ci 3972bf215546Sopenharmony_ci if (unlikely(!panfrost_compatible_batch_state(batch, points))) { 3973bf215546Sopenharmony_ci batch = panfrost_get_fresh_batch_for_fbo(ctx, "State change"); 3974bf215546Sopenharmony_ci 3975bf215546Sopenharmony_ci ASSERTED bool succ = panfrost_compatible_batch_state(batch, points); 3976bf215546Sopenharmony_ci assert(succ && "must be able to set state for a fresh batch"); 3977bf215546Sopenharmony_ci } 3978bf215546Sopenharmony_ci 3979bf215546Sopenharmony_ci /* panfrost_batch_skip_rasterization reads 3980bf215546Sopenharmony_ci * batch->scissor_culls_everything, which is set by 3981bf215546Sopenharmony_ci * panfrost_emit_viewport, so call that first. 
3982bf215546Sopenharmony_ci */ 3983bf215546Sopenharmony_ci if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR)) 3984bf215546Sopenharmony_ci batch->viewport = panfrost_emit_viewport(batch); 3985bf215546Sopenharmony_ci 3986bf215546Sopenharmony_ci /* Mark everything dirty when debugging */ 3987bf215546Sopenharmony_ci if (unlikely(dev->debug & PAN_DBG_DIRTY)) 3988bf215546Sopenharmony_ci panfrost_dirty_state_all(ctx); 3989bf215546Sopenharmony_ci 3990bf215546Sopenharmony_ci /* Conservatively assume draw parameters always change */ 3991bf215546Sopenharmony_ci ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID; 3992bf215546Sopenharmony_ci 3993bf215546Sopenharmony_ci if (indirect) { 3994bf215546Sopenharmony_ci assert(num_draws == 1); 3995bf215546Sopenharmony_ci assert(PAN_GPU_INDIRECTS); 3996bf215546Sopenharmony_ci 3997bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS 3998bf215546Sopenharmony_ci if (indirect->count_from_stream_output) { 3999bf215546Sopenharmony_ci struct pipe_draw_start_count_bias tmp_draw = *draws; 4000bf215546Sopenharmony_ci struct panfrost_streamout_target *so = 4001bf215546Sopenharmony_ci pan_so_target(indirect->count_from_stream_output); 4002bf215546Sopenharmony_ci 4003bf215546Sopenharmony_ci tmp_draw.start = 0; 4004bf215546Sopenharmony_ci tmp_draw.count = so->offset; 4005bf215546Sopenharmony_ci tmp_draw.index_bias = 0; 4006bf215546Sopenharmony_ci panfrost_direct_draw(batch, info, drawid_offset, &tmp_draw); 4007bf215546Sopenharmony_ci return; 4008bf215546Sopenharmony_ci } 4009bf215546Sopenharmony_ci 4010bf215546Sopenharmony_ci panfrost_indirect_draw(batch, info, drawid_offset, indirect, &draws[0]); 4011bf215546Sopenharmony_ci return; 4012bf215546Sopenharmony_ci#endif 4013bf215546Sopenharmony_ci } 4014bf215546Sopenharmony_ci 4015bf215546Sopenharmony_ci struct pipe_draw_info tmp_info = *info; 4016bf215546Sopenharmony_ci unsigned drawid = drawid_offset; 4017bf215546Sopenharmony_ci 4018bf215546Sopenharmony_ci for (unsigned i = 0; i < num_draws; i++) { 
4019bf215546Sopenharmony_ci panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]); 4020bf215546Sopenharmony_ci 4021bf215546Sopenharmony_ci if (tmp_info.increment_draw_id) { 4022bf215546Sopenharmony_ci ctx->dirty |= PAN_DIRTY_DRAWID; 4023bf215546Sopenharmony_ci drawid++; 4024bf215546Sopenharmony_ci } 4025bf215546Sopenharmony_ci } 4026bf215546Sopenharmony_ci 4027bf215546Sopenharmony_ci} 4028bf215546Sopenharmony_ci 4029bf215546Sopenharmony_ci/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we 4030bf215546Sopenharmony_ci * construct the COMPUTE job and some of its payload. 4031bf215546Sopenharmony_ci */ 4032bf215546Sopenharmony_ci 4033bf215546Sopenharmony_cistatic void 4034bf215546Sopenharmony_cipanfrost_launch_grid(struct pipe_context *pipe, 4035bf215546Sopenharmony_ci const struct pipe_grid_info *info) 4036bf215546Sopenharmony_ci{ 4037bf215546Sopenharmony_ci struct panfrost_context *ctx = pan_context(pipe); 4038bf215546Sopenharmony_ci 4039bf215546Sopenharmony_ci /* XXX - shouldn't be necessary with working memory barriers. Affected 4040bf215546Sopenharmony_ci * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */ 4041bf215546Sopenharmony_ci panfrost_flush_all_batches(ctx, "Launch grid pre-barrier"); 4042bf215546Sopenharmony_ci 4043bf215546Sopenharmony_ci struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); 4044bf215546Sopenharmony_ci 4045bf215546Sopenharmony_ci struct panfrost_shader_state *cs = 4046bf215546Sopenharmony_ci &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0]; 4047bf215546Sopenharmony_ci 4048bf215546Sopenharmony_ci /* Indirect dispatch can't handle workgroup local storage since that 4049bf215546Sopenharmony_ci * would require dynamic memory allocation. Bail in this case. 
*/ 4050bf215546Sopenharmony_ci if (info->indirect && ((cs->info.wls_size != 0) || !PAN_GPU_INDIRECTS)) { 4051bf215546Sopenharmony_ci struct pipe_transfer *transfer; 4052bf215546Sopenharmony_ci uint32_t *params = pipe_buffer_map_range(pipe, info->indirect, 4053bf215546Sopenharmony_ci info->indirect_offset, 4054bf215546Sopenharmony_ci 3 * sizeof(uint32_t), 4055bf215546Sopenharmony_ci PIPE_MAP_READ, 4056bf215546Sopenharmony_ci &transfer); 4057bf215546Sopenharmony_ci 4058bf215546Sopenharmony_ci struct pipe_grid_info direct = *info; 4059bf215546Sopenharmony_ci direct.indirect = NULL; 4060bf215546Sopenharmony_ci direct.grid[0] = params[0]; 4061bf215546Sopenharmony_ci direct.grid[1] = params[1]; 4062bf215546Sopenharmony_ci direct.grid[2] = params[2]; 4063bf215546Sopenharmony_ci pipe_buffer_unmap(pipe, transfer); 4064bf215546Sopenharmony_ci 4065bf215546Sopenharmony_ci if (params[0] && params[1] && params[2]) 4066bf215546Sopenharmony_ci panfrost_launch_grid(pipe, &direct); 4067bf215546Sopenharmony_ci 4068bf215546Sopenharmony_ci return; 4069bf215546Sopenharmony_ci } 4070bf215546Sopenharmony_ci 4071bf215546Sopenharmony_ci ctx->compute_grid = info; 4072bf215546Sopenharmony_ci 4073bf215546Sopenharmony_ci struct panfrost_ptr t = 4074bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); 4075bf215546Sopenharmony_ci 4076bf215546Sopenharmony_ci /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so 4077bf215546Sopenharmony_ci * reuse the graphics path for this by lowering to Gallium */ 4078bf215546Sopenharmony_ci 4079bf215546Sopenharmony_ci struct pipe_constant_buffer ubuf = { 4080bf215546Sopenharmony_ci .buffer = NULL, 4081bf215546Sopenharmony_ci .buffer_offset = 0, 4082bf215546Sopenharmony_ci .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->req_input_mem, 4083bf215546Sopenharmony_ci .user_buffer = info->input 4084bf215546Sopenharmony_ci }; 4085bf215546Sopenharmony_ci 4086bf215546Sopenharmony_ci if (info->input) 4087bf215546Sopenharmony_ci 
pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf); 4088bf215546Sopenharmony_ci 4089bf215546Sopenharmony_ci /* Invoke according to the grid info */ 4090bf215546Sopenharmony_ci 4091bf215546Sopenharmony_ci unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] }; 4092bf215546Sopenharmony_ci 4093bf215546Sopenharmony_ci if (info->indirect) 4094bf215546Sopenharmony_ci num_wg[0] = num_wg[1] = num_wg[2] = 1; 4095bf215546Sopenharmony_ci 4096bf215546Sopenharmony_ci panfrost_update_shader_state(batch, PIPE_SHADER_COMPUTE); 4097bf215546Sopenharmony_ci 4098bf215546Sopenharmony_ci#if PAN_ARCH <= 7 4099bf215546Sopenharmony_ci panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION), 4100bf215546Sopenharmony_ci num_wg[0], num_wg[1], num_wg[2], 4101bf215546Sopenharmony_ci info->block[0], info->block[1], 4102bf215546Sopenharmony_ci info->block[2], 4103bf215546Sopenharmony_ci false, info->indirect != NULL); 4104bf215546Sopenharmony_ci 4105bf215546Sopenharmony_ci pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { 4106bf215546Sopenharmony_ci cfg.job_task_split = 4107bf215546Sopenharmony_ci util_logbase2_ceil(info->block[0] + 1) + 4108bf215546Sopenharmony_ci util_logbase2_ceil(info->block[1] + 1) + 4109bf215546Sopenharmony_ci util_logbase2_ceil(info->block[2] + 1); 4110bf215546Sopenharmony_ci } 4111bf215546Sopenharmony_ci 4112bf215546Sopenharmony_ci pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { 4113bf215546Sopenharmony_ci cfg.state = batch->rsd[PIPE_SHADER_COMPUTE]; 4114bf215546Sopenharmony_ci cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE); 4115bf215546Sopenharmony_ci cfg.thread_storage = panfrost_emit_shared_memory(batch, info); 4116bf215546Sopenharmony_ci cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE]; 4117bf215546Sopenharmony_ci cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE]; 4118bf215546Sopenharmony_ci cfg.textures = 
batch->textures[PIPE_SHADER_COMPUTE]; 4119bf215546Sopenharmony_ci cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE]; 4120bf215546Sopenharmony_ci } 4121bf215546Sopenharmony_ci#else 4122bf215546Sopenharmony_ci pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { 4123bf215546Sopenharmony_ci cfg.workgroup_size_x = info->block[0]; 4124bf215546Sopenharmony_ci cfg.workgroup_size_y = info->block[1]; 4125bf215546Sopenharmony_ci cfg.workgroup_size_z = info->block[2]; 4126bf215546Sopenharmony_ci 4127bf215546Sopenharmony_ci cfg.workgroup_count_x = num_wg[0]; 4128bf215546Sopenharmony_ci cfg.workgroup_count_y = num_wg[1]; 4129bf215546Sopenharmony_ci cfg.workgroup_count_z = num_wg[2]; 4130bf215546Sopenharmony_ci 4131bf215546Sopenharmony_ci panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_COMPUTE, 4132bf215546Sopenharmony_ci batch->rsd[PIPE_SHADER_COMPUTE], 4133bf215546Sopenharmony_ci panfrost_emit_shared_memory(batch, info)); 4134bf215546Sopenharmony_ci 4135bf215546Sopenharmony_ci cfg.allow_merging_workgroups = cs->info.cs.allow_merging_workgroups; 4136bf215546Sopenharmony_ci cfg.task_increment = 1; 4137bf215546Sopenharmony_ci cfg.task_axis = MALI_TASK_AXIS_Z; 4138bf215546Sopenharmony_ci } 4139bf215546Sopenharmony_ci#endif 4140bf215546Sopenharmony_ci 4141bf215546Sopenharmony_ci unsigned indirect_dep = 0; 4142bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS 4143bf215546Sopenharmony_ci if (info->indirect) { 4144bf215546Sopenharmony_ci struct pan_indirect_dispatch_info indirect = { 4145bf215546Sopenharmony_ci .job = t.gpu, 4146bf215546Sopenharmony_ci .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + 4147bf215546Sopenharmony_ci info->indirect_offset, 4148bf215546Sopenharmony_ci .num_wg_sysval = { 4149bf215546Sopenharmony_ci batch->num_wg_sysval[0], 4150bf215546Sopenharmony_ci batch->num_wg_sysval[1], 4151bf215546Sopenharmony_ci batch->num_wg_sysval[2], 4152bf215546Sopenharmony_ci }, 4153bf215546Sopenharmony_ci }; 4154bf215546Sopenharmony_ci 
4155bf215546Sopenharmony_ci indirect_dep = GENX(pan_indirect_dispatch_emit)(&batch->pool.base, 4156bf215546Sopenharmony_ci &batch->scoreboard, 4157bf215546Sopenharmony_ci &indirect); 4158bf215546Sopenharmony_ci } 4159bf215546Sopenharmony_ci#endif 4160bf215546Sopenharmony_ci 4161bf215546Sopenharmony_ci panfrost_add_job(&batch->pool.base, &batch->scoreboard, 4162bf215546Sopenharmony_ci MALI_JOB_TYPE_COMPUTE, true, false, 4163bf215546Sopenharmony_ci indirect_dep, 0, &t, false); 4164bf215546Sopenharmony_ci panfrost_flush_all_batches(ctx, "Launch grid post-barrier"); 4165bf215546Sopenharmony_ci} 4166bf215546Sopenharmony_ci 4167bf215546Sopenharmony_cistatic void * 4168bf215546Sopenharmony_cipanfrost_create_rasterizer_state( 4169bf215546Sopenharmony_ci struct pipe_context *pctx, 4170bf215546Sopenharmony_ci const struct pipe_rasterizer_state *cso) 4171bf215546Sopenharmony_ci{ 4172bf215546Sopenharmony_ci struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); 4173bf215546Sopenharmony_ci 4174bf215546Sopenharmony_ci so->base = *cso; 4175bf215546Sopenharmony_ci 4176bf215546Sopenharmony_ci /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */ 4177bf215546Sopenharmony_ci assert(cso->offset_clamp == 0.0); 4178bf215546Sopenharmony_ci 4179bf215546Sopenharmony_ci#if PAN_ARCH <= 7 4180bf215546Sopenharmony_ci pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) { 4181bf215546Sopenharmony_ci cfg.multisample_enable = cso->multisample; 4182bf215546Sopenharmony_ci cfg.fixed_function_near_discard = cso->depth_clip_near; 4183bf215546Sopenharmony_ci cfg.fixed_function_far_discard = cso->depth_clip_far; 4184bf215546Sopenharmony_ci cfg.shader_depth_range_fixed = true; 4185bf215546Sopenharmony_ci } 4186bf215546Sopenharmony_ci 4187bf215546Sopenharmony_ci pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) { 4188bf215546Sopenharmony_ci cfg.front_facing_depth_bias = cso->offset_tri; 4189bf215546Sopenharmony_ci cfg.back_facing_depth_bias = cso->offset_tri; 
4190bf215546Sopenharmony_ci cfg.single_sampled_lines = !cso->multisample; 4191bf215546Sopenharmony_ci } 4192bf215546Sopenharmony_ci#endif 4193bf215546Sopenharmony_ci 4194bf215546Sopenharmony_ci return so; 4195bf215546Sopenharmony_ci} 4196bf215546Sopenharmony_ci 4197bf215546Sopenharmony_ci#if PAN_ARCH >= 9 4198bf215546Sopenharmony_ci/* 4199bf215546Sopenharmony_ci * Given a pipe_vertex_element, pack the corresponding Valhall attribute 4200bf215546Sopenharmony_ci * descriptor. This function is called at CSO create time. Since 4201bf215546Sopenharmony_ci * pipe_vertex_element lacks a stride, the packed attribute descriptor will not 4202bf215546Sopenharmony_ci * be uploaded until draw time. 4203bf215546Sopenharmony_ci */ 4204bf215546Sopenharmony_cistatic void 4205bf215546Sopenharmony_cipanfrost_pack_attribute(struct panfrost_device *dev, 4206bf215546Sopenharmony_ci const struct pipe_vertex_element el, 4207bf215546Sopenharmony_ci struct mali_attribute_packed *out) 4208bf215546Sopenharmony_ci{ 4209bf215546Sopenharmony_ci pan_pack(out, ATTRIBUTE, cfg) { 4210bf215546Sopenharmony_ci cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER; 4211bf215546Sopenharmony_ci cfg.frequency = (el.instance_divisor > 0) ? 
4212bf215546Sopenharmony_ci MALI_ATTRIBUTE_FREQUENCY_INSTANCE : 4213bf215546Sopenharmony_ci MALI_ATTRIBUTE_FREQUENCY_VERTEX; 4214bf215546Sopenharmony_ci cfg.format = dev->formats[el.src_format].hw; 4215bf215546Sopenharmony_ci cfg.offset = el.src_offset; 4216bf215546Sopenharmony_ci cfg.buffer_index = el.vertex_buffer_index; 4217bf215546Sopenharmony_ci 4218bf215546Sopenharmony_ci if (el.instance_divisor == 0) { 4219bf215546Sopenharmony_ci /* Per-vertex */ 4220bf215546Sopenharmony_ci cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D; 4221bf215546Sopenharmony_ci cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; 4222bf215546Sopenharmony_ci cfg.offset_enable = true; 4223bf215546Sopenharmony_ci } else if (util_is_power_of_two_or_zero(el.instance_divisor)) { 4224bf215546Sopenharmony_ci /* Per-instance, POT divisor */ 4225bf215546Sopenharmony_ci cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; 4226bf215546Sopenharmony_ci cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; 4227bf215546Sopenharmony_ci cfg.divisor_r = __builtin_ctz(el.instance_divisor); 4228bf215546Sopenharmony_ci } else { 4229bf215546Sopenharmony_ci /* Per-instance, NPOT divisor */ 4230bf215546Sopenharmony_ci cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; 4231bf215546Sopenharmony_ci cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; 4232bf215546Sopenharmony_ci 4233bf215546Sopenharmony_ci cfg.divisor_d = 4234bf215546Sopenharmony_ci panfrost_compute_magic_divisor(el.instance_divisor, 4235bf215546Sopenharmony_ci &cfg.divisor_r, &cfg.divisor_e); 4236bf215546Sopenharmony_ci } 4237bf215546Sopenharmony_ci } 4238bf215546Sopenharmony_ci} 4239bf215546Sopenharmony_ci#endif 4240bf215546Sopenharmony_ci 4241bf215546Sopenharmony_cistatic void * 4242bf215546Sopenharmony_cipanfrost_create_vertex_elements_state( 4243bf215546Sopenharmony_ci struct pipe_context *pctx, 4244bf215546Sopenharmony_ci unsigned num_elements, 4245bf215546Sopenharmony_ci const struct pipe_vertex_element *elements) 4246bf215546Sopenharmony_ci{ 
4247bf215546Sopenharmony_ci struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); 4248bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(pctx->screen); 4249bf215546Sopenharmony_ci 4250bf215546Sopenharmony_ci so->num_elements = num_elements; 4251bf215546Sopenharmony_ci memcpy(so->pipe, elements, sizeof(*elements) * num_elements); 4252bf215546Sopenharmony_ci 4253bf215546Sopenharmony_ci#if PAN_ARCH >= 9 4254bf215546Sopenharmony_ci for (unsigned i = 0; i < num_elements; ++i) 4255bf215546Sopenharmony_ci panfrost_pack_attribute(dev, elements[i], &so->attributes[i]); 4256bf215546Sopenharmony_ci#else 4257bf215546Sopenharmony_ci /* Assign attribute buffers corresponding to the vertex buffers, keyed 4258bf215546Sopenharmony_ci * for a particular divisor since that's how instancing works on Mali */ 4259bf215546Sopenharmony_ci for (unsigned i = 0; i < num_elements; ++i) { 4260bf215546Sopenharmony_ci so->element_buffer[i] = pan_assign_vertex_buffer( 4261bf215546Sopenharmony_ci so->buffers, &so->nr_bufs, 4262bf215546Sopenharmony_ci elements[i].vertex_buffer_index, 4263bf215546Sopenharmony_ci elements[i].instance_divisor); 4264bf215546Sopenharmony_ci } 4265bf215546Sopenharmony_ci 4266bf215546Sopenharmony_ci for (int i = 0; i < num_elements; ++i) { 4267bf215546Sopenharmony_ci enum pipe_format fmt = elements[i].src_format; 4268bf215546Sopenharmony_ci so->formats[i] = dev->formats[fmt].hw; 4269bf215546Sopenharmony_ci } 4270bf215546Sopenharmony_ci 4271bf215546Sopenharmony_ci /* Let's also prepare vertex builtins */ 4272bf215546Sopenharmony_ci so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; 4273bf215546Sopenharmony_ci so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; 4274bf215546Sopenharmony_ci#endif 4275bf215546Sopenharmony_ci 4276bf215546Sopenharmony_ci return so; 4277bf215546Sopenharmony_ci} 4278bf215546Sopenharmony_ci 4279bf215546Sopenharmony_cistatic inline unsigned 
4280bf215546Sopenharmony_cipan_pipe_to_stencil_op(enum pipe_stencil_op in) 4281bf215546Sopenharmony_ci{ 4282bf215546Sopenharmony_ci switch (in) { 4283bf215546Sopenharmony_ci case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP; 4284bf215546Sopenharmony_ci case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO; 4285bf215546Sopenharmony_ci case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE; 4286bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT; 4287bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT; 4288bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP; 4289bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP; 4290bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT; 4291bf215546Sopenharmony_ci default: unreachable("Invalid stencil op"); 4292bf215546Sopenharmony_ci } 4293bf215546Sopenharmony_ci} 4294bf215546Sopenharmony_ci 4295bf215546Sopenharmony_ci#if PAN_ARCH <= 7 4296bf215546Sopenharmony_cistatic inline void 4297bf215546Sopenharmony_cipan_pipe_to_stencil(const struct pipe_stencil_state *in, 4298bf215546Sopenharmony_ci struct mali_stencil_packed *out) 4299bf215546Sopenharmony_ci{ 4300bf215546Sopenharmony_ci pan_pack(out, STENCIL, s) { 4301bf215546Sopenharmony_ci s.mask = in->valuemask; 4302bf215546Sopenharmony_ci s.compare_function = (enum mali_func) in->func; 4303bf215546Sopenharmony_ci s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op); 4304bf215546Sopenharmony_ci s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op); 4305bf215546Sopenharmony_ci s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op); 4306bf215546Sopenharmony_ci } 4307bf215546Sopenharmony_ci} 4308bf215546Sopenharmony_ci#endif 4309bf215546Sopenharmony_ci 4310bf215546Sopenharmony_cistatic bool 4311bf215546Sopenharmony_cipipe_zs_always_passes(const struct pipe_depth_stencil_alpha_state *zsa) 4312bf215546Sopenharmony_ci{ 
4313bf215546Sopenharmony_ci if (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS) 4314bf215546Sopenharmony_ci return false; 4315bf215546Sopenharmony_ci 4316bf215546Sopenharmony_ci if (zsa->stencil[0].enabled && zsa->stencil[0].func != PIPE_FUNC_ALWAYS) 4317bf215546Sopenharmony_ci return false; 4318bf215546Sopenharmony_ci 4319bf215546Sopenharmony_ci if (zsa->stencil[1].enabled && zsa->stencil[1].func != PIPE_FUNC_ALWAYS) 4320bf215546Sopenharmony_ci return false; 4321bf215546Sopenharmony_ci 4322bf215546Sopenharmony_ci return true; 4323bf215546Sopenharmony_ci} 4324bf215546Sopenharmony_ci 4325bf215546Sopenharmony_cistatic void * 4326bf215546Sopenharmony_cipanfrost_create_depth_stencil_state(struct pipe_context *pipe, 4327bf215546Sopenharmony_ci const struct pipe_depth_stencil_alpha_state *zsa) 4328bf215546Sopenharmony_ci{ 4329bf215546Sopenharmony_ci struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state); 4330bf215546Sopenharmony_ci so->base = *zsa; 4331bf215546Sopenharmony_ci 4332bf215546Sopenharmony_ci const struct pipe_stencil_state front = zsa->stencil[0]; 4333bf215546Sopenharmony_ci const struct pipe_stencil_state back = 4334bf215546Sopenharmony_ci zsa->stencil[1].enabled ? zsa->stencil[1] : front; 4335bf215546Sopenharmony_ci 4336bf215546Sopenharmony_ci enum mali_func depth_func = zsa->depth_enabled ? 4337bf215546Sopenharmony_ci (enum mali_func) zsa->depth_func : MALI_FUNC_ALWAYS; 4338bf215546Sopenharmony_ci 4339bf215546Sopenharmony_ci /* Normalize (there's no separate enable) */ 4340bf215546Sopenharmony_ci if (PAN_ARCH <= 5 && !zsa->alpha_enabled) 4341bf215546Sopenharmony_ci so->base.alpha_func = MALI_FUNC_ALWAYS; 4342bf215546Sopenharmony_ci 4343bf215546Sopenharmony_ci#if PAN_ARCH <= 7 4344bf215546Sopenharmony_ci /* Prepack relevant parts of the Renderer State Descriptor. 
They will 4345bf215546Sopenharmony_ci * be ORed in at draw-time */ 4346bf215546Sopenharmony_ci pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) { 4347bf215546Sopenharmony_ci cfg.depth_function = depth_func; 4348bf215546Sopenharmony_ci cfg.depth_write_mask = zsa->depth_writemask; 4349bf215546Sopenharmony_ci } 4350bf215546Sopenharmony_ci 4351bf215546Sopenharmony_ci pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) { 4352bf215546Sopenharmony_ci cfg.stencil_enable = front.enabled; 4353bf215546Sopenharmony_ci cfg.stencil_mask_front = front.writemask; 4354bf215546Sopenharmony_ci cfg.stencil_mask_back = back.writemask; 4355bf215546Sopenharmony_ci 4356bf215546Sopenharmony_ci#if PAN_ARCH <= 5 4357bf215546Sopenharmony_ci cfg.alpha_test_compare_function = 4358bf215546Sopenharmony_ci (enum mali_func) so->base.alpha_func; 4359bf215546Sopenharmony_ci#endif 4360bf215546Sopenharmony_ci } 4361bf215546Sopenharmony_ci 4362bf215546Sopenharmony_ci /* Stencil tests have their own words in the RSD */ 4363bf215546Sopenharmony_ci pan_pipe_to_stencil(&front, &so->stencil_front); 4364bf215546Sopenharmony_ci pan_pipe_to_stencil(&back, &so->stencil_back); 4365bf215546Sopenharmony_ci#else 4366bf215546Sopenharmony_ci pan_pack(&so->desc, DEPTH_STENCIL, cfg) { 4367bf215546Sopenharmony_ci cfg.front_compare_function = (enum mali_func) front.func; 4368bf215546Sopenharmony_ci cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op); 4369bf215546Sopenharmony_ci cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op); 4370bf215546Sopenharmony_ci cfg.front_depth_pass = pan_pipe_to_stencil_op(front.zpass_op); 4371bf215546Sopenharmony_ci 4372bf215546Sopenharmony_ci cfg.back_compare_function = (enum mali_func) back.func; 4373bf215546Sopenharmony_ci cfg.back_stencil_fail = pan_pipe_to_stencil_op(back.fail_op); 4374bf215546Sopenharmony_ci cfg.back_depth_fail = pan_pipe_to_stencil_op(back.zfail_op); 4375bf215546Sopenharmony_ci cfg.back_depth_pass = pan_pipe_to_stencil_op(back.zpass_op); 
4376bf215546Sopenharmony_ci 4377bf215546Sopenharmony_ci cfg.stencil_test_enable = front.enabled; 4378bf215546Sopenharmony_ci cfg.front_write_mask = front.writemask; 4379bf215546Sopenharmony_ci cfg.back_write_mask = back.writemask; 4380bf215546Sopenharmony_ci cfg.front_value_mask = front.valuemask; 4381bf215546Sopenharmony_ci cfg.back_value_mask = back.valuemask; 4382bf215546Sopenharmony_ci 4383bf215546Sopenharmony_ci cfg.depth_write_enable = zsa->depth_writemask; 4384bf215546Sopenharmony_ci cfg.depth_function = depth_func; 4385bf215546Sopenharmony_ci } 4386bf215546Sopenharmony_ci#endif 4387bf215546Sopenharmony_ci 4388bf215546Sopenharmony_ci so->enabled = zsa->stencil[0].enabled || 4389bf215546Sopenharmony_ci (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS); 4390bf215546Sopenharmony_ci 4391bf215546Sopenharmony_ci so->zs_always_passes = pipe_zs_always_passes(zsa); 4392bf215546Sopenharmony_ci so->writes_zs = util_writes_depth_stencil(zsa); 4393bf215546Sopenharmony_ci 4394bf215546Sopenharmony_ci /* TODO: Bounds test should be easy */ 4395bf215546Sopenharmony_ci assert(!zsa->depth_bounds_test); 4396bf215546Sopenharmony_ci 4397bf215546Sopenharmony_ci return so; 4398bf215546Sopenharmony_ci} 4399bf215546Sopenharmony_ci 4400bf215546Sopenharmony_cistatic struct pipe_sampler_view * 4401bf215546Sopenharmony_cipanfrost_create_sampler_view( 4402bf215546Sopenharmony_ci struct pipe_context *pctx, 4403bf215546Sopenharmony_ci struct pipe_resource *texture, 4404bf215546Sopenharmony_ci const struct pipe_sampler_view *template) 4405bf215546Sopenharmony_ci{ 4406bf215546Sopenharmony_ci struct panfrost_context *ctx = pan_context(pctx); 4407bf215546Sopenharmony_ci struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); 4408bf215546Sopenharmony_ci 4409bf215546Sopenharmony_ci pan_legalize_afbc_format(ctx, pan_resource(texture), template->format); 4410bf215546Sopenharmony_ci 4411bf215546Sopenharmony_ci pipe_reference(NULL, &texture->reference); 
4412bf215546Sopenharmony_ci 4413bf215546Sopenharmony_ci so->base = *template; 4414bf215546Sopenharmony_ci so->base.texture = texture; 4415bf215546Sopenharmony_ci so->base.reference.count = 1; 4416bf215546Sopenharmony_ci so->base.context = pctx; 4417bf215546Sopenharmony_ci 4418bf215546Sopenharmony_ci panfrost_create_sampler_view_bo(so, pctx, texture); 4419bf215546Sopenharmony_ci 4420bf215546Sopenharmony_ci return (struct pipe_sampler_view *) so; 4421bf215546Sopenharmony_ci} 4422bf215546Sopenharmony_ci 4423bf215546Sopenharmony_ci/* A given Gallium blend state can be encoded to the hardware in numerous, 4424bf215546Sopenharmony_ci * dramatically divergent ways due to the interactions of blending with 4425bf215546Sopenharmony_ci * framebuffer formats. Conceptually, there are two modes: 4426bf215546Sopenharmony_ci * 4427bf215546Sopenharmony_ci * - Fixed-function blending (for suitable framebuffer formats, suitable blend 4428bf215546Sopenharmony_ci * state, and suitable blend constant) 4429bf215546Sopenharmony_ci * 4430bf215546Sopenharmony_ci * - Blend shaders (for everything else) 4431bf215546Sopenharmony_ci * 4432bf215546Sopenharmony_ci * A given Gallium blend configuration will compile to exactly one 4433bf215546Sopenharmony_ci * fixed-function blend state, if it compiles to any, although the constant 4434bf215546Sopenharmony_ci * will vary across runs as that is tracked outside of the Gallium CSO. 4435bf215546Sopenharmony_ci * 4436bf215546Sopenharmony_ci * However, that same blend configuration will compile to many different blend 4437bf215546Sopenharmony_ci * shaders, depending on the framebuffer formats active. The rationale is that 4438bf215546Sopenharmony_ci * blend shaders override not just fixed-function blending but also 4439bf215546Sopenharmony_ci * fixed-function format conversion, so blend shaders are keyed to a particular 4440bf215546Sopenharmony_ci * framebuffer format. 
As an example, the tilebuffer format is identical for 4441bf215546Sopenharmony_ci * RG16F and RG16UI -- both are simply 32-bit raw pixels -- so both require 4442bf215546Sopenharmony_ci * blend shaders. 4443bf215546Sopenharmony_ci * 4444bf215546Sopenharmony_ci * All of this state is encapsulated in the panfrost_blend_state struct 4445bf215546Sopenharmony_ci * (our subclass of pipe_blend_state). 4446bf215546Sopenharmony_ci */ 4447bf215546Sopenharmony_ci 4448bf215546Sopenharmony_ci/* Create a blend CSO. Essentially, try to compile a fixed-function 4449bf215546Sopenharmony_ci * expression and initialize blend shaders */ 4450bf215546Sopenharmony_ci 4451bf215546Sopenharmony_cistatic void * 4452bf215546Sopenharmony_cipanfrost_create_blend_state(struct pipe_context *pipe, 4453bf215546Sopenharmony_ci const struct pipe_blend_state *blend) 4454bf215546Sopenharmony_ci{ 4455bf215546Sopenharmony_ci struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); 4456bf215546Sopenharmony_ci so->base = *blend; 4457bf215546Sopenharmony_ci 4458bf215546Sopenharmony_ci so->pan.logicop_enable = blend->logicop_enable; 4459bf215546Sopenharmony_ci so->pan.logicop_func = blend->logicop_func; 4460bf215546Sopenharmony_ci so->pan.rt_count = blend->max_rt + 1; 4461bf215546Sopenharmony_ci 4462bf215546Sopenharmony_ci for (unsigned c = 0; c < so->pan.rt_count; ++c) { 4463bf215546Sopenharmony_ci unsigned g = blend->independent_blend_enable ? 
c : 0; 4464bf215546Sopenharmony_ci const struct pipe_rt_blend_state pipe = blend->rt[g]; 4465bf215546Sopenharmony_ci struct pan_blend_equation equation = {0}; 4466bf215546Sopenharmony_ci 4467bf215546Sopenharmony_ci equation.color_mask = pipe.colormask; 4468bf215546Sopenharmony_ci equation.blend_enable = pipe.blend_enable; 4469bf215546Sopenharmony_ci 4470bf215546Sopenharmony_ci if (pipe.blend_enable) { 4471bf215546Sopenharmony_ci equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func); 4472bf215546Sopenharmony_ci equation.rgb_src_factor = util_blend_factor_to_shader(pipe.rgb_src_factor); 4473bf215546Sopenharmony_ci equation.rgb_invert_src_factor = util_blend_factor_is_inverted(pipe.rgb_src_factor); 4474bf215546Sopenharmony_ci equation.rgb_dst_factor = util_blend_factor_to_shader(pipe.rgb_dst_factor); 4475bf215546Sopenharmony_ci equation.rgb_invert_dst_factor = util_blend_factor_is_inverted(pipe.rgb_dst_factor); 4476bf215546Sopenharmony_ci equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func); 4477bf215546Sopenharmony_ci equation.alpha_src_factor = util_blend_factor_to_shader(pipe.alpha_src_factor); 4478bf215546Sopenharmony_ci equation.alpha_invert_src_factor = util_blend_factor_is_inverted(pipe.alpha_src_factor); 4479bf215546Sopenharmony_ci equation.alpha_dst_factor = util_blend_factor_to_shader(pipe.alpha_dst_factor); 4480bf215546Sopenharmony_ci equation.alpha_invert_dst_factor = util_blend_factor_is_inverted(pipe.alpha_dst_factor); 4481bf215546Sopenharmony_ci } 4482bf215546Sopenharmony_ci 4483bf215546Sopenharmony_ci /* Determine some common properties */ 4484bf215546Sopenharmony_ci unsigned constant_mask = pan_blend_constant_mask(equation); 4485bf215546Sopenharmony_ci const bool supports_2src = pan_blend_supports_2src(PAN_ARCH); 4486bf215546Sopenharmony_ci so->info[c] = (struct pan_blend_info) { 4487bf215546Sopenharmony_ci .no_colour = (equation.color_mask == 0), 4488bf215546Sopenharmony_ci .opaque = pan_blend_is_opaque(equation), 
4489bf215546Sopenharmony_ci .constant_mask = constant_mask, 4490bf215546Sopenharmony_ci 4491bf215546Sopenharmony_ci /* TODO: check the dest for the logicop */ 4492bf215546Sopenharmony_ci .load_dest = blend->logicop_enable || 4493bf215546Sopenharmony_ci pan_blend_reads_dest(equation), 4494bf215546Sopenharmony_ci 4495bf215546Sopenharmony_ci /* Could this possibly be fixed-function? */ 4496bf215546Sopenharmony_ci .fixed_function = !blend->logicop_enable && 4497bf215546Sopenharmony_ci pan_blend_can_fixed_function(equation, 4498bf215546Sopenharmony_ci supports_2src) && 4499bf215546Sopenharmony_ci (!constant_mask || 4500bf215546Sopenharmony_ci pan_blend_supports_constant(PAN_ARCH, c)), 4501bf215546Sopenharmony_ci 4502bf215546Sopenharmony_ci .alpha_zero_nop = pan_blend_alpha_zero_nop(equation), 4503bf215546Sopenharmony_ci .alpha_one_store = pan_blend_alpha_one_store(equation), 4504bf215546Sopenharmony_ci }; 4505bf215546Sopenharmony_ci 4506bf215546Sopenharmony_ci so->pan.rts[c].equation = equation; 4507bf215546Sopenharmony_ci 4508bf215546Sopenharmony_ci /* Bifrost needs to know if any render target loads its 4509bf215546Sopenharmony_ci * destination in the hot draw path, so precompute this */ 4510bf215546Sopenharmony_ci if (so->info[c].load_dest) 4511bf215546Sopenharmony_ci so->load_dest_mask |= BITFIELD_BIT(c); 4512bf215546Sopenharmony_ci 4513bf215546Sopenharmony_ci /* Converting equations to Mali style is expensive, do it at 4514bf215546Sopenharmony_ci * CSO create time instead of draw-time */ 4515bf215546Sopenharmony_ci if (so->info[c].fixed_function) { 4516bf215546Sopenharmony_ci so->equation[c] = pan_pack_blend(equation); 4517bf215546Sopenharmony_ci } 4518bf215546Sopenharmony_ci } 4519bf215546Sopenharmony_ci 4520bf215546Sopenharmony_ci return so; 4521bf215546Sopenharmony_ci} 4522bf215546Sopenharmony_ci 4523bf215546Sopenharmony_cistatic void 4524bf215546Sopenharmony_ciprepare_shader(struct panfrost_shader_state *state, 4525bf215546Sopenharmony_ci struct panfrost_pool 
*pool, bool upload) 4526bf215546Sopenharmony_ci{ 4527bf215546Sopenharmony_ci#if PAN_ARCH <= 7 4528bf215546Sopenharmony_ci void *out = &state->partial_rsd; 4529bf215546Sopenharmony_ci 4530bf215546Sopenharmony_ci if (upload) { 4531bf215546Sopenharmony_ci struct panfrost_ptr ptr = 4532bf215546Sopenharmony_ci pan_pool_alloc_desc(&pool->base, RENDERER_STATE); 4533bf215546Sopenharmony_ci 4534bf215546Sopenharmony_ci state->state = panfrost_pool_take_ref(pool, ptr.gpu); 4535bf215546Sopenharmony_ci out = ptr.cpu; 4536bf215546Sopenharmony_ci } 4537bf215546Sopenharmony_ci 4538bf215546Sopenharmony_ci pan_pack(out, RENDERER_STATE, cfg) { 4539bf215546Sopenharmony_ci pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg); 4540bf215546Sopenharmony_ci 4541bf215546Sopenharmony_ci } 4542bf215546Sopenharmony_ci#else 4543bf215546Sopenharmony_ci assert(upload); 4544bf215546Sopenharmony_ci 4545bf215546Sopenharmony_ci /* The address in the shader program descriptor must be non-null, but 4546bf215546Sopenharmony_ci * the entire shader program descriptor may be omitted. 4547bf215546Sopenharmony_ci * 4548bf215546Sopenharmony_ci * See dEQP-GLES31.functional.compute.basic.empty 4549bf215546Sopenharmony_ci */ 4550bf215546Sopenharmony_ci if (!state->bin.gpu) 4551bf215546Sopenharmony_ci return; 4552bf215546Sopenharmony_ci 4553bf215546Sopenharmony_ci bool vs = (state->info.stage == MESA_SHADER_VERTEX); 4554bf215546Sopenharmony_ci bool secondary_enable = (vs && state->info.vs.secondary_enable); 4555bf215546Sopenharmony_ci 4556bf215546Sopenharmony_ci unsigned nr_variants = secondary_enable ? 3 : vs ? 
2 : 1; 4557bf215546Sopenharmony_ci struct panfrost_ptr ptr = pan_pool_alloc_desc_array(&pool->base, 4558bf215546Sopenharmony_ci nr_variants, 4559bf215546Sopenharmony_ci SHADER_PROGRAM); 4560bf215546Sopenharmony_ci 4561bf215546Sopenharmony_ci state->state = panfrost_pool_take_ref(pool, ptr.gpu); 4562bf215546Sopenharmony_ci 4563bf215546Sopenharmony_ci /* Generic, or IDVS/points */ 4564bf215546Sopenharmony_ci pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) { 4565bf215546Sopenharmony_ci cfg.stage = pan_shader_stage(&state->info); 4566bf215546Sopenharmony_ci cfg.primary_shader = true; 4567bf215546Sopenharmony_ci cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); 4568bf215546Sopenharmony_ci cfg.binary = state->bin.gpu; 4569bf215546Sopenharmony_ci cfg.preload.r48_r63 = (state->info.preload >> 48); 4570bf215546Sopenharmony_ci 4571bf215546Sopenharmony_ci if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) 4572bf215546Sopenharmony_ci cfg.requires_helper_threads = state->info.contains_barrier; 4573bf215546Sopenharmony_ci } 4574bf215546Sopenharmony_ci 4575bf215546Sopenharmony_ci if (!vs) 4576bf215546Sopenharmony_ci return; 4577bf215546Sopenharmony_ci 4578bf215546Sopenharmony_ci /* IDVS/triangles */ 4579bf215546Sopenharmony_ci pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) { 4580bf215546Sopenharmony_ci cfg.stage = pan_shader_stage(&state->info); 4581bf215546Sopenharmony_ci cfg.primary_shader = true; 4582bf215546Sopenharmony_ci cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); 4583bf215546Sopenharmony_ci cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset; 4584bf215546Sopenharmony_ci cfg.preload.r48_r63 = (state->info.preload >> 48); 4585bf215546Sopenharmony_ci } 4586bf215546Sopenharmony_ci 4587bf215546Sopenharmony_ci if (!secondary_enable) 4588bf215546Sopenharmony_ci return; 4589bf215546Sopenharmony_ci 4590bf215546Sopenharmony_ci pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) { 
4591bf215546Sopenharmony_ci unsigned work_count = state->info.vs.secondary_work_reg_count; 4592bf215546Sopenharmony_ci 4593bf215546Sopenharmony_ci cfg.stage = pan_shader_stage(&state->info); 4594bf215546Sopenharmony_ci cfg.primary_shader = false; 4595bf215546Sopenharmony_ci cfg.register_allocation = pan_register_allocation(work_count); 4596bf215546Sopenharmony_ci cfg.binary = state->bin.gpu + state->info.vs.secondary_offset; 4597bf215546Sopenharmony_ci cfg.preload.r48_r63 = (state->info.vs.secondary_preload >> 48); 4598bf215546Sopenharmony_ci } 4599bf215546Sopenharmony_ci#endif 4600bf215546Sopenharmony_ci} 4601bf215546Sopenharmony_ci 4602bf215546Sopenharmony_cistatic void 4603bf215546Sopenharmony_cipanfrost_get_sample_position(struct pipe_context *context, 4604bf215546Sopenharmony_ci unsigned sample_count, 4605bf215546Sopenharmony_ci unsigned sample_index, 4606bf215546Sopenharmony_ci float *out_value) 4607bf215546Sopenharmony_ci{ 4608bf215546Sopenharmony_ci panfrost_query_sample_position( 4609bf215546Sopenharmony_ci panfrost_sample_pattern(sample_count), 4610bf215546Sopenharmony_ci sample_index, 4611bf215546Sopenharmony_ci out_value); 4612bf215546Sopenharmony_ci} 4613bf215546Sopenharmony_ci 4614bf215546Sopenharmony_cistatic void 4615bf215546Sopenharmony_ciscreen_destroy(struct pipe_screen *pscreen) 4616bf215546Sopenharmony_ci{ 4617bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(pscreen); 4618bf215546Sopenharmony_ci GENX(pan_blitter_cleanup)(dev); 4619bf215546Sopenharmony_ci 4620bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS 4621bf215546Sopenharmony_ci GENX(panfrost_cleanup_indirect_draw_shaders)(dev); 4622bf215546Sopenharmony_ci GENX(pan_indirect_dispatch_cleanup)(dev); 4623bf215546Sopenharmony_ci#endif 4624bf215546Sopenharmony_ci} 4625bf215546Sopenharmony_ci 4626bf215546Sopenharmony_cistatic void 4627bf215546Sopenharmony_cipreload(struct panfrost_batch *batch, struct pan_fb_info *fb) 4628bf215546Sopenharmony_ci{ 4629bf215546Sopenharmony_ci 
GENX(pan_preload_fb)(&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu, 4630bf215546Sopenharmony_ci PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL); 4631bf215546Sopenharmony_ci} 4632bf215546Sopenharmony_ci 4633bf215546Sopenharmony_cistatic void 4634bf215546Sopenharmony_ciinit_batch(struct panfrost_batch *batch) 4635bf215546Sopenharmony_ci{ 4636bf215546Sopenharmony_ci /* Reserve the framebuffer and local storage descriptors */ 4637bf215546Sopenharmony_ci batch->framebuffer = 4638bf215546Sopenharmony_ci#if PAN_ARCH == 4 4639bf215546Sopenharmony_ci pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER); 4640bf215546Sopenharmony_ci#else 4641bf215546Sopenharmony_ci pan_pool_alloc_desc_aggregate(&batch->pool.base, 4642bf215546Sopenharmony_ci PAN_DESC(FRAMEBUFFER), 4643bf215546Sopenharmony_ci PAN_DESC(ZS_CRC_EXTENSION), 4644bf215546Sopenharmony_ci PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET)); 4645bf215546Sopenharmony_ci 4646bf215546Sopenharmony_ci batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD; 4647bf215546Sopenharmony_ci#endif 4648bf215546Sopenharmony_ci 4649bf215546Sopenharmony_ci#if PAN_ARCH >= 6 4650bf215546Sopenharmony_ci batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); 4651bf215546Sopenharmony_ci#else 4652bf215546Sopenharmony_ci /* On Midgard, the TLS is embedded in the FB descriptor */ 4653bf215546Sopenharmony_ci batch->tls = batch->framebuffer; 4654bf215546Sopenharmony_ci#endif 4655bf215546Sopenharmony_ci} 4656bf215546Sopenharmony_ci 4657bf215546Sopenharmony_cistatic void 4658bf215546Sopenharmony_cipanfrost_sampler_view_destroy( 4659bf215546Sopenharmony_ci struct pipe_context *pctx, 4660bf215546Sopenharmony_ci struct pipe_sampler_view *pview) 4661bf215546Sopenharmony_ci{ 4662bf215546Sopenharmony_ci struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; 4663bf215546Sopenharmony_ci 4664bf215546Sopenharmony_ci pipe_resource_reference(&pview->texture, NULL); 4665bf215546Sopenharmony_ci 
panfrost_bo_unreference(view->state.bo); 4666bf215546Sopenharmony_ci ralloc_free(view); 4667bf215546Sopenharmony_ci} 4668bf215546Sopenharmony_ci 4669bf215546Sopenharmony_cistatic void 4670bf215546Sopenharmony_cicontext_init(struct pipe_context *pipe) 4671bf215546Sopenharmony_ci{ 4672bf215546Sopenharmony_ci pipe->draw_vbo = panfrost_draw_vbo; 4673bf215546Sopenharmony_ci pipe->launch_grid = panfrost_launch_grid; 4674bf215546Sopenharmony_ci 4675bf215546Sopenharmony_ci pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state; 4676bf215546Sopenharmony_ci pipe->create_rasterizer_state = panfrost_create_rasterizer_state; 4677bf215546Sopenharmony_ci pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; 4678bf215546Sopenharmony_ci pipe->create_sampler_view = panfrost_create_sampler_view; 4679bf215546Sopenharmony_ci pipe->sampler_view_destroy = panfrost_sampler_view_destroy; 4680bf215546Sopenharmony_ci pipe->create_sampler_state = panfrost_create_sampler_state; 4681bf215546Sopenharmony_ci pipe->create_blend_state = panfrost_create_blend_state; 4682bf215546Sopenharmony_ci 4683bf215546Sopenharmony_ci pipe->get_sample_position = panfrost_get_sample_position; 4684bf215546Sopenharmony_ci} 4685bf215546Sopenharmony_ci 4686bf215546Sopenharmony_ci#if PAN_ARCH <= 5 4687bf215546Sopenharmony_ci 4688bf215546Sopenharmony_ci/* Returns the polygon list's GPU address if available, or otherwise allocates 4689bf215546Sopenharmony_ci * the polygon list. It's perfectly fast to use allocate/free BO directly, 4690bf215546Sopenharmony_ci * since we'll hit the BO cache and this is one-per-batch anyway. 
*/ 4691bf215546Sopenharmony_ci 4692bf215546Sopenharmony_cistatic mali_ptr 4693bf215546Sopenharmony_cibatch_get_polygon_list(struct panfrost_batch *batch) 4694bf215546Sopenharmony_ci{ 4695bf215546Sopenharmony_ci struct panfrost_device *dev = pan_device(batch->ctx->base.screen); 4696bf215546Sopenharmony_ci 4697bf215546Sopenharmony_ci if (!batch->tiler_ctx.midgard.polygon_list) { 4698bf215546Sopenharmony_ci bool has_draws = batch->scoreboard.first_tiler != NULL; 4699bf215546Sopenharmony_ci unsigned size = 4700bf215546Sopenharmony_ci panfrost_tiler_get_polygon_list_size(dev, 4701bf215546Sopenharmony_ci batch->key.width, 4702bf215546Sopenharmony_ci batch->key.height, 4703bf215546Sopenharmony_ci has_draws); 4704bf215546Sopenharmony_ci size = util_next_power_of_two(size); 4705bf215546Sopenharmony_ci 4706bf215546Sopenharmony_ci /* Create the BO as invisible if we can. In the non-hierarchical tiler case, 4707bf215546Sopenharmony_ci * we need to write the polygon list manually because there's not WRITE_VALUE 4708bf215546Sopenharmony_ci * job in the chain (maybe we should add one...). */ 4709bf215546Sopenharmony_ci bool init_polygon_list = !has_draws && dev->model->quirks.no_hierarchical_tiling; 4710bf215546Sopenharmony_ci batch->tiler_ctx.midgard.polygon_list = 4711bf215546Sopenharmony_ci panfrost_batch_create_bo(batch, size, 4712bf215546Sopenharmony_ci init_polygon_list ? 
0 : PAN_BO_INVISIBLE, 4713bf215546Sopenharmony_ci PIPE_SHADER_VERTEX, 4714bf215546Sopenharmony_ci "Polygon list"); 4715bf215546Sopenharmony_ci panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list, 4716bf215546Sopenharmony_ci PIPE_SHADER_FRAGMENT); 4717bf215546Sopenharmony_ci 4718bf215546Sopenharmony_ci if (init_polygon_list) { 4719bf215546Sopenharmony_ci assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); 4720bf215546Sopenharmony_ci uint32_t *polygon_list_body = 4721bf215546Sopenharmony_ci batch->tiler_ctx.midgard.polygon_list->ptr.cpu + 4722bf215546Sopenharmony_ci MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; 4723bf215546Sopenharmony_ci 4724bf215546Sopenharmony_ci /* Magic for Mali T720 */ 4725bf215546Sopenharmony_ci polygon_list_body[0] = 0xa0000000; 4726bf215546Sopenharmony_ci } 4727bf215546Sopenharmony_ci 4728bf215546Sopenharmony_ci batch->tiler_ctx.midgard.disable = !has_draws; 4729bf215546Sopenharmony_ci } 4730bf215546Sopenharmony_ci 4731bf215546Sopenharmony_ci return batch->tiler_ctx.midgard.polygon_list->ptr.gpu; 4732bf215546Sopenharmony_ci} 4733bf215546Sopenharmony_ci#endif 4734bf215546Sopenharmony_ci 4735bf215546Sopenharmony_cistatic void 4736bf215546Sopenharmony_ciinit_polygon_list(struct panfrost_batch *batch) 4737bf215546Sopenharmony_ci{ 4738bf215546Sopenharmony_ci#if PAN_ARCH <= 5 4739bf215546Sopenharmony_ci mali_ptr polygon_list = batch_get_polygon_list(batch); 4740bf215546Sopenharmony_ci panfrost_scoreboard_initialize_tiler(&batch->pool.base, 4741bf215546Sopenharmony_ci &batch->scoreboard, 4742bf215546Sopenharmony_ci polygon_list); 4743bf215546Sopenharmony_ci#endif 4744bf215546Sopenharmony_ci} 4745bf215546Sopenharmony_ci 4746bf215546Sopenharmony_civoid 4747bf215546Sopenharmony_ciGENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) 4748bf215546Sopenharmony_ci{ 4749bf215546Sopenharmony_ci struct panfrost_device *dev = &screen->dev; 4750bf215546Sopenharmony_ci 4751bf215546Sopenharmony_ci screen->vtbl.prepare_shader = 
prepare_shader; 4752bf215546Sopenharmony_ci screen->vtbl.emit_tls = emit_tls; 4753bf215546Sopenharmony_ci screen->vtbl.emit_fbd = emit_fbd; 4754bf215546Sopenharmony_ci screen->vtbl.emit_fragment_job = emit_fragment_job; 4755bf215546Sopenharmony_ci screen->vtbl.screen_destroy = screen_destroy; 4756bf215546Sopenharmony_ci screen->vtbl.preload = preload; 4757bf215546Sopenharmony_ci screen->vtbl.context_init = context_init; 4758bf215546Sopenharmony_ci screen->vtbl.init_batch = init_batch; 4759bf215546Sopenharmony_ci screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked); 4760bf215546Sopenharmony_ci screen->vtbl.init_polygon_list = init_polygon_list; 4761bf215546Sopenharmony_ci screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options); 4762bf215546Sopenharmony_ci screen->vtbl.compile_shader = GENX(pan_shader_compile); 4763bf215546Sopenharmony_ci 4764bf215546Sopenharmony_ci GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base, 4765bf215546Sopenharmony_ci &screen->blitter.desc_pool.base); 4766bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS 4767bf215546Sopenharmony_ci GENX(pan_indirect_dispatch_init)(dev); 4768bf215546Sopenharmony_ci GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base); 4769bf215546Sopenharmony_ci#endif 4770bf215546Sopenharmony_ci} 4771