/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26 27#ifndef __PAN_ENCODER_H 28#define __PAN_ENCODER_H 29 30#include "util/macros.h" 31 32#include <stdbool.h> 33#include "util/format/u_format.h" 34#include "pan_bo.h" 35#include "genxml/gen_macros.h" 36#include "pan_device.h" 37 38/* Tiler structure size computation */ 39 40unsigned 41panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy); 42 43unsigned 44panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy); 45 46unsigned 47panfrost_choose_hierarchy_mask( 48 unsigned width, unsigned height, 49 unsigned vertex_count, bool hierarchy); 50 51#if defined(PAN_ARCH) && PAN_ARCH <= 5 52static inline unsigned 53panfrost_tiler_get_polygon_list_size(const struct panfrost_device *dev, 54 unsigned fb_width, unsigned fb_height, 55 bool has_draws) 56{ 57 if (!has_draws) 58 return MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4; 59 60 bool hierarchy = !dev->model->quirks.no_hierarchical_tiling; 61 unsigned hierarchy_mask = 62 panfrost_choose_hierarchy_mask(fb_width, fb_height, 1, hierarchy); 63 64 return panfrost_tiler_full_size(fb_width, fb_height, hierarchy_mask, hierarchy) + 65 panfrost_tiler_header_size(fb_width, fb_height, hierarchy_mask, hierarchy); 66} 67#endif 68 69/* Stack sizes */ 70 71unsigned 72panfrost_get_stack_shift(unsigned stack_size); 73 74unsigned 75panfrost_get_total_stack_size( 76 unsigned thread_size, 77 unsigned threads_per_core, 78 unsigned core_id_range); 79 80/* Attributes / instancing */ 81 82unsigned 83panfrost_padded_vertex_count(unsigned vertex_count); 84 85unsigned 86panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags); 87 88#ifdef PAN_ARCH 89/* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */ 90 91#if PAN_ARCH <= 5 92static inline void 93panfrost_vertex_id(unsigned padded_count, 94 struct mali_attribute_buffer_packed 
*attr, 95 bool instanced) 96{ 97 pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) { 98 if (instanced) { 99 cfg.divisor_r = __builtin_ctz(padded_count); 100 cfg.divisor_p = padded_count >> (cfg.divisor_r + 1); 101 } else { 102 /* Large values so the modulo is a no-op */ 103 cfg.divisor_r = 0x1F; 104 cfg.divisor_p = 0x4; 105 } 106 } 107} 108 109static inline void 110panfrost_instance_id(unsigned padded_count, 111 struct mali_attribute_buffer_packed *attr, 112 bool instanced) 113{ 114 pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) { 115 if (!instanced || padded_count <= 1) { 116 /* Divide by large number to force to 0 */ 117 cfg.divisor_p = ((1u << 31) - 1); 118 cfg.divisor_r = 0x1F; 119 cfg.divisor_e = 0x1; 120 } else if(util_is_power_of_two_or_zero(padded_count)) { 121 /* Can't underflow since padded_count >= 2 */ 122 cfg.divisor_r = __builtin_ctz(padded_count) - 1; 123 } else { 124 cfg.divisor_p = 125 panfrost_compute_magic_divisor(padded_count, 126 &cfg.divisor_r, &cfg.divisor_e); 127 } 128 } 129} 130#endif /* PAN_ARCH <= 5 */ 131 132/* Sampler comparison functions are flipped in OpenGL from the hardware, so we 133 * need to be able to flip accordingly */ 134 135static inline enum mali_func 136panfrost_flip_compare_func(enum mali_func f) 137{ 138 switch (f) { 139 case MALI_FUNC_LESS: return MALI_FUNC_GREATER; 140 case MALI_FUNC_GREATER: return MALI_FUNC_LESS; 141 case MALI_FUNC_LEQUAL: return MALI_FUNC_GEQUAL; 142 case MALI_FUNC_GEQUAL: return MALI_FUNC_LEQUAL; 143 default: return f; 144 } 145 146} 147 148#if PAN_ARCH <= 7 149/* Compute shaders are invoked with a gl_NumWorkGroups X/Y/Z triplet. Vertex 150 * shaders are invoked as (1, vertex_count, instance_count). Compute shaders 151 * also have a gl_WorkGroupSize X/Y/Z triplet. These 6 values are packed 152 * together in a dynamic bitfield, packed by this routine. 
*/ 153 154static inline void 155panfrost_pack_work_groups_compute( 156 struct mali_invocation_packed *out, 157 unsigned num_x, unsigned num_y, unsigned num_z, 158 unsigned size_x, unsigned size_y, unsigned size_z, 159 bool quirk_graphics, bool indirect_dispatch) 160{ 161 /* The values needing packing, in order, and the corresponding shifts. 162 * Indicies into shift are off-by-one to make the logic easier */ 163 164 unsigned values[6] = { size_x, size_y, size_z, num_x, num_y, num_z }; 165 unsigned shifts[7] = { 0 }; 166 uint32_t packed = 0; 167 168 for (unsigned i = 0; i < 6; ++i) { 169 /* Must be positive, otherwise we underflow */ 170 assert(values[i] >= 1); 171 172 /* OR it in, shifting as required */ 173 packed |= ((values[i] - 1) << shifts[i]); 174 175 /* How many bits did we use? */ 176 unsigned bit_count = util_logbase2_ceil(values[i]); 177 178 /* Set the next shift accordingly */ 179 shifts[i + 1] = shifts[i] + bit_count; 180 } 181 182 pan_pack(out, INVOCATION, cfg) { 183 cfg.invocations = packed; 184 cfg.size_y_shift = shifts[1]; 185 cfg.size_z_shift = shifts[2]; 186 cfg.workgroups_x_shift = shifts[3]; 187 188 if (!indirect_dispatch) { 189 /* Leave zero for the dispatch shader */ 190 cfg.workgroups_y_shift = shifts[4]; 191 cfg.workgroups_z_shift = shifts[5]; 192 } 193 194 /* Quirk: for non-instanced graphics, the blob sets 195 * workgroups_z_shift = 32. This doesn't appear to matter to 196 * the hardware, but it's good to be bit-identical. */ 197 198 if (quirk_graphics && (num_z <= 1)) 199 cfg.workgroups_z_shift = 32; 200 201 /* For graphics, set to the minimum efficient value. For 202 * compute, must equal the workgroup X shift for barriers to 203 * function correctly */ 204 205 cfg.thread_group_split = quirk_graphics ? 
206 MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift; 207 } 208} 209#endif 210 211#if PAN_ARCH >= 5 212/* Format conversion */ 213static inline enum mali_z_internal_format 214panfrost_get_z_internal_format(enum pipe_format fmt) 215{ 216 switch (fmt) { 217 case PIPE_FORMAT_Z16_UNORM: 218 case PIPE_FORMAT_Z16_UNORM_S8_UINT: 219 return MALI_Z_INTERNAL_FORMAT_D16; 220 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 221 case PIPE_FORMAT_Z24X8_UNORM: 222 return MALI_Z_INTERNAL_FORMAT_D24; 223 case PIPE_FORMAT_Z32_FLOAT: 224 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 225 return MALI_Z_INTERNAL_FORMAT_D32; 226 default: 227 unreachable("Unsupported depth/stencil format."); 228 } 229} 230#endif 231 232#endif /* PAN_ARCH */ 233 234#if PAN_ARCH >= 9 235static inline void 236panfrost_make_resource_table(struct panfrost_ptr base, unsigned index, 237 mali_ptr address, unsigned resource_count) 238{ 239 if (resource_count == 0) 240 return; 241 242 pan_pack(base.cpu + index * pan_size(RESOURCE), RESOURCE, cfg) { 243 cfg.address = address; 244 cfg.size = resource_count * pan_size(BUFFER); 245 } 246} 247#endif 248 249#endif 250