1/* 2 * Copyright © 2021 Raspberry Pi Ltd 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "v3d_util.h" 25#include "util/macros.h" 26 27/* Choose a number of workgroups per supergroup that maximizes 28 * lane occupancy. We can pack up to 16 workgroups into a supergroup. 29 */ 30uint32_t 31v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, 32 bool has_subgroups, 33 bool has_tsy_barrier, 34 uint32_t threads, 35 uint32_t num_wgs, 36 uint32_t wg_size) 37{ 38 /* FIXME: subgroups may restrict supergroup packing. For now, we disable it 39 * completely if the shader uses subgroups. 40 */ 41 if (has_subgroups) 42 return 1; 43 44 /* Compute maximum number of batches in a supergroup for this workgroup size. 45 * Each batch is 16 elements, and we can have up to 16 work groups in a 46 * supergroup: 47 * 48 * max_batches_per_sg = (wg_size * max_wgs_per_sg) / elements_per_batch 49 * since max_wgs_per_sg = 16 and elements_per_batch = 16, we get: 50 * max_batches_per_sg = wg_size 51 */ 52 uint32_t max_batches_per_sg = wg_size; 53 54 /* QPU threads will stall at TSY barriers until the entire supergroup 55 * reaches the barrier. Limit the supergroup size to half the QPU threads 56 * available, so we can have at least 2 supergroups executing in parallel 57 * and we don't stall all our QPU threads when a supergroup hits a barrier. 58 */ 59 if (has_tsy_barrier) { 60 uint32_t max_qpu_threads = devinfo->qpu_count * threads; 61 max_batches_per_sg = MIN2(max_batches_per_sg, max_qpu_threads / 2); 62 } 63 uint32_t max_wgs_per_sg = max_batches_per_sg * 16 / wg_size; 64 65 uint32_t best_wgs_per_sg = 1; 66 uint32_t best_unused_lanes = 16; 67 for (uint32_t wgs_per_sg = 1; wgs_per_sg <= max_wgs_per_sg; wgs_per_sg++) { 68 /* Don't try to pack more workgroups per supergroup than the total amount 69 * of workgroups dispatched. 70 */ 71 if (wgs_per_sg > num_wgs) 72 return best_wgs_per_sg; 73 74 /* Compute wasted lines for this configuration and keep track of the 75 * config with less waste. 76 */ 77 uint32_t unused_lanes = (16 - ((wgs_per_sg * wg_size) % 16)) & 0x0f; 78 if (unused_lanes == 0) 79 return wgs_per_sg; 80 81 if (unused_lanes < best_unused_lanes) { 82 best_wgs_per_sg = wgs_per_sg; 83 best_unused_lanes = unused_lanes; 84 } 85 } 86 87 return best_wgs_per_sg; 88} 89 90void 91v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp, 92 bool msaa, bool double_buffer, 93 uint32_t *width, uint32_t *height) 94{ 95 static const uint8_t tile_sizes[] = { 96 64, 64, 97 64, 32, 98 32, 32, 99 32, 16, 100 16, 16, 101 16, 8, 102 8, 8 103 }; 104 105 uint32_t idx = 0; 106 if (color_attachment_count > 2) 107 idx += 2; 108 else if (color_attachment_count > 1) 109 idx += 1; 110 111 /* MSAA and double-buffer are mutually exclusive */ 112 assert(!msaa || !double_buffer); 113 if (msaa) 114 idx += 2; 115 else if (double_buffer) 116 idx += 1; 117 118 idx += max_color_bpp; 119 120 assert(idx < ARRAY_SIZE(tile_sizes) / 2); 121 122 *width = tile_sizes[idx * 2]; 123 *height = tile_sizes[idx * 2 + 1]; 124} 125 126/* Translates a pipe swizzle to the swizzle values used in the 127 * TEXTURE_SHADER_STATE packet. 128 */ 129uint32_t 130v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle) 131{ 132 switch (swizzle) { 133 case PIPE_SWIZZLE_0: 134 return 0; 135 case PIPE_SWIZZLE_1: 136 return 1; 137 case PIPE_SWIZZLE_X: 138 case PIPE_SWIZZLE_Y: 139 case PIPE_SWIZZLE_Z: 140 case PIPE_SWIZZLE_W: 141 return 2 + swizzle; 142 default: 143 unreachable("unknown swizzle"); 144 } 145} 146 147/* Translates a pipe primitive type to a hw value we can use in the various 148 * draw packets. 149 */ 150uint32_t 151v3d_hw_prim_type(enum pipe_prim_type prim_type) 152{ 153 switch (prim_type) { 154 case PIPE_PRIM_POINTS: 155 case PIPE_PRIM_LINES: 156 case PIPE_PRIM_LINE_LOOP: 157 case PIPE_PRIM_LINE_STRIP: 158 case PIPE_PRIM_TRIANGLES: 159 case PIPE_PRIM_TRIANGLE_STRIP: 160 case PIPE_PRIM_TRIANGLE_FAN: 161 return prim_type; 162 163 case PIPE_PRIM_LINES_ADJACENCY: 164 case PIPE_PRIM_LINE_STRIP_ADJACENCY: 165 case PIPE_PRIM_TRIANGLES_ADJACENCY: 166 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: 167 return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY); 168 169 default: 170 unreachable("Unsupported primitive type"); 171 } 172} 173