1/* 2 * Copyright (C) 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 */ 24 25#include "util/u_math.h" 26#include "pan_encoder.h" 27 28/* This file handles attribute descriptors. The 29 * bulk of the complexity is from instancing. See mali_job for 30 * notes on how this works. But basically, for small vertex 31 * counts, we have a lookup table, and for large vertex counts, 32 * we look at the high bits as a heuristic. This has to match 33 * exactly how the hardware calculates this (which is why the 34 * algorithm is so weird) or else instancing will break. */ 35 36/* Given an odd number (of the form 2k + 1), compute k */ 37#define ODD(odd) ((odd - 1) >> 1) 38 39static unsigned 40panfrost_small_padded_vertex_count(unsigned idx) 41{ 42 if (idx < 10) 43 return idx; 44 else 45 return (idx + 1) & ~1; 46} 47 48static unsigned 49panfrost_large_padded_vertex_count(uint32_t vertex_count) 50{ 51 /* First, we have to find the highest set one */ 52 unsigned highest = 32 - __builtin_clz(vertex_count); 53 54 /* Using that, we mask out the highest 4-bits */ 55 unsigned n = highest - 4; 56 unsigned nibble = (vertex_count >> n) & 0xF; 57 58 /* Great, we have the nibble. Now we can just try possibilities. Note 59 * that we don't care about the bottom most bit in most cases, and we 60 * know the top bit must be 1 */ 61 62 unsigned middle_two = (nibble >> 1) & 0x3; 63 64 switch (middle_two) { 65 case 0b00: 66 if (!(nibble & 1)) 67 return (1 << n) * 9; 68 else 69 return (1 << (n + 1)) * 5; 70 case 0b01: 71 return (1 << (n + 2)) * 3; 72 case 0b10: 73 return (1 << (n + 1)) * 7; 74 case 0b11: 75 return (1 << (n + 4)); 76 default: 77 return 0; /* unreachable */ 78 } 79} 80 81unsigned 82panfrost_padded_vertex_count(unsigned vertex_count) 83{ 84 if (vertex_count < 20) 85 return panfrost_small_padded_vertex_count(vertex_count); 86 else 87 return panfrost_large_padded_vertex_count(vertex_count); 88} 89 90/* The much, much more irritating case -- instancing is enabled. See 91 * panfrost_job.h for notes on how this works */ 92 93unsigned 94panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags) 95{ 96 /* We have a NPOT divisor. Here's the fun one (multipling by 97 * the inverse and shifting) */ 98 99 /* floor(log2(d)) */ 100 unsigned shift = util_logbase2(hw_divisor); 101 102 /* m = ceil(2^(32 + shift) / d) */ 103 uint64_t shift_hi = 32 + shift; 104 uint64_t t = 1ll << shift_hi; 105 double t_f = t; 106 double hw_divisor_d = hw_divisor; 107 double m_f = ceil(t_f / hw_divisor_d); 108 unsigned m = m_f; 109 110 /* Default case */ 111 uint32_t magic_divisor = m; 112 113 /* e = 2^(shift + 32) % d */ 114 uint64_t e = t % hw_divisor; 115 116 /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob 117 * seems to use a different condition */ 118 if (e <= (1ll << shift)) { 119 magic_divisor = m - 1; 120 *extra_flags = 1; 121 } 122 123 /* Top flag implicitly set */ 124 assert(magic_divisor & (1u << 31)); 125 magic_divisor &= ~(1u << 31); 126 *o_shift = shift; 127 128 return magic_divisor; 129} 130