1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "util/u_math.h" 26bf215546Sopenharmony_ci#include "pan_encoder.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* This file handles attribute descriptors. The 29bf215546Sopenharmony_ci * bulk of the complexity is from instancing. See mali_job for 30bf215546Sopenharmony_ci * notes on how this works. But basically, for small vertex 31bf215546Sopenharmony_ci * counts, we have a lookup table, and for large vertex counts, 32bf215546Sopenharmony_ci * we look at the high bits as a heuristic. This has to match 33bf215546Sopenharmony_ci * exactly how the hardware calculates this (which is why the 34bf215546Sopenharmony_ci * algorithm is so weird) or else instancing will break. */ 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci/* Given an odd number (of the form 2k + 1), compute k */ 37bf215546Sopenharmony_ci#define ODD(odd) ((odd - 1) >> 1) 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistatic unsigned 40bf215546Sopenharmony_cipanfrost_small_padded_vertex_count(unsigned idx) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci if (idx < 10) 43bf215546Sopenharmony_ci return idx; 44bf215546Sopenharmony_ci else 45bf215546Sopenharmony_ci return (idx + 1) & ~1; 46bf215546Sopenharmony_ci} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic unsigned 49bf215546Sopenharmony_cipanfrost_large_padded_vertex_count(uint32_t vertex_count) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci /* First, we have to find the highest set one */ 52bf215546Sopenharmony_ci unsigned highest = 32 - __builtin_clz(vertex_count); 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci /* Using that, we mask out the highest 4-bits */ 55bf215546Sopenharmony_ci unsigned n = highest - 4; 56bf215546Sopenharmony_ci unsigned nibble = (vertex_count >> n) & 0xF; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci /* Great, we have the nibble. Now we can just try possibilities. Note 59bf215546Sopenharmony_ci * that we don't care about the bottom most bit in most cases, and we 60bf215546Sopenharmony_ci * know the top bit must be 1 */ 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci unsigned middle_two = (nibble >> 1) & 0x3; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci switch (middle_two) { 65bf215546Sopenharmony_ci case 0b00: 66bf215546Sopenharmony_ci if (!(nibble & 1)) 67bf215546Sopenharmony_ci return (1 << n) * 9; 68bf215546Sopenharmony_ci else 69bf215546Sopenharmony_ci return (1 << (n + 1)) * 5; 70bf215546Sopenharmony_ci case 0b01: 71bf215546Sopenharmony_ci return (1 << (n + 2)) * 3; 72bf215546Sopenharmony_ci case 0b10: 73bf215546Sopenharmony_ci return (1 << (n + 1)) * 7; 74bf215546Sopenharmony_ci case 0b11: 75bf215546Sopenharmony_ci return (1 << (n + 4)); 76bf215546Sopenharmony_ci default: 77bf215546Sopenharmony_ci return 0; /* unreachable */ 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ciunsigned 82bf215546Sopenharmony_cipanfrost_padded_vertex_count(unsigned vertex_count) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci if (vertex_count < 20) 85bf215546Sopenharmony_ci return panfrost_small_padded_vertex_count(vertex_count); 86bf215546Sopenharmony_ci else 87bf215546Sopenharmony_ci return panfrost_large_padded_vertex_count(vertex_count); 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci/* The much, much more irritating case -- instancing is enabled. See 91bf215546Sopenharmony_ci * panfrost_job.h for notes on how this works */ 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ciunsigned 94bf215546Sopenharmony_cipanfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci /* We have a NPOT divisor. Here's the fun one (multipling by 97bf215546Sopenharmony_ci * the inverse and shifting) */ 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* floor(log2(d)) */ 100bf215546Sopenharmony_ci unsigned shift = util_logbase2(hw_divisor); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci /* m = ceil(2^(32 + shift) / d) */ 103bf215546Sopenharmony_ci uint64_t shift_hi = 32 + shift; 104bf215546Sopenharmony_ci uint64_t t = 1ll << shift_hi; 105bf215546Sopenharmony_ci double t_f = t; 106bf215546Sopenharmony_ci double hw_divisor_d = hw_divisor; 107bf215546Sopenharmony_ci double m_f = ceil(t_f / hw_divisor_d); 108bf215546Sopenharmony_ci unsigned m = m_f; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci /* Default case */ 111bf215546Sopenharmony_ci uint32_t magic_divisor = m; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* e = 2^(shift + 32) % d */ 114bf215546Sopenharmony_ci uint64_t e = t % hw_divisor; 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob 117bf215546Sopenharmony_ci * seems to use a different condition */ 118bf215546Sopenharmony_ci if (e <= (1ll << shift)) { 119bf215546Sopenharmony_ci magic_divisor = m - 1; 120bf215546Sopenharmony_ci *extra_flags = 1; 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci /* Top flag implicitly set */ 124bf215546Sopenharmony_ci assert(magic_divisor & (1u << 31)); 125bf215546Sopenharmony_ci magic_divisor &= ~(1u << 31); 126bf215546Sopenharmony_ci *o_shift = shift; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci return magic_divisor; 129bf215546Sopenharmony_ci} 130