1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "util/u_math.h"
26bf215546Sopenharmony_ci#include "pan_encoder.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/* This file handles attribute descriptors. The
29bf215546Sopenharmony_ci * bulk of the complexity is from instancing. See mali_job for
30bf215546Sopenharmony_ci * notes on how this works. But basically, for small vertex
31bf215546Sopenharmony_ci * counts, we have a lookup table, and for large vertex counts,
32bf215546Sopenharmony_ci * we look at the high bits as a heuristic. This has to match
33bf215546Sopenharmony_ci * exactly how the hardware calculates this (which is why the
34bf215546Sopenharmony_ci * algorithm is so weird) or else instancing will break. */
35bf215546Sopenharmony_ci
/* Given an odd number (of the form 2k + 1), compute k. The argument is
 * parenthesized so that an expression built from operators that bind more
 * loosely than `-` (shifts, bitwise operators, ?:) is evaluated as a whole
 * before the subtraction. */
#define ODD(odd) (((odd) - 1) >> 1)
38bf215546Sopenharmony_ci
/* Padded vertex count for small inputs: values below 10 are returned
 * unchanged, every other value is rounded up to the next even number. */
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
        const unsigned identity_limit = 10;

        if (idx >= identity_limit) {
                /* Adding the low bit bumps an odd value to the next even
                 * one and leaves an even value alone */
                return idx + (idx & 1u);
        }

        return idx;
}
47bf215546Sopenharmony_ci
/* Padded vertex count for large inputs. The result depends only on the four
 * most significant bits of vertex_count (the nibble, whose top bit is always
 * 1) and on n, the bit offset of that nibble:
 *
 *    1000 -> 9 << n
 *    1001 -> 5 << (n + 1)
 *    101x -> 3 << (n + 2)
 *    110x -> 7 << (n + 1)
 *    111x -> 1 << (n + 4)
 *
 * vertex_count must have at least four significant bits (>= 8); in this file
 * the function is only reached for counts >= 20. All arithmetic is done on
 * unsigned values so that results with bit 31 set are well defined. */
static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
        /* With fewer than four significant bits n would underflow, and
         * __builtin_clz(0) is undefined */
        assert(vertex_count >= 8);

        /* First, we have to find the highest set one (one past its index) */
        unsigned highest = 32 - __builtin_clz(vertex_count);

        /* Using that, we isolate the highest 4 bits */
        unsigned n = highest - 4;
        unsigned nibble = (vertex_count >> n) & 0xF;

        /* Great, we have the nibble. Now we can just try possibilities. Note
         * that we don't care about the bottom most bit in most cases, and we
         * know the top bit must be 1 */

        unsigned middle_two = (nibble >> 1) & 0x3;

        switch (middle_two) {
        case 0x0:
                if (!(nibble & 1))
                        return (1u << n) * 9;
                else
                        return (1u << (n + 1)) * 5;
        case 0x1:
                return (1u << (n + 2)) * 3;
        case 0x2:
                return (1u << (n + 1)) * 7;
        case 0x3:
                /* Next power of two; 2^32 does not fit the return type */
                assert(n + 4 < 32);
                return 1u << (n + 4);
        default:
                return 0; /* unreachable: middle_two is masked to 2 bits */
        }
}
80bf215546Sopenharmony_ci
/* Compute the padded vertex count: counts below 20 go through the
 * small-count rule, everything else through the top-nibble heuristic. */
unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
        return (vertex_count >= 20)
                ? panfrost_large_padded_vertex_count(vertex_count)
                : panfrost_small_padded_vertex_count(vertex_count);
}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci/* The much, much more irritating case -- instancing is enabled. See
91bf215546Sopenharmony_ci * panfrost_job.h for notes on how this works */
92bf215546Sopenharmony_ci
/* Compute the constants used to divide by a non-power-of-two divisor via
 * multiplication by the inverse and shifting.
 *
 * hw_divisor:  the divisor; must be non-zero (the algorithm is intended for
 *              NPOT values)
 * o_shift:     receives floor(log2(hw_divisor))
 * extra_flags: set to 1 when the round-down variant is selected. It is NOT
 *              written otherwise, so the caller has to initialize it.
 *
 * Returns the magic multiplier with bit 31 cleared; that bit is asserted to
 * be set in the full value and is implicit.
 *
 * Everything is computed in exact 64-bit integer arithmetic. A double does
 * not have enough fractional precision near 2^32 to distinguish a tiny
 * non-zero remainder from zero, which would make the ceiling one too small
 * for large divisors. */
unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
{
        assert(hw_divisor != 0);

        /* We have a NPOT divisor. Here's the fun one (multiplying by
         * the inverse and shifting) */

        /* floor(log2(d)) */
        unsigned shift = util_logbase2(hw_divisor);

        /* t = 2^(32 + shift); unsigned so that a shift by 63 is defined */
        uint64_t t = (uint64_t)1 << (32 + shift);

        /* e = 2^(shift + 32) % d */
        uint64_t e = t % hw_divisor;

        /* m = ceil(2^(32 + shift) / d) */
        uint64_t m = t / hw_divisor + (e != 0 ? 1 : 0);

        /* Default case */
        uint32_t magic_divisor = (uint32_t)m;

        /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
         * seems to use a different condition */
        if (e <= ((uint64_t)1 << shift)) {
                magic_divisor = (uint32_t)(m - 1);
                *extra_flags = 1;
        }

        /* Top flag implicitly set */
        assert(magic_divisor & (1u << 31));
        magic_divisor &= ~(1u << 31);
        *o_shift = shift;

        return magic_divisor;
}
130