1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 #ifndef INTEL_PIXEL_HASH_H
24 #define INTEL_PIXEL_HASH_H
25 
26 /**
27  * Compute an \p n x \p m pixel hashing table usable as slice, subslice or
28  * pixel pipe hashing table.  The resulting table is the cyclic repetition of
29  * a fixed pattern with periodicity equal to \p period.
30  *
31  * If \p index is specified to be equal to \p period, a 2-way hashing table
32  * will be generated such that indices 0 and 1 are returned for the following
33  * fractions of entries respectively:
34  *
35  *   p_0 = ceil(period / 2) / period
36  *   p_1 = floor(period / 2) / period
37  *
38  * If \p index is even and less than \p period, a 3-way hashing table will be
39  * generated such that indices 0, 1 and 2 are returned for the following
40  * fractions of entries:
41  *
42  *   p_0 = (ceil(period / 2) - 1) / period
43  *   p_1 = floor(period / 2) / period
44  *   p_2 = 1 / period
45  *
46  * The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
47  * and p_1 will be swapped for the result.  Note that in the context of pixel
48  * pipe hashing this can be always 0 on Gfx12 platforms, since the hardware
49  * transparently remaps logical indices found on the table to physical pixel
50  * pipe indices from the highest to lowest EU count.
51  */
52 UNUSED static void
intel_compute_pixel_hash_table_3way(unsigned n, unsigned m, unsigned period, unsigned index, bool flip, uint32_t *p)53 intel_compute_pixel_hash_table_3way(unsigned n, unsigned m,
54                                     unsigned period, unsigned index, bool flip,
55                                     uint32_t *p)
56 {
57    for (unsigned i = 0; i < n; i++) {
58       for (unsigned j = 0; j < m; j++) {
59          const unsigned k = (i + j) % period;
60          p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip);
61       }
62    }
63 }
64 
65 /**
66  * Compute an \p n x \p m pixel hashing table usable as slice,
67  * subslice or pixel pipe hashing table.  This generalizes the
68  * previous 3-way hash table function to an arbitrary number of ways
69  * given by the number of bits set in the \p mask argument, but
70  * doesn't allow the specification of different frequencies for
71  * different table indices.
72  */
73 UNUSED static void
intel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask, uint32_t *p)74 intel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask,
75                                     uint32_t *p)
76 {
77    /* Construct a table mapping consecutive indices to the physical
78     * indices given by the bits set on the mask argument.
79     */
80    unsigned phys_ids[sizeof(mask) * CHAR_BIT];
81    unsigned num_ids = 0;
82 
83    u_foreach_bit(i, mask)
84       phys_ids[num_ids++] = i;
85 
86    assert(num_ids > 0);
87 
88    /* Compute a permutation of the above indices that assigns indices
89     * as far as possible to adjacent entries.  This permutation is
90     * designed to be equivalent to the bit reversal of each index in
91     * cases where num_ids is a power of two, but doesn't actually
92     * require it to be a power of two in order to satisfy the required
93     * properties (which is necessary to handle configurations with
94     * arbitrary non-power of two fusing).  By construction, flipping
95     * bit l of its input will lead to a change in its result of the
96     * order of num_ids/2^(l+1) (see variable t below).  The
97     * bijectivity of this permutation can be verified easily by
98     * induction.
99     */
100    const unsigned bits = util_logbase2_ceil(num_ids);
101    unsigned swz[ARRAY_SIZE(phys_ids)];
102 
103    for (unsigned k = 0; k < num_ids; k++) {
104       unsigned t = num_ids;
105       unsigned s = 0;
106 
107       for (unsigned l = 0; l < bits; l++) {
108          if (k & (1u << l)) {
109             s += (t + 1) >> 1;
110             t >>= 1;
111          } else {
112             t = (t + 1) >> 1;
113          }
114       }
115 
116       swz[k] = s;
117    }
118 
119    /* Initialize the table with the cyclic repetition of a
120     * num_ids-periodic pattern.
121     *
122     * Note that the swz permutation only affects the ordering of rows.
123     * This is intentional in order to minimize the size of the
124     * contiguous area that needs to be rendered in parallel in order
125     * to utilize the whole GPU: A rendering rectangle of width W will
126     * need to be at least H blocks high, where H is bounded by
127     * 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz
128     * permutation.
129     */
130    for (unsigned i = 0; i < n; i++) {
131       const unsigned k = i % num_ids;
132       assert(swz[k] < num_ids);
133       for (unsigned j = 0; j < m; j++) {
134          p[j + m * i] = phys_ids[(j + swz[k]) % num_ids];
135       }
136    }
137 }
138 
139 #endif
140