1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci#ifndef INTEL_PIXEL_HASH_H
24bf215546Sopenharmony_ci#define INTEL_PIXEL_HASH_H
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci/**
27bf215546Sopenharmony_ci * Compute an \p n x \p m pixel hashing table usable as slice, subslice or
28bf215546Sopenharmony_ci * pixel pipe hashing table.  The resulting table is the cyclic repetition of
29bf215546Sopenharmony_ci * a fixed pattern with periodicity equal to \p period.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * If \p index is specified to be equal to \p period, a 2-way hashing table
32bf215546Sopenharmony_ci * will be generated such that indices 0 and 1 are returned for the following
33bf215546Sopenharmony_ci * fractions of entries respectively:
34bf215546Sopenharmony_ci *
35bf215546Sopenharmony_ci *   p_0 = ceil(period / 2) / period
36bf215546Sopenharmony_ci *   p_1 = floor(period / 2) / period
37bf215546Sopenharmony_ci *
38bf215546Sopenharmony_ci * If \p index is even and less than \p period, a 3-way hashing table will be
39bf215546Sopenharmony_ci * generated such that indices 0, 1 and 2 are returned for the following
40bf215546Sopenharmony_ci * fractions of entries:
41bf215546Sopenharmony_ci *
42bf215546Sopenharmony_ci *   p_0 = (ceil(period / 2) - 1) / period
43bf215546Sopenharmony_ci *   p_1 = floor(period / 2) / period
44bf215546Sopenharmony_ci *   p_2 = 1 / period
45bf215546Sopenharmony_ci *
46bf215546Sopenharmony_ci * The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0
47bf215546Sopenharmony_ci * and p_1 will be swapped for the result.  Note that in the context of pixel
48bf215546Sopenharmony_ci * pipe hashing this can be always 0 on Gfx12 platforms, since the hardware
49bf215546Sopenharmony_ci * transparently remaps logical indices found on the table to physical pixel
50bf215546Sopenharmony_ci * pipe indices from the highest to lowest EU count.
51bf215546Sopenharmony_ci */
52bf215546Sopenharmony_ciUNUSED static void
53bf215546Sopenharmony_ciintel_compute_pixel_hash_table_3way(unsigned n, unsigned m,
54bf215546Sopenharmony_ci                                    unsigned period, unsigned index, bool flip,
55bf215546Sopenharmony_ci                                    uint32_t *p)
56bf215546Sopenharmony_ci{
57bf215546Sopenharmony_ci   for (unsigned i = 0; i < n; i++) {
58bf215546Sopenharmony_ci      for (unsigned j = 0; j < m; j++) {
59bf215546Sopenharmony_ci         const unsigned k = (i + j) % period;
60bf215546Sopenharmony_ci         p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip);
61bf215546Sopenharmony_ci      }
62bf215546Sopenharmony_ci   }
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci/**
66bf215546Sopenharmony_ci * Compute an \p n x \p m pixel hashing table usable as slice,
67bf215546Sopenharmony_ci * subslice or pixel pipe hashing table.  This generalizes the
68bf215546Sopenharmony_ci * previous 3-way hash table function to an arbitrary number of ways
69bf215546Sopenharmony_ci * given by the number of bits set in the \p mask argument, but
70bf215546Sopenharmony_ci * doesn't allow the specification of different frequencies for
71bf215546Sopenharmony_ci * different table indices.
72bf215546Sopenharmony_ci */
73bf215546Sopenharmony_ciUNUSED static void
74bf215546Sopenharmony_ciintel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask,
75bf215546Sopenharmony_ci                                    uint32_t *p)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   /* Construct a table mapping consecutive indices to the physical
78bf215546Sopenharmony_ci    * indices given by the bits set on the mask argument.
79bf215546Sopenharmony_ci    */
80bf215546Sopenharmony_ci   unsigned phys_ids[sizeof(mask) * CHAR_BIT];
81bf215546Sopenharmony_ci   unsigned num_ids = 0;
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   u_foreach_bit(i, mask)
84bf215546Sopenharmony_ci      phys_ids[num_ids++] = i;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   assert(num_ids > 0);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   /* Compute a permutation of the above indices that assigns indices
89bf215546Sopenharmony_ci    * as far as possible to adjacent entries.  This permutation is
90bf215546Sopenharmony_ci    * designed to be equivalent to the bit reversal of each index in
91bf215546Sopenharmony_ci    * cases where num_ids is a power of two, but doesn't actually
92bf215546Sopenharmony_ci    * require it to be a power of two in order to satisfy the required
93bf215546Sopenharmony_ci    * properties (which is necessary to handle configurations with
94bf215546Sopenharmony_ci    * arbitrary non-power of two fusing).  By construction, flipping
95bf215546Sopenharmony_ci    * bit l of its input will lead to a change in its result of the
96bf215546Sopenharmony_ci    * order of num_ids/2^(l+1) (see variable t below).  The
97bf215546Sopenharmony_ci    * bijectivity of this permutation can be verified easily by
98bf215546Sopenharmony_ci    * induction.
99bf215546Sopenharmony_ci    */
100bf215546Sopenharmony_ci   const unsigned bits = util_logbase2_ceil(num_ids);
101bf215546Sopenharmony_ci   unsigned swz[ARRAY_SIZE(phys_ids)];
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   for (unsigned k = 0; k < num_ids; k++) {
104bf215546Sopenharmony_ci      unsigned t = num_ids;
105bf215546Sopenharmony_ci      unsigned s = 0;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci      for (unsigned l = 0; l < bits; l++) {
108bf215546Sopenharmony_ci         if (k & (1u << l)) {
109bf215546Sopenharmony_ci            s += (t + 1) >> 1;
110bf215546Sopenharmony_ci            t >>= 1;
111bf215546Sopenharmony_ci         } else {
112bf215546Sopenharmony_ci            t = (t + 1) >> 1;
113bf215546Sopenharmony_ci         }
114bf215546Sopenharmony_ci      }
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci      swz[k] = s;
117bf215546Sopenharmony_ci   }
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci   /* Initialize the table with the cyclic repetition of a
120bf215546Sopenharmony_ci    * num_ids-periodic pattern.
121bf215546Sopenharmony_ci    *
122bf215546Sopenharmony_ci    * Note that the swz permutation only affects the ordering of rows.
123bf215546Sopenharmony_ci    * This is intentional in order to minimize the size of the
124bf215546Sopenharmony_ci    * contiguous area that needs to be rendered in parallel in order
125bf215546Sopenharmony_ci    * to utilize the whole GPU: A rendering rectangle of width W will
126bf215546Sopenharmony_ci    * need to be at least H blocks high, where H is bounded by
127bf215546Sopenharmony_ci    * 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz
128bf215546Sopenharmony_ci    * permutation.
129bf215546Sopenharmony_ci    */
130bf215546Sopenharmony_ci   for (unsigned i = 0; i < n; i++) {
131bf215546Sopenharmony_ci      const unsigned k = i % num_ids;
132bf215546Sopenharmony_ci      assert(swz[k] < num_ids);
133bf215546Sopenharmony_ci      for (unsigned j = 0; j < m; j++) {
134bf215546Sopenharmony_ci         p[j + m * i] = phys_ids[(j + swz[k]) % num_ids];
135bf215546Sopenharmony_ci      }
136bf215546Sopenharmony_ci   }
137bf215546Sopenharmony_ci}
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci#endif
140