1/* 2 * Copyright © 2021 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23#ifndef INTEL_PIXEL_HASH_H 24#define INTEL_PIXEL_HASH_H 25 26/** 27 * Compute an \p n x \p m pixel hashing table usable as slice, subslice or 28 * pixel pipe hashing table. The resulting table is the cyclic repetition of 29 * a fixed pattern with periodicity equal to \p period. 30 * 31 * If \p index is specified to be equal to \p period, a 2-way hashing table 32 * will be generated such that indices 0 and 1 are returned for the following 33 * fractions of entries respectively: 34 * 35 * p_0 = ceil(period / 2) / period 36 * p_1 = floor(period / 2) / period 37 * 38 * If \p index is even and less than \p period, a 3-way hashing table will be 39 * generated such that indices 0, 1 and 2 are returned for the following 40 * fractions of entries: 41 * 42 * p_0 = (ceil(period / 2) - 1) / period 43 * p_1 = floor(period / 2) / period 44 * p_2 = 1 / period 45 * 46 * The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0 47 * and p_1 will be swapped for the result. Note that in the context of pixel 48 * pipe hashing this can be always 0 on Gfx12 platforms, since the hardware 49 * transparently remaps logical indices found on the table to physical pixel 50 * pipe indices from the highest to lowest EU count. 51 */ 52UNUSED static void 53intel_compute_pixel_hash_table_3way(unsigned n, unsigned m, 54 unsigned period, unsigned index, bool flip, 55 uint32_t *p) 56{ 57 for (unsigned i = 0; i < n; i++) { 58 for (unsigned j = 0; j < m; j++) { 59 const unsigned k = (i + j) % period; 60 p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip); 61 } 62 } 63} 64 65/** 66 * Compute an \p n x \p m pixel hashing table usable as slice, 67 * subslice or pixel pipe hashing table. This generalizes the 68 * previous 3-way hash table function to an arbitrary number of ways 69 * given by the number of bits set in the \p mask argument, but 70 * doesn't allow the specification of different frequencies for 71 * different table indices. 72 */ 73UNUSED static void 74intel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask, 75 uint32_t *p) 76{ 77 /* Construct a table mapping consecutive indices to the physical 78 * indices given by the bits set on the mask argument. 79 */ 80 unsigned phys_ids[sizeof(mask) * CHAR_BIT]; 81 unsigned num_ids = 0; 82 83 u_foreach_bit(i, mask) 84 phys_ids[num_ids++] = i; 85 86 assert(num_ids > 0); 87 88 /* Compute a permutation of the above indices that assigns indices 89 * as far as possible to adjacent entries. This permutation is 90 * designed to be equivalent to the bit reversal of each index in 91 * cases where num_ids is a power of two, but doesn't actually 92 * require it to be a power of two in order to satisfy the required 93 * properties (which is necessary to handle configurations with 94 * arbitrary non-power of two fusing). By construction, flipping 95 * bit l of its input will lead to a change in its result of the 96 * order of num_ids/2^(l+1) (see variable t below). The 97 * bijectivity of this permutation can be verified easily by 98 * induction. 99 */ 100 const unsigned bits = util_logbase2_ceil(num_ids); 101 unsigned swz[ARRAY_SIZE(phys_ids)]; 102 103 for (unsigned k = 0; k < num_ids; k++) { 104 unsigned t = num_ids; 105 unsigned s = 0; 106 107 for (unsigned l = 0; l < bits; l++) { 108 if (k & (1u << l)) { 109 s += (t + 1) >> 1; 110 t >>= 1; 111 } else { 112 t = (t + 1) >> 1; 113 } 114 } 115 116 swz[k] = s; 117 } 118 119 /* Initialize the table with the cyclic repetition of a 120 * num_ids-periodic pattern. 121 * 122 * Note that the swz permutation only affects the ordering of rows. 123 * This is intentional in order to minimize the size of the 124 * contiguous area that needs to be rendered in parallel in order 125 * to utilize the whole GPU: A rendering rectangle of width W will 126 * need to be at least H blocks high, where H is bounded by 127 * 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz 128 * permutation. 129 */ 130 for (unsigned i = 0; i < n; i++) { 131 const unsigned k = i % num_ids; 132 assert(swz[k] < num_ids); 133 for (unsigned j = 0; j < m; j++) { 134 p[j + m * i] = phys_ids[(j + swz[k]) % num_ids]; 135 } 136 } 137} 138 139#endif 140