1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci#ifndef INTEL_PIXEL_HASH_H 24bf215546Sopenharmony_ci#define INTEL_PIXEL_HASH_H 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci/** 27bf215546Sopenharmony_ci * Compute an \p n x \p m pixel hashing table usable as slice, subslice or 28bf215546Sopenharmony_ci * pixel pipe hashing table. The resulting table is the cyclic repetition of 29bf215546Sopenharmony_ci * a fixed pattern with periodicity equal to \p period. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * If \p index is specified to be equal to \p period, a 2-way hashing table 32bf215546Sopenharmony_ci * will be generated such that indices 0 and 1 are returned for the following 33bf215546Sopenharmony_ci * fractions of entries respectively: 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * p_0 = ceil(period / 2) / period 36bf215546Sopenharmony_ci * p_1 = floor(period / 2) / period 37bf215546Sopenharmony_ci * 38bf215546Sopenharmony_ci * If \p index is even and less than \p period, a 3-way hashing table will be 39bf215546Sopenharmony_ci * generated such that indices 0, 1 and 2 are returned for the following 40bf215546Sopenharmony_ci * fractions of entries: 41bf215546Sopenharmony_ci * 42bf215546Sopenharmony_ci * p_0 = (ceil(period / 2) - 1) / period 43bf215546Sopenharmony_ci * p_1 = floor(period / 2) / period 44bf215546Sopenharmony_ci * p_2 = 1 / period 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * The equations above apply if \p flip is equal to 0, if it is equal to 1 p_0 47bf215546Sopenharmony_ci * and p_1 will be swapped for the result. Note that in the context of pixel 48bf215546Sopenharmony_ci * pipe hashing this can be always 0 on Gfx12 platforms, since the hardware 49bf215546Sopenharmony_ci * transparently remaps logical indices found on the table to physical pixel 50bf215546Sopenharmony_ci * pipe indices from the highest to lowest EU count. 51bf215546Sopenharmony_ci */ 52bf215546Sopenharmony_ciUNUSED static void 53bf215546Sopenharmony_ciintel_compute_pixel_hash_table_3way(unsigned n, unsigned m, 54bf215546Sopenharmony_ci unsigned period, unsigned index, bool flip, 55bf215546Sopenharmony_ci uint32_t *p) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci for (unsigned i = 0; i < n; i++) { 58bf215546Sopenharmony_ci for (unsigned j = 0; j < m; j++) { 59bf215546Sopenharmony_ci const unsigned k = (i + j) % period; 60bf215546Sopenharmony_ci p[j + m * i] = (k == index ? 2 : (k & 1) ^ flip); 61bf215546Sopenharmony_ci } 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci/** 66bf215546Sopenharmony_ci * Compute an \p n x \p m pixel hashing table usable as slice, 67bf215546Sopenharmony_ci * subslice or pixel pipe hashing table. This generalizes the 68bf215546Sopenharmony_ci * previous 3-way hash table function to an arbitrary number of ways 69bf215546Sopenharmony_ci * given by the number of bits set in the \p mask argument, but 70bf215546Sopenharmony_ci * doesn't allow the specification of different frequencies for 71bf215546Sopenharmony_ci * different table indices. 72bf215546Sopenharmony_ci */ 73bf215546Sopenharmony_ciUNUSED static void 74bf215546Sopenharmony_ciintel_compute_pixel_hash_table_nway(unsigned n, unsigned m, uint32_t mask, 75bf215546Sopenharmony_ci uint32_t *p) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci /* Construct a table mapping consecutive indices to the physical 78bf215546Sopenharmony_ci * indices given by the bits set on the mask argument. 79bf215546Sopenharmony_ci */ 80bf215546Sopenharmony_ci unsigned phys_ids[sizeof(mask) * CHAR_BIT]; 81bf215546Sopenharmony_ci unsigned num_ids = 0; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci u_foreach_bit(i, mask) 84bf215546Sopenharmony_ci phys_ids[num_ids++] = i; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci assert(num_ids > 0); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci /* Compute a permutation of the above indices that assigns indices 89bf215546Sopenharmony_ci * as far as possible to adjacent entries. This permutation is 90bf215546Sopenharmony_ci * designed to be equivalent to the bit reversal of each index in 91bf215546Sopenharmony_ci * cases where num_ids is a power of two, but doesn't actually 92bf215546Sopenharmony_ci * require it to be a power of two in order to satisfy the required 93bf215546Sopenharmony_ci * properties (which is necessary to handle configurations with 94bf215546Sopenharmony_ci * arbitrary non-power of two fusing). By construction, flipping 95bf215546Sopenharmony_ci * bit l of its input will lead to a change in its result of the 96bf215546Sopenharmony_ci * order of num_ids/2^(l+1) (see variable t below). The 97bf215546Sopenharmony_ci * bijectivity of this permutation can be verified easily by 98bf215546Sopenharmony_ci * induction. 99bf215546Sopenharmony_ci */ 100bf215546Sopenharmony_ci const unsigned bits = util_logbase2_ceil(num_ids); 101bf215546Sopenharmony_ci unsigned swz[ARRAY_SIZE(phys_ids)]; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci for (unsigned k = 0; k < num_ids; k++) { 104bf215546Sopenharmony_ci unsigned t = num_ids; 105bf215546Sopenharmony_ci unsigned s = 0; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci for (unsigned l = 0; l < bits; l++) { 108bf215546Sopenharmony_ci if (k & (1u << l)) { 109bf215546Sopenharmony_ci s += (t + 1) >> 1; 110bf215546Sopenharmony_ci t >>= 1; 111bf215546Sopenharmony_ci } else { 112bf215546Sopenharmony_ci t = (t + 1) >> 1; 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci swz[k] = s; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci /* Initialize the table with the cyclic repetition of a 120bf215546Sopenharmony_ci * num_ids-periodic pattern. 121bf215546Sopenharmony_ci * 122bf215546Sopenharmony_ci * Note that the swz permutation only affects the ordering of rows. 123bf215546Sopenharmony_ci * This is intentional in order to minimize the size of the 124bf215546Sopenharmony_ci * contiguous area that needs to be rendered in parallel in order 125bf215546Sopenharmony_ci * to utilize the whole GPU: A rendering rectangle of width W will 126bf215546Sopenharmony_ci * need to be at least H blocks high, where H is bounded by 127bf215546Sopenharmony_ci * 2^ceil(log2(num_ids/W)) thanks to the above definition of the swz 128bf215546Sopenharmony_ci * permutation. 129bf215546Sopenharmony_ci */ 130bf215546Sopenharmony_ci for (unsigned i = 0; i < n; i++) { 131bf215546Sopenharmony_ci const unsigned k = i % num_ids; 132bf215546Sopenharmony_ci assert(swz[k] < num_ids); 133bf215546Sopenharmony_ci for (unsigned j = 0; j < m; j++) { 134bf215546Sopenharmony_ci p[j + m * i] = phys_ids[(j + swz[k]) % num_ids]; 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci} 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci#endif 140