1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2014 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22bf215546Sopenharmony_ci * SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "si_build_pm4.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/* For MSAA sample positions. */ 28bf215546Sopenharmony_ci#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ 29bf215546Sopenharmony_ci ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \ 30bf215546Sopenharmony_ci (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \ 31bf215546Sopenharmony_ci (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28)) 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci/* For obtaining location coordinates from registers */ 34bf215546Sopenharmony_ci#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0))) 35bf215546Sopenharmony_ci#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf) 36bf215546Sopenharmony_ci#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2) 37bf215546Sopenharmony_ci#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1) 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci/* The following sample ordering is required by EQAA. 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * Sample 0 is approx. in the top-left quadrant. 42bf215546Sopenharmony_ci * Sample 1 is approx. in the bottom-right quadrant. 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * Sample 2 is approx. in the bottom-left quadrant. 45bf215546Sopenharmony_ci * Sample 3 is approx. in the top-right quadrant. 46bf215546Sopenharmony_ci * (sample I={2,3} adds more detail to the vicinity of sample I-2) 47bf215546Sopenharmony_ci * 48bf215546Sopenharmony_ci * Sample 4 is approx. in the same quadrant as sample 0. (top-left) 49bf215546Sopenharmony_ci * Sample 5 is approx. in the same quadrant as sample 1. (bottom-right) 50bf215546Sopenharmony_ci * Sample 6 is approx. in the same quadrant as sample 2. (bottom-left) 51bf215546Sopenharmony_ci * Sample 7 is approx. in the same quadrant as sample 3. (top-right) 52bf215546Sopenharmony_ci * (sample I={4,5,6,7} adds more detail to the vicinity of sample I-4) 53bf215546Sopenharmony_ci * 54bf215546Sopenharmony_ci * The next 8 samples add more detail to the vicinity of the previous samples. 55bf215546Sopenharmony_ci * (sample I (I >= 8) adds more detail to the vicinity of sample I-8) 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * The ordering is specified such that: 58bf215546Sopenharmony_ci * If we take the first 2 samples, we should get good 2x MSAA. 59bf215546Sopenharmony_ci * If we add 2 more samples, we should get good 4x MSAA with the same sample locations. 60bf215546Sopenharmony_ci * If we add 4 more samples, we should get good 8x MSAA with the same sample locations. 61bf215546Sopenharmony_ci * If we add 8 more samples, we should get perfect 16x MSAA with the same sample locations. 62bf215546Sopenharmony_ci * 63bf215546Sopenharmony_ci * The ordering also allows finding samples in the same vicinity. 64bf215546Sopenharmony_ci * 65bf215546Sopenharmony_ci * Group N of 2 samples in the same vicinity in 16x MSAA: {N,N+8} 66bf215546Sopenharmony_ci * Group N of 2 samples in the same vicinity in 8x MSAA: {N,N+4} 67bf215546Sopenharmony_ci * Group N of 2 samples in the same vicinity in 4x MSAA: {N,N+2} 68bf215546Sopenharmony_ci * 69bf215546Sopenharmony_ci * Groups of 4 samples in the same vicinity in 16x MSAA: 70bf215546Sopenharmony_ci * Top left: {0,4,8,12} 71bf215546Sopenharmony_ci * Bottom right: {1,5,9,13} 72bf215546Sopenharmony_ci * Bottom left: {2,6,10,14} 73bf215546Sopenharmony_ci * Top right: {3,7,11,15} 74bf215546Sopenharmony_ci * 75bf215546Sopenharmony_ci * Groups of 4 samples in the same vicinity in 8x MSAA: 76bf215546Sopenharmony_ci * Left half: {0,2,4,6} 77bf215546Sopenharmony_ci * Right half: {1,3,5,7} 78bf215546Sopenharmony_ci * 79bf215546Sopenharmony_ci * Groups of 8 samples in the same vicinity in 16x MSAA: 80bf215546Sopenharmony_ci * Left half: {0,2,4,6,8,10,12,14} 81bf215546Sopenharmony_ci * Right half: {1,3,5,7,9,11,13,15} 82bf215546Sopenharmony_ci */ 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci/* Important note: We have to use the standard DX positions because shader-based culling 85bf215546Sopenharmony_ci * relies on them. 86bf215546Sopenharmony_ci */ 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci/* 1x MSAA */ 89bf215546Sopenharmony_cistatic const uint32_t sample_locs_1x = 90bf215546Sopenharmony_ci FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0); /* S1, S2, S3 fields are not used by 1x */ 91bf215546Sopenharmony_cistatic const uint64_t centroid_priority_1x = 0x0000000000000000ull; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci/* 2x MSAA (the positions are sorted for EQAA) */ 94bf215546Sopenharmony_cistatic const uint32_t sample_locs_2x = 95bf215546Sopenharmony_ci FILL_SREG(-4, -4, 4, 4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 2x MSAA */ 96bf215546Sopenharmony_cistatic const uint64_t centroid_priority_2x = 0x1010101010101010ull; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci/* 4x MSAA (the positions are sorted for EQAA) */ 99bf215546Sopenharmony_cistatic const uint32_t sample_locs_4x = FILL_SREG(-2, -6, 2, 6, -6, 2, 6, -2); 100bf215546Sopenharmony_cistatic const uint64_t centroid_priority_4x = 0x3210321032103210ull; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci/* 8x MSAA (the positions are sorted for EQAA) */ 103bf215546Sopenharmony_cistatic const uint32_t sample_locs_8x[] = { 104bf215546Sopenharmony_ci FILL_SREG(-3, -5, 5, 1, -1, 3, 7, -7), 105bf215546Sopenharmony_ci FILL_SREG(-7, -1, 3, 7, -5, 5, 1, -3), 106bf215546Sopenharmony_ci /* The following are unused by hardware, but we emit them to IBs 107bf215546Sopenharmony_ci * instead of multiple SET_CONTEXT_REG packets. */ 108bf215546Sopenharmony_ci 0, 109bf215546Sopenharmony_ci 0, 110bf215546Sopenharmony_ci}; 111bf215546Sopenharmony_cistatic const uint64_t centroid_priority_8x = 0x3546012735460127ull; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci/* 16x MSAA (the positions are sorted for EQAA) */ 114bf215546Sopenharmony_cistatic const uint32_t sample_locs_16x[] = { 115bf215546Sopenharmony_ci FILL_SREG(-5, -2, 5, 3, -2, 6, 3, -5), 116bf215546Sopenharmony_ci FILL_SREG(-4, -6, 1, 1, -6, 4, 7, -4), 117bf215546Sopenharmony_ci FILL_SREG(-1, -3, 6, 7, -3, 2, 0, -7), 118bf215546Sopenharmony_ci FILL_SREG(-7, -8, 2, 5, -8, 0, 4, -1), 119bf215546Sopenharmony_ci}; 120bf215546Sopenharmony_cistatic const uint64_t centroid_priority_16x = 0xc97e64b231d0fa85ull; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistatic void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count, 123bf215546Sopenharmony_ci unsigned sample_index, float *out_value) 124bf215546Sopenharmony_ci{ 125bf215546Sopenharmony_ci const uint32_t *sample_locs; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci switch (sample_count) { 128bf215546Sopenharmony_ci case 1: 129bf215546Sopenharmony_ci default: 130bf215546Sopenharmony_ci sample_locs = &sample_locs_1x; 131bf215546Sopenharmony_ci break; 132bf215546Sopenharmony_ci case 2: 133bf215546Sopenharmony_ci sample_locs = &sample_locs_2x; 134bf215546Sopenharmony_ci break; 135bf215546Sopenharmony_ci case 4: 136bf215546Sopenharmony_ci sample_locs = &sample_locs_4x; 137bf215546Sopenharmony_ci break; 138bf215546Sopenharmony_ci case 8: 139bf215546Sopenharmony_ci sample_locs = sample_locs_8x; 140bf215546Sopenharmony_ci break; 141bf215546Sopenharmony_ci case 16: 142bf215546Sopenharmony_ci sample_locs = sample_locs_16x; 143bf215546Sopenharmony_ci break; 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f; 147bf215546Sopenharmony_ci out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f; 148bf215546Sopenharmony_ci} 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_cistatic void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, 151bf215546Sopenharmony_ci uint32_t sample_locs) 152bf215546Sopenharmony_ci{ 153bf215546Sopenharmony_ci radeon_begin(cs); 154bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 155bf215546Sopenharmony_ci radeon_emit(centroid_priority); 156bf215546Sopenharmony_ci radeon_emit(centroid_priority >> 32); 157bf215546Sopenharmony_ci radeon_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs); 158bf215546Sopenharmony_ci radeon_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); 159bf215546Sopenharmony_ci radeon_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); 160bf215546Sopenharmony_ci radeon_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); 161bf215546Sopenharmony_ci radeon_end(); 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_cistatic void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, 165bf215546Sopenharmony_ci const uint32_t *sample_locs, unsigned num_samples) 166bf215546Sopenharmony_ci{ 167bf215546Sopenharmony_ci radeon_begin(cs); 168bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 169bf215546Sopenharmony_ci radeon_emit(centroid_priority); 170bf215546Sopenharmony_ci radeon_emit(centroid_priority >> 32); 171bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 172bf215546Sopenharmony_ci num_samples == 8 ? 14 : 16); 173bf215546Sopenharmony_ci radeon_emit_array(sample_locs, 4); 174bf215546Sopenharmony_ci radeon_emit_array(sample_locs, 4); 175bf215546Sopenharmony_ci radeon_emit_array(sample_locs, 4); 176bf215546Sopenharmony_ci radeon_emit_array(sample_locs, num_samples == 8 ? 2 : 4); 177bf215546Sopenharmony_ci radeon_end(); 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_civoid si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) 181bf215546Sopenharmony_ci{ 182bf215546Sopenharmony_ci switch (nr_samples) { 183bf215546Sopenharmony_ci default: 184bf215546Sopenharmony_ci case 1: 185bf215546Sopenharmony_ci si_emit_max_4_sample_locs(cs, centroid_priority_1x, sample_locs_1x); 186bf215546Sopenharmony_ci break; 187bf215546Sopenharmony_ci case 2: 188bf215546Sopenharmony_ci si_emit_max_4_sample_locs(cs, centroid_priority_2x, sample_locs_2x); 189bf215546Sopenharmony_ci break; 190bf215546Sopenharmony_ci case 4: 191bf215546Sopenharmony_ci si_emit_max_4_sample_locs(cs, centroid_priority_4x, sample_locs_4x); 192bf215546Sopenharmony_ci break; 193bf215546Sopenharmony_ci case 8: 194bf215546Sopenharmony_ci si_emit_max_16_sample_locs(cs, centroid_priority_8x, sample_locs_8x, 8); 195bf215546Sopenharmony_ci break; 196bf215546Sopenharmony_ci case 16: 197bf215546Sopenharmony_ci si_emit_max_16_sample_locs(cs, centroid_priority_16x, sample_locs_16x, 16); 198bf215546Sopenharmony_ci break; 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci} 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_civoid si_init_msaa_functions(struct si_context *sctx) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci int i; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci sctx->b.get_sample_position = si_get_sample_position; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci si_get_sample_position(&sctx->b, 1, 0, sctx->sample_positions.x1[0]); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci for (i = 0; i < 2; i++) 211bf215546Sopenharmony_ci si_get_sample_position(&sctx->b, 2, i, sctx->sample_positions.x2[i]); 212bf215546Sopenharmony_ci for (i = 0; i < 4; i++) 213bf215546Sopenharmony_ci si_get_sample_position(&sctx->b, 4, i, sctx->sample_positions.x4[i]); 214bf215546Sopenharmony_ci for (i = 0; i < 8; i++) 215bf215546Sopenharmony_ci si_get_sample_position(&sctx->b, 8, i, sctx->sample_positions.x8[i]); 216bf215546Sopenharmony_ci for (i = 0; i < 16; i++) 217bf215546Sopenharmony_ci si_get_sample_position(&sctx->b, 16, i, sctx->sample_positions.x16[i]); 218bf215546Sopenharmony_ci} 219