1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Marek Olšák <maraeo@gmail.com>
24 *
25 */
26
27#include "r600_cs.h"
28#include "evergreend.h"
29
/* 2xMSAA
 * There are two locations (4, 4), (-4, -4).
 *
 * The table holds one 32-bit word per pixel of a 2x2 quad; the words are
 * written to the X0Y0, X1Y0, X0Y1 and X1Y1 sample-location registers, in
 * that order, by cayman_emit_msaa_sample_locs(). All four pixels use the
 * same pattern.
 *
 * cayman_get_sample_position() decodes word 0 as signed 4-bit X/Y nibbles,
 * 8 bits per sample (X in the low nibble), and converts them with
 * (v + 8) / 16.
 *
 * NOTE(review): FILL_SREG is defined outside this file; presumably it packs
 * four (x, y) pairs in argument order starting at bit 0 - confirm.
 */
const uint32_t eg_sample_locs_2x[4] = {
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
};
/* Value programmed into the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 2x. */
const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
 * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6).
 *
 * The table holds one 32-bit word per pixel of a 2x2 quad; the words are
 * written to the X0Y0, X1Y0, X0Y1 and X1Y1 sample-location registers, in
 * that order, by cayman_emit_msaa_sample_locs(). All four pixels use the
 * same pattern.
 */
const uint32_t eg_sample_locs_4x[4] = {
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
};
/* Value programmed into the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 4x. */
const unsigned eg_max_dist_4x = 6;
48
/* Cayman 8xMSAA
 *
 * Layout: entries 0-3 hold samples 0-3 for the four pixels of a quad
 * (X0Y0, X1Y0, X0Y1, X1Y1) and entries 4-7 hold samples 4-7 for the same
 * four pixels. cayman_emit_msaa_sample_locs() therefore emits entry [p]
 * followed by entry [p + 4] for each pixel p, and
 * cayman_get_sample_position() reads entry (sample_index / 4) * 4.
 */
static const uint32_t cm_sample_locs_8x[] = {
	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
};
/* Value programmed into the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 8x. */
static const unsigned cm_max_dist_8x = 8;
/* Cayman 16xMSAA
 *
 * Layout: four groups of four entries. Group g (entries 4*g .. 4*g + 3)
 * holds samples 4*g .. 4*g + 3 for the four pixels of a quad
 * (X0Y0, X1Y0, X0Y1, X1Y1). cayman_emit_msaa_sample_locs() emits entries
 * [p], [p + 4], [p + 8], [p + 12] for each pixel p, and
 * cayman_get_sample_position() reads entry (sample_index / 4) * 4.
 */
static const uint32_t cm_sample_locs_16x[] = {
	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
};
/* Value programmed into the MAX_SAMPLE_DIST field of PA_SC_AA_CONFIG for 16x. */
static const unsigned cm_max_dist_16x = 8;
81
82void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
83				unsigned sample_index, float *out_value)
84{
85	int offset, index;
86	struct {
87		int idx:4;
88	} val;
89	switch (sample_count) {
90	case 1:
91	default:
92		out_value[0] = out_value[1] = 0.5;
93		break;
94	case 2:
95		offset = 4 * (sample_index * 2);
96		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
97		out_value[0] = (float)(val.idx + 8) / 16.0f;
98		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
99		out_value[1] = (float)(val.idx + 8) / 16.0f;
100		break;
101	case 4:
102		offset = 4 * (sample_index * 2);
103		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
104		out_value[0] = (float)(val.idx + 8) / 16.0f;
105		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
106		out_value[1] = (float)(val.idx + 8) / 16.0f;
107		break;
108	case 8:
109		offset = 4 * (sample_index % 4 * 2);
110		index = (sample_index / 4) * 4;
111		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
112		out_value[0] = (float)(val.idx + 8) / 16.0f;
113		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
114		out_value[1] = (float)(val.idx + 8) / 16.0f;
115		break;
116	case 16:
117		offset = 4 * (sample_index % 4 * 2);
118		index = (sample_index / 4) * 4;
119		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
120		out_value[0] = (float)(val.idx + 8) / 16.0f;
121		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
122		out_value[1] = (float)(val.idx + 8) / 16.0f;
123		break;
124	}
125}
126
127void cayman_init_msaa(struct pipe_context *ctx)
128{
129	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
130	int i;
131
132	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
133
134	for (i = 0; i < 2; i++)
135		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
136	for (i = 0; i < 4; i++)
137		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
138	for (i = 0; i < 8; i++)
139		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
140	for (i = 0; i < 16; i++)
141		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
142}
143
144static void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples)
145{
146	switch (nr_samples) {
147	default:
148	case 1:
149		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
150		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
151		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
152		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
153		break;
154	case 2:
155		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
156		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
157		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
158		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
159		break;
160	case 4:
161		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
162		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
163		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
164		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
165		break;
166	case 8:
167		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
168		radeon_emit(cs, cm_sample_locs_8x[0]);
169		radeon_emit(cs, cm_sample_locs_8x[4]);
170		radeon_emit(cs, 0);
171		radeon_emit(cs, 0);
172		radeon_emit(cs, cm_sample_locs_8x[1]);
173		radeon_emit(cs, cm_sample_locs_8x[5]);
174		radeon_emit(cs, 0);
175		radeon_emit(cs, 0);
176		radeon_emit(cs, cm_sample_locs_8x[2]);
177		radeon_emit(cs, cm_sample_locs_8x[6]);
178		radeon_emit(cs, 0);
179		radeon_emit(cs, 0);
180		radeon_emit(cs, cm_sample_locs_8x[3]);
181		radeon_emit(cs, cm_sample_locs_8x[7]);
182		break;
183	case 16:
184		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
185		radeon_emit(cs, cm_sample_locs_16x[0]);
186		radeon_emit(cs, cm_sample_locs_16x[4]);
187		radeon_emit(cs, cm_sample_locs_16x[8]);
188		radeon_emit(cs, cm_sample_locs_16x[12]);
189		radeon_emit(cs, cm_sample_locs_16x[1]);
190		radeon_emit(cs, cm_sample_locs_16x[5]);
191		radeon_emit(cs, cm_sample_locs_16x[9]);
192		radeon_emit(cs, cm_sample_locs_16x[13]);
193		radeon_emit(cs, cm_sample_locs_16x[2]);
194		radeon_emit(cs, cm_sample_locs_16x[6]);
195		radeon_emit(cs, cm_sample_locs_16x[10]);
196		radeon_emit(cs, cm_sample_locs_16x[14]);
197		radeon_emit(cs, cm_sample_locs_16x[3]);
198		radeon_emit(cs, cm_sample_locs_16x[7]);
199		radeon_emit(cs, cm_sample_locs_16x[11]);
200		radeon_emit(cs, cm_sample_locs_16x[15]);
201		break;
202	}
203}
204
205void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
206			    int ps_iter_samples, int overrast_samples)
207{
208	int setup_samples = nr_samples > 1 ? nr_samples :
209			    overrast_samples > 1 ? overrast_samples : 0;
210	/* Required by OpenGL line rasterization.
211	 *
212	 * TODO: We should also enable perpendicular endcaps for AA lines,
213	 *       but that requires implementing line stippling in the pixel
214	 *       shader. SC can only do line stippling with axis-aligned
215	 *       endcaps.
216	 */
217	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
218	unsigned sc_mode_cntl_1 =
219		EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
220		EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
221
222	if (nr_samples > 1) {
223		cayman_emit_msaa_sample_locs(cs, nr_samples);
224	}
225
226	if (setup_samples > 1) {
227		/* indexed by log2(nr_samples) */
228		const unsigned max_dist[] = {
229			0,
230			eg_max_dist_2x,
231			eg_max_dist_4x,
232			cm_max_dist_8x,
233			cm_max_dist_16x
234		};
235		unsigned log_samples = util_logbase2(setup_samples);
236		unsigned log_ps_iter_samples =
237			util_logbase2(util_next_power_of_two(ps_iter_samples));
238
239		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
240		radeon_emit(cs, sc_line_cntl |
241			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
242		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
243			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
244			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
245
246		if (nr_samples > 1) {
247			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
248					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
249					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
250					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
251					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
252					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
253					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
254			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
255					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
256					       sc_mode_cntl_1);
257		} else if (overrast_samples > 1) {
258			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
259					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
260					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
261					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
262			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
263					       sc_mode_cntl_1);
264		}
265	} else {
266		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
267		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
268		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
269
270		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
271				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
272				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
273		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
274				       sc_mode_cntl_1);
275	}
276}
277