1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/compiler.h"
38 #include "util/u_debug.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "tgsi/tgsi_scan.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "gallivm/lp_bld_const.h"
44 #include "gallivm/lp_bld_arit.h"
45 #include "gallivm/lp_bld_swizzle.h"
46 #include "gallivm/lp_bld_flow.h"
47 #include "gallivm/lp_bld_logic.h"
48 #include "gallivm/lp_bld_struct.h"
49 #include "gallivm/lp_bld_gather.h"
50 #include "lp_bld_interp.h"
51
52
53 /*
54 * The shader JIT function operates on blocks of quads.
55 * Each block has 2x2 quads and each quad has 2x2 pixels.
56 *
57 * We iterate over the quads in order 0, 1, 2, 3:
58 *
 *   #################
 *   #   |   #   |   #
 *   #---0---#---1---#
 *   #   |   #   |   #
 *   #################
 *   #   |   #   |   #
 *   #---2---#---3---#
 *   #   |   #   |   #
 *   #################
68 *
69 * If we iterate over multiple quads at once, quads 01 and 23 are processed
70 * together.
71 *
72 * Within each quad, we have four pixels which are represented in SOA
73 * order:
74 *
75 * #########
76 * # 0 | 1 #
77 * #---+---#
78 * # 2 | 3 #
79 * #########
80 *
81 * So the green channel (for example) of the four pixels is stored in
82 * a single vector register: {g0, g1, g2, g3}.
83 * The order stays the same even with multiple quads:
84 * 0 1 4 5
85 * 2 3 6 7
86 * is stored as g0..g7
87 */
88
89
90 /**
91 * Do one perspective divide per quad.
92 *
93 * For perspective interpolation, the final attribute value is given
94 *
95 * a' = a/w = a * oow
96 *
97 * where
98 *
99 * a = a0 + dadx*x + dady*y
100 * w = w0 + dwdx*x + dwdy*y
101 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
102 *
103 * Instead of computing the division per pixel, with this macro we compute the
104 * division on the upper left pixel of each quad, and use a linear
105 * approximation in the remaining pixels, given by:
106 *
107 * da'dx = (dadx - dwdx*a)*oow
108 * da'dy = (dady - dwdy*a)*oow
109 *
110 * Ironically, this actually makes things slower -- probably because the
111 * divide hardware unit is rarely used, whereas the multiply unit is typically
112 * already saturated.
113 */
114 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
115
116
/*
 * Per-pixel x/y offsets relative to the upper-left pixel of a block, indexed
 * by the pixel's position in the SOA vector.  Each row below is one 2x2 quad.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
119
120
121 static void
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)122 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123 {
124 if(attrib == 0)
125 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126 else
127 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128 }
129
130 static void
calc_offsets(struct lp_build_context *coeff_bld, unsigned quad_start_index, LLVMValueRef *pixoffx, LLVMValueRef *pixoffy)131 calc_offsets(struct lp_build_context *coeff_bld,
132 unsigned quad_start_index,
133 LLVMValueRef *pixoffx,
134 LLVMValueRef *pixoffy)
135 {
136 unsigned i;
137 unsigned num_pix = coeff_bld->type.length;
138 struct gallivm_state *gallivm = coeff_bld->gallivm;
139 LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140 LLVMValueRef nr, pixxf, pixyf;
141
142 *pixoffx = coeff_bld->undef;
143 *pixoffy = coeff_bld->undef;
144
145 for (i = 0; i < num_pix; i++) {
146 nr = lp_build_const_int32(gallivm, i);
147 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148 (quad_start_index & 1) * 2);
149 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150 (quad_start_index & 2));
151 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153 }
154 }
155
156 static void
calc_centroid_offsets(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, LLVMValueRef loop_iter, LLVMValueRef mask_store, LLVMValueRef pix_center_offset, LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)157 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158 struct gallivm_state *gallivm,
159 LLVMValueRef loop_iter,
160 LLVMValueRef mask_store,
161 LLVMValueRef pix_center_offset,
162 LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163 {
164 struct lp_build_context *coeff_bld = &bld->coeff_bld;
165 LLVMBuilderRef builder = gallivm->builder;
166 LLVMValueRef s_mask_and = NULL;
167 LLVMValueRef centroid_x_offset = pix_center_offset;
168 LLVMValueRef centroid_y_offset = pix_center_offset;
169 for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170 LLVMValueRef sample_cov;
171 LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172
173 s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174 sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175 if (s == bld->coverage_samples - 1)
176 s_mask_and = sample_cov;
177 else
178 s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179
180 LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181 LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182
183 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185 x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186 y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187 centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188 centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189 }
190 *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191 *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192 }
193
194 /* Note: this assumes the pointer to elem_type is in address space 0 */
195 static LLVMValueRef
load_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type, LLVMValueRef ptr, const char *name)196 load_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type, LLVMValueRef ptr, const char *name) {
197 ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(elem_type, 0), name);
198 return LLVMBuildLoad2(builder, elem_type, ptr, name);
199 }
200
201 static LLVMValueRef
indexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type, LLVMTypeRef elem_type, LLVMValueRef ptr, LLVMValueRef index, const char *name)202 indexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type,
203 LLVMTypeRef elem_type, LLVMValueRef ptr, LLVMValueRef index, const char *name) {
204 ptr = LLVMBuildGEP2(builder, gep_type, ptr, &index, 1, name);
205 return load_casted(builder, elem_type, ptr, name);
206 }
207
208 /* Much easier, and significantly less instructions in the per-stamp
209 * part (less than half) but overall more instructions so a loss if
210 * most quads are active. Might be a win though with larger vectors.
211 * No ability to do per-quad divide (doable but not implemented)
212 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
213 */
214 static void
coeffs_init_simple(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr)215 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
216 LLVMValueRef a0_ptr,
217 LLVMValueRef dadx_ptr,
218 LLVMValueRef dady_ptr)
219 {
220 struct lp_build_context *coeff_bld = &bld->coeff_bld;
221 struct lp_build_context *setup_bld = &bld->setup_bld;
222 struct gallivm_state *gallivm = coeff_bld->gallivm;
223 LLVMBuilderRef builder = gallivm->builder;
224 unsigned attrib;
225
226 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
227 /*
228 * always fetch all 4 values for performance/simplicity
229 * Note: we do that here because it seems to generate better
230 * code. It generates a lot of moves initially but less
231 * moves later. As far as I can tell this looks like a
232 * llvm issue, instead of simply reloading the values from
233 * the passed in pointers it if it runs out of registers
234 * it spills/reloads them. Maybe some optimization passes
235 * would help.
236 * Might want to investigate this again later.
237 */
238 const enum lp_interp interp = bld->interp[attrib];
239 LLVMValueRef index = lp_build_const_int32(gallivm,
240 attrib * TGSI_NUM_CHANNELS);
241 LLVMValueRef dadxaos = setup_bld->zero;
242 LLVMValueRef dadyaos = setup_bld->zero;
243 LLVMValueRef a0aos = setup_bld->zero;
244
245 /* See: lp_state_fs.c / generate_fragment() / fs_elem_type */
246 LLVMTypeRef fs_elem_type = LLVMFloatTypeInContext(gallivm->context);
247
248 switch (interp) {
249 case LP_INTERP_PERSPECTIVE:
250 FALLTHROUGH;
251
252 case LP_INTERP_LINEAR:
253 dadxaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dadx_ptr, index, "");
254 dadyaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dady_ptr, index, "");
255 attrib_name(dadxaos, attrib, 0, ".dadxaos");
256 attrib_name(dadyaos, attrib, 0, ".dadyaos");
257 FALLTHROUGH;
258
259 case LP_INTERP_CONSTANT:
260 case LP_INTERP_FACING:
261 a0aos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, a0_ptr, index, "");
262 attrib_name(a0aos, attrib, 0, ".a0aos");
263 break;
264
265 case LP_INTERP_POSITION:
266 /* Nothing to do as the position coeffs are already setup in slot 0 */
267 continue;
268
269 default:
270 assert(0);
271 break;
272 }
273 bld->a0aos[attrib] = a0aos;
274 bld->dadxaos[attrib] = dadxaos;
275 bld->dadyaos[attrib] = dadyaos;
276 }
277 }
278
/**
 * Interpolate the shader input attribute values.
 * This is called for each (group of) quad(s).
 *
 * For every attribute slot in [start, end) and every channel enabled in
 * bld->mask[slot], the interpolated vector is generated and stored in
 * bld->attribs[slot][chan].
 *
 * @param bld         interpolation context; coefficients are read from
 *                    a0aos/dadxaos/dadyaos
 * @param gallivm     code generation state
 * @param loop_iter   index into xoffset_store/yoffset_store (must be non-NULL)
 * @param mask_store  coverage mask storage, only forwarded to
 *                    calc_centroid_offsets()
 * @param sample_id   scalar sample index used to look up
 *                    bld->sample_pos_array, or NULL
 * @param start       first attribute slot to update (slot 0 is the position)
 * @param end         one past the last attribute slot to update
 */
static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
                      struct gallivm_state *gallivm,
                      LLVMValueRef loop_iter,
                      LLVMValueRef mask_store,
                      LLVMValueRef sample_id,
                      int start,
                      int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   /* 1/w, generated at most once per call and shared by all perspective channels */
   LLVMValueRef oow = NULL;
   unsigned attrib;
   LLVMValueRef pixoffx;
   LLVMValueRef pixoffy;
   LLVMValueRef ptr;
   LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);

   /* could do this with code-generated passed in pixel offsets too */

   /* Fetch the per-pixel offsets of this loop iteration ... */
   assert(loop_iter);
   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, "");
   pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, "");
   pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");

   /* ... and add the block origin (bld->x, bld->y) to them. */
   pixoffx = LLVMBuildFAdd(builder, pixoffx,
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");

   for (attrib = start; attrib < end; attrib++) {
      const unsigned mask = bld->mask[attrib];
      const enum lp_interp interp = bld->interp[attrib];
      const enum tgsi_interpolate_loc loc = bld->interp_loc[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
         if (mask & (1 << chan)) {
            LLVMValueRef index;
            LLVMValueRef dadx = coeff_bld->zero;
            LLVMValueRef dady = coeff_bld->zero;
            LLVMValueRef a = coeff_bld->zero;
            /* per-channel copies, so sub-pixel offsets do not leak into other channels */
            LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;

            index = lp_build_const_int32(gallivm, chan);
            switch (interp) {
            case LP_INTERP_PERSPECTIVE:
               FALLTHROUGH;

            case LP_INTERP_LINEAR:
               if (attrib == 0 && chan == 0) {
                  /* pos.x: unit x gradient; a0 is the sample's x position or pos_offset */
                  dadx = coeff_bld->one;
                  if (sample_id) {
                     LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
                     x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
                     a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
                  } else {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else if (attrib == 0 && chan == 1) {
                  /* pos.y: unit y gradient; a0 is the sample's y position or pos_offset */
                  dady = coeff_bld->one;
                  if (sample_id) {
                     LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
                     y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
                     y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
                     a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
                  } else {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else {
                  /* Every other channel: broadcast this channel's coefficients. */
                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadxaos[attrib],
                                                    index);
                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadyaos[attrib],
                                                    index);
                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, bld->a0aos[attrib],
                                                 index);

                  if (bld->coverage_samples > 1) {
                     /* Multisampling: pick the sub-pixel location, pixel center by default. */
                     LLVMValueRef xoffset = pix_center_offset;
                     LLVMValueRef yoffset = pix_center_offset;
                     /*
                      * NOTE(review): sample_id is used unconditionally when
                      * loc is LOC_SAMPLE; presumably callers always pass it in
                      * that case -- confirm.
                      */
                     if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
                        LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
                        LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");

                        x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
                        y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
                        xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
                        yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
                     } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
                        calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
                                              pix_center_offset, &xoffset, &yoffset);
                     }
                     chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
                     chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
                  }
               }

               /*
                * a = a0 + (x * dadx + y * dady)
                */
               a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
               a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);

               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     /* w is read from the position slot's W channel */
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
               break;

            case LP_INTERP_CONSTANT:
            case LP_INTERP_FACING:
               /* No gradients: the value is just a0. */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, bld->a0aos[attrib],
                                              index);
               break;

            case LP_INTERP_POSITION:
               /* Input aliases the position: reuse the value stored in slot 0. */
               assert(attrib > 0);
               a = bld->attribs[0][chan];
               break;

            default:
               assert(0);
               break;
            }

            if ((attrib == 0) && (chan == 2)) {
               /* add polygon-offset value, stored in the X component of a0 */
               LLVMValueRef offset =
                  lp_build_extract_broadcast(gallivm, setup_bld->type,
                                             coeff_bld->type, bld->a0aos[0],
                                             lp_build_const_int32(gallivm, 0));
               a = LLVMBuildFAdd(builder, a, offset, "");
            }

            bld->attribs[attrib][chan] = a;
         }
      }
   }
}
435
436 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, unsigned attrib, unsigned chan, LLVMValueRef indir_index, LLVMValueRef pixoffx, LLVMValueRef pixoffy)437 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
438 struct gallivm_state *gallivm,
439 unsigned attrib, unsigned chan,
440 LLVMValueRef indir_index,
441 LLVMValueRef pixoffx,
442 LLVMValueRef pixoffy)
443 {
444 LLVMBuilderRef builder = gallivm->builder;
445 struct lp_build_context *coeff_bld = &bld->coeff_bld;
446 const enum lp_interp interp = bld->interp[attrib];
447 LLVMValueRef dadx = coeff_bld->zero;
448 LLVMValueRef dady = coeff_bld->zero;
449 LLVMValueRef a = coeff_bld->zero;
450
451 LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
452
453 indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
454 LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
455 index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
456
457 /* size up to byte indices */
458 index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
459
460 struct lp_type dst_type = coeff_bld->type;
461 dst_type.length = 1;
462 switch (interp) {
463 case LP_INTERP_PERSPECTIVE:
464 FALLTHROUGH;
465 case LP_INTERP_LINEAR:
466
467 dadx = lp_build_gather(gallivm, coeff_bld->type.length,
468 coeff_bld->type.width, dst_type,
469 true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
470
471 dady = lp_build_gather(gallivm, coeff_bld->type.length,
472 coeff_bld->type.width, dst_type,
473 true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
474
475 a = lp_build_gather(gallivm, coeff_bld->type.length,
476 coeff_bld->type.width, dst_type,
477 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
478
479 /*
480 * a = a0 + (x * dadx + y * dady)
481 */
482 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
483 a = lp_build_fmuladd(builder, dady, pixoffy, a);
484
485 if (interp == LP_INTERP_PERSPECTIVE) {
486 LLVMValueRef w = bld->attribs[0][3];
487 assert(attrib != 0);
488 assert(bld->mask[0] & TGSI_WRITEMASK_W);
489 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
490 a = lp_build_mul(coeff_bld, a, oow);
491 }
492
493 break;
494 case LP_INTERP_CONSTANT:
495 case LP_INTERP_FACING:
496 a = lp_build_gather(gallivm, coeff_bld->type.length,
497 coeff_bld->type.width, dst_type,
498 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
499 break;
500 default:
501 assert(0);
502 break;
503 }
504 return a;
505 }
506
507 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, LLVMValueRef loop_iter, LLVMValueRef mask_store, unsigned attrib, unsigned chan, enum tgsi_interpolate_loc loc, LLVMValueRef indir_index, LLVMValueRef offsets[2])508 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
509 struct gallivm_state *gallivm,
510 LLVMValueRef loop_iter,
511 LLVMValueRef mask_store,
512 unsigned attrib, unsigned chan,
513 enum tgsi_interpolate_loc loc,
514 LLVMValueRef indir_index,
515 LLVMValueRef offsets[2])
516 {
517 LLVMBuilderRef builder = gallivm->builder;
518 struct lp_build_context *coeff_bld = &bld->coeff_bld;
519 struct lp_build_context *setup_bld = &bld->setup_bld;
520 LLVMValueRef pixoffx;
521 LLVMValueRef pixoffy;
522 LLVMValueRef ptr;
523
524 /* could do this with code-generated passed in pixel offsets too */
525
526 assert(loop_iter);
527 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, "");
528 pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
529 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, "");
530 pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
531
532 pixoffx = LLVMBuildFAdd(builder, pixoffx,
533 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
534 pixoffy = LLVMBuildFAdd(builder, pixoffy,
535 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
536
537 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
538
539 if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
540 if (bld->coverage_samples > 1) {
541 pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
542 pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
543 }
544
545 if (offsets[0])
546 pixoffx = LLVMBuildFAdd(builder, pixoffx,
547 offsets[0], "");
548 if (offsets[1])
549 pixoffy = LLVMBuildFAdd(builder, pixoffy,
550 offsets[1], "");
551 } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
552 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
553 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
554
555 LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
556 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
557 LLVMValueRef xoffset = lp_build_gather(gallivm,
558 bld->coeff_bld.type.length,
559 bld->coeff_bld.type.width,
560 lp_elem_type(bld->coeff_bld.type),
561 false,
562 base_ptr,
563 x_val_idx, true);
564 LLVMValueRef yoffset = lp_build_gather(gallivm,
565 bld->coeff_bld.type.length,
566 bld->coeff_bld.type.width,
567 lp_elem_type(bld->coeff_bld.type),
568 false,
569 base_ptr,
570 y_val_idx, true);
571
572 if (bld->coverage_samples > 1) {
573 pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
574 pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
575 }
576 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
577 LLVMValueRef centroid_x_offset, centroid_y_offset;
578
579 /* for centroid find covered samples for this quad. */
580 /* if all samples are covered use pixel centers */
581 if (bld->coverage_samples > 1) {
582 calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
583 pix_center_offset, ¢roid_x_offset,
584 ¢roid_y_offset);
585
586 pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
587 pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
588 }
589 }
590
591 // remap attrib properly.
592 attrib++;
593
594 if (indir_index)
595 return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
596 indir_index, pixoffx, pixoffy);
597
598
599 const enum lp_interp interp = bld->interp[attrib];
600 LLVMValueRef dadx = coeff_bld->zero;
601 LLVMValueRef dady = coeff_bld->zero;
602 LLVMValueRef a = coeff_bld->zero;
603
604 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
605
606 switch (interp) {
607 case LP_INTERP_PERSPECTIVE:
608 FALLTHROUGH;
609 case LP_INTERP_LINEAR:
610 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
611 coeff_bld->type, bld->dadxaos[attrib],
612 index);
613
614 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
615 coeff_bld->type, bld->dadyaos[attrib],
616 index);
617
618 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
619 coeff_bld->type, bld->a0aos[attrib],
620 index);
621
622 /*
623 * a = a0 + (x * dadx + y * dady)
624 */
625 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
626 a = lp_build_fmuladd(builder, dady, pixoffy, a);
627
628 if (interp == LP_INTERP_PERSPECTIVE) {
629 LLVMValueRef w = bld->attribs[0][3];
630 assert(attrib != 0);
631 assert(bld->mask[0] & TGSI_WRITEMASK_W);
632 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
633 a = lp_build_mul(coeff_bld, a, oow);
634 }
635
636 break;
637 case LP_INTERP_CONSTANT:
638 case LP_INTERP_FACING:
639 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
640 coeff_bld->type, bld->a0aos[attrib],
641 index);
642 break;
643 default:
644 assert(0);
645 break;
646 }
647 return a;
648 }
649
650 /**
651 * Generate the position vectors.
652 *
653 * Parameter x0, y0 are the integer values with upper left coordinates.
654 */
655 static void
pos_init(struct lp_build_interp_soa_context *bld, LLVMValueRef x0, LLVMValueRef y0)656 pos_init(struct lp_build_interp_soa_context *bld,
657 LLVMValueRef x0,
658 LLVMValueRef y0)
659 {
660 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
661 struct lp_build_context *coeff_bld = &bld->coeff_bld;
662
663 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
664 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
665 }
666
667
668 /**
669 * Initialize fragment shader input attribute info.
670 */
671 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, unsigned num_inputs, const struct lp_shader_input *inputs, boolean pixel_center_integer, unsigned coverage_samples, LLVMValueRef sample_pos_array, LLVMValueRef num_loop, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, LLVMValueRef y0)672 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
673 struct gallivm_state *gallivm,
674 unsigned num_inputs,
675 const struct lp_shader_input *inputs,
676 boolean pixel_center_integer,
677 unsigned coverage_samples,
678 LLVMValueRef sample_pos_array,
679 LLVMValueRef num_loop,
680 LLVMBuilderRef builder,
681 struct lp_type type,
682 LLVMValueRef a0_ptr,
683 LLVMValueRef dadx_ptr,
684 LLVMValueRef dady_ptr,
685 LLVMValueRef x0,
686 LLVMValueRef y0)
687 {
688 struct lp_type coeff_type;
689 struct lp_type setup_type;
690 unsigned attrib;
691 unsigned chan;
692
693 memset(bld, 0, sizeof *bld);
694
695 memset(&coeff_type, 0, sizeof coeff_type);
696 coeff_type.floating = TRUE;
697 coeff_type.sign = TRUE;
698 coeff_type.width = 32;
699 coeff_type.length = type.length;
700
701 memset(&setup_type, 0, sizeof setup_type);
702 setup_type.floating = TRUE;
703 setup_type.sign = TRUE;
704 setup_type.width = 32;
705 setup_type.length = TGSI_NUM_CHANNELS;
706
707
708 /* XXX: we don't support interpolating into any other types */
709 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
710
711 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
712 lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
713
714 /* For convenience */
715 bld->pos = bld->attribs[0];
716 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
717
718 /* Position */
719 bld->mask[0] = TGSI_WRITEMASK_XYZW;
720 bld->interp[0] = LP_INTERP_LINEAR;
721 bld->interp_loc[0] = 0;
722
723 /* Inputs */
724 for (attrib = 0; attrib < num_inputs; ++attrib) {
725 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
726 bld->interp[1 + attrib] = inputs[attrib].interp;
727 bld->interp_loc[1 + attrib] = inputs[attrib].location;
728 }
729 bld->num_attribs = 1 + num_inputs;
730
731 /* needed for indirect */
732 bld->a0_ptr = a0_ptr;
733 bld->dadx_ptr = dadx_ptr;
734 bld->dady_ptr = dady_ptr;
735
736 /* Ensure all masked out input channels have a valid value */
737 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
738 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
739 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
740 }
741 }
742
743 if (pixel_center_integer) {
744 bld->pos_offset = 0.0;
745 } else {
746 bld->pos_offset = 0.5;
747 }
748 bld->coverage_samples = coverage_samples;
749 bld->num_loop = num_loop;
750 bld->sample_pos_array = sample_pos_array;
751
752 pos_init(bld, x0, y0);
753
754 /*
755 * Simple method (single step interpolation) may be slower if vector length
756 * is just 4, but the results are different (generally less accurate) with
757 * the other method, so always use more accurate version.
758 */
759 {
760 /* XXX this should use a global static table */
761 unsigned i;
762 unsigned num_loops = 16 / type.length;
763 LLVMValueRef pixoffx, pixoffy, index;
764 LLVMValueRef ptr;
765
766 bld->store_elem_type = lp_build_vec_type(gallivm, type);
767 bld->xoffset_store = lp_build_array_alloca(gallivm,
768 bld->store_elem_type,
769 lp_build_const_int32(gallivm, num_loops),
770 "");
771 bld->yoffset_store = lp_build_array_alloca(gallivm,
772 bld->store_elem_type,
773 lp_build_const_int32(gallivm, num_loops),
774 "");
775 for (i = 0; i < num_loops; i++) {
776 index = lp_build_const_int32(gallivm, i);
777 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
778 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &index, 1, "");
779 LLVMBuildStore(builder, pixoffx, ptr);
780 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &index, 1, "");
781 LLVMBuildStore(builder, pixoffy, ptr);
782 }
783 }
784 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
785 }
786
787
788 /*
789 * Advance the position and inputs to the given quad within the block.
790 */
791
792 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, LLVMValueRef quad_start_index, LLVMValueRef mask_store, LLVMValueRef sample_id)793 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
794 struct gallivm_state *gallivm,
795 LLVMValueRef quad_start_index,
796 LLVMValueRef mask_store,
797 LLVMValueRef sample_id)
798 {
799 attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
800 }
801
802 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, LLVMValueRef quad_start_index, LLVMValueRef sample_id)803 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
804 struct gallivm_state *gallivm,
805 LLVMValueRef quad_start_index,
806 LLVMValueRef sample_id)
807 {
808 attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
809 }
810
811