1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc.
4bf215546Sopenharmony_ci * Copyright 2007-2008 VMware, Inc.
5bf215546Sopenharmony_ci * All Rights Reserved.
6bf215546Sopenharmony_ci *
7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
9bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
10bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
11bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
12bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
13bf215546Sopenharmony_ci * the following conditions:
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
16bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
17bf215546Sopenharmony_ci * of the Software.
18bf215546Sopenharmony_ci *
19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26bf215546Sopenharmony_ci *
27bf215546Sopenharmony_ci **************************************************************************/
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci/**
30bf215546Sopenharmony_ci * @file
31bf215546Sopenharmony_ci * Position and shader input interpolation.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com>
34bf215546Sopenharmony_ci */
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h"
37bf215546Sopenharmony_ci#include "util/compiler.h"
38bf215546Sopenharmony_ci#include "util/u_debug.h"
39bf215546Sopenharmony_ci#include "util/u_memory.h"
40bf215546Sopenharmony_ci#include "util/u_math.h"
41bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h"
42bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h"
43bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h"
44bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h"
45bf215546Sopenharmony_ci#include "gallivm/lp_bld_swizzle.h"
46bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h"
47bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h"
48bf215546Sopenharmony_ci#include "gallivm/lp_bld_struct.h"
49bf215546Sopenharmony_ci#include "gallivm/lp_bld_gather.h"
50bf215546Sopenharmony_ci#include "lp_bld_interp.h"
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci/*
54bf215546Sopenharmony_ci * The shader JIT function operates on blocks of quads.
55bf215546Sopenharmony_ci * Each block has 2x2 quads and each quad has 2x2 pixels.
56bf215546Sopenharmony_ci *
57bf215546Sopenharmony_ci * We iterate over the quads in order 0, 1, 2, 3:
58bf215546Sopenharmony_ci *
59bf215546Sopenharmony_ci * #################
60bf215546Sopenharmony_ci * #   |   #   |   #
61bf215546Sopenharmony_ci * #---0---#---1---#
62bf215546Sopenharmony_ci * #   |   #   |   #
63bf215546Sopenharmony_ci * #################
64bf215546Sopenharmony_ci * #   |   #   |   #
65bf215546Sopenharmony_ci * #---2---#---3---#
66bf215546Sopenharmony_ci * #   |   #   |   #
67bf215546Sopenharmony_ci * #################
68bf215546Sopenharmony_ci *
69bf215546Sopenharmony_ci * If we iterate over multiple quads at once, quads 01 and 23 are processed
70bf215546Sopenharmony_ci * together.
71bf215546Sopenharmony_ci *
72bf215546Sopenharmony_ci * Within each quad, we have four pixels which are represented in SOA
73bf215546Sopenharmony_ci * order:
74bf215546Sopenharmony_ci *
75bf215546Sopenharmony_ci * #########
76bf215546Sopenharmony_ci * # 0 | 1 #
77bf215546Sopenharmony_ci * #---+---#
78bf215546Sopenharmony_ci * # 2 | 3 #
79bf215546Sopenharmony_ci * #########
80bf215546Sopenharmony_ci *
81bf215546Sopenharmony_ci * So the green channel (for example) of the four pixels is stored in
82bf215546Sopenharmony_ci * a single vector register: {g0, g1, g2, g3}.
83bf215546Sopenharmony_ci * The order stays the same even with multiple quads:
84bf215546Sopenharmony_ci * 0 1 4 5
85bf215546Sopenharmony_ci * 2 3 6 7
86bf215546Sopenharmony_ci * is stored as g0..g7
87bf215546Sopenharmony_ci */
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci/**
91bf215546Sopenharmony_ci * Do one perspective divide per quad.
92bf215546Sopenharmony_ci *
93bf215546Sopenharmony_ci * For perspective interpolation, the final attribute value is given
94bf215546Sopenharmony_ci *
95bf215546Sopenharmony_ci *  a' = a/w = a * oow
96bf215546Sopenharmony_ci *
97bf215546Sopenharmony_ci * where
98bf215546Sopenharmony_ci *
99bf215546Sopenharmony_ci *  a = a0 + dadx*x + dady*y
100bf215546Sopenharmony_ci *  w = w0 + dwdx*x + dwdy*y
101bf215546Sopenharmony_ci *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
102bf215546Sopenharmony_ci *
103bf215546Sopenharmony_ci * Instead of computing the division per pixel, with this macro we compute the
104bf215546Sopenharmony_ci * division on the upper left pixel of each quad, and use a linear
105bf215546Sopenharmony_ci * approximation in the remaining pixels, given by:
106bf215546Sopenharmony_ci *
107bf215546Sopenharmony_ci *  da'dx = (dadx - dwdx*a)*oow
108bf215546Sopenharmony_ci *  da'dy = (dady - dwdy*a)*oow
109bf215546Sopenharmony_ci *
110bf215546Sopenharmony_ci * Ironically, this actually makes things slower -- probably because the
111bf215546Sopenharmony_ci * divide hardware unit is rarely used, whereas the multiply unit is typically
112bf215546Sopenharmony_ci * already saturated.
113bf215546Sopenharmony_ci */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Per-lane pixel offsets, in pixels.  Each row of four entries below is
 * one 2x2 quad; sixteen entries cover four quads.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3,
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3,
};
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_cistatic void
122bf215546Sopenharmony_ciattrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
123bf215546Sopenharmony_ci{
124bf215546Sopenharmony_ci   if(attrib == 0)
125bf215546Sopenharmony_ci      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
126bf215546Sopenharmony_ci   else
127bf215546Sopenharmony_ci      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
128bf215546Sopenharmony_ci}
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_cistatic void
131bf215546Sopenharmony_cicalc_offsets(struct lp_build_context *coeff_bld,
132bf215546Sopenharmony_ci             unsigned quad_start_index,
133bf215546Sopenharmony_ci             LLVMValueRef *pixoffx,
134bf215546Sopenharmony_ci             LLVMValueRef *pixoffy)
135bf215546Sopenharmony_ci{
136bf215546Sopenharmony_ci   unsigned i;
137bf215546Sopenharmony_ci   unsigned num_pix = coeff_bld->type.length;
138bf215546Sopenharmony_ci   struct gallivm_state *gallivm = coeff_bld->gallivm;
139bf215546Sopenharmony_ci   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
140bf215546Sopenharmony_ci   LLVMValueRef nr, pixxf, pixyf;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   *pixoffx = coeff_bld->undef;
143bf215546Sopenharmony_ci   *pixoffy = coeff_bld->undef;
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   for (i = 0; i < num_pix; i++) {
146bf215546Sopenharmony_ci      nr = lp_build_const_int32(gallivm, i);
147bf215546Sopenharmony_ci      pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
148bf215546Sopenharmony_ci                                   (quad_start_index & 1) * 2);
149bf215546Sopenharmony_ci      pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
150bf215546Sopenharmony_ci                                   (quad_start_index & 2));
151bf215546Sopenharmony_ci      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
152bf215546Sopenharmony_ci      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153bf215546Sopenharmony_ci   }
154bf215546Sopenharmony_ci}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_cistatic void
157bf215546Sopenharmony_cicalc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158bf215546Sopenharmony_ci                      struct gallivm_state *gallivm,
159bf215546Sopenharmony_ci                      LLVMValueRef loop_iter,
160bf215546Sopenharmony_ci                      LLVMValueRef mask_store,
161bf215546Sopenharmony_ci                      LLVMValueRef pix_center_offset,
162bf215546Sopenharmony_ci                      LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
163bf215546Sopenharmony_ci{
164bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
165bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
166bf215546Sopenharmony_ci   LLVMValueRef s_mask_and = NULL;
167bf215546Sopenharmony_ci   LLVMValueRef centroid_x_offset = pix_center_offset;
168bf215546Sopenharmony_ci   LLVMValueRef centroid_y_offset = pix_center_offset;
169bf215546Sopenharmony_ci   for (int s = bld->coverage_samples - 1; s >= 0; s--) {
170bf215546Sopenharmony_ci      LLVMValueRef sample_cov;
171bf215546Sopenharmony_ci      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
174bf215546Sopenharmony_ci      sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
175bf215546Sopenharmony_ci      if (s == bld->coverage_samples - 1)
176bf215546Sopenharmony_ci         s_mask_and = sample_cov;
177bf215546Sopenharmony_ci      else
178bf215546Sopenharmony_ci         s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci      LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
181bf215546Sopenharmony_ci      LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci      x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
184bf215546Sopenharmony_ci      y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
185bf215546Sopenharmony_ci      x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
186bf215546Sopenharmony_ci      y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
187bf215546Sopenharmony_ci      centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
188bf215546Sopenharmony_ci      centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
189bf215546Sopenharmony_ci   }
190bf215546Sopenharmony_ci   *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
191bf215546Sopenharmony_ci   *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci/* Note: this assumes the pointer to elem_type is in address space 0 */
195bf215546Sopenharmony_cistatic LLVMValueRef
196bf215546Sopenharmony_ciload_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type, LLVMValueRef ptr, const char *name) {
197bf215546Sopenharmony_ci   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(elem_type, 0), name);
198bf215546Sopenharmony_ci   return LLVMBuildLoad2(builder, elem_type, ptr, name);
199bf215546Sopenharmony_ci}
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_cistatic LLVMValueRef
202bf215546Sopenharmony_ciindexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type,
203bf215546Sopenharmony_ci                  LLVMTypeRef elem_type, LLVMValueRef ptr, LLVMValueRef index, const char *name) {
204bf215546Sopenharmony_ci   ptr = LLVMBuildGEP2(builder, gep_type, ptr, &index, 1, name);
205bf215546Sopenharmony_ci   return load_casted(builder, elem_type, ptr, name);
206bf215546Sopenharmony_ci}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci/* Much easier, and significantly less instructions in the per-stamp
209bf215546Sopenharmony_ci * part (less than half) but overall more instructions so a loss if
210bf215546Sopenharmony_ci * most quads are active. Might be a win though with larger vectors.
211bf215546Sopenharmony_ci * No ability to do per-quad divide (doable but not implemented)
212bf215546Sopenharmony_ci * Could be made to work with passed in pixel offsets (i.e. active quad merging).
213bf215546Sopenharmony_ci */
214bf215546Sopenharmony_cistatic void
215bf215546Sopenharmony_cicoeffs_init_simple(struct lp_build_interp_soa_context *bld,
216bf215546Sopenharmony_ci                   LLVMValueRef a0_ptr,
217bf215546Sopenharmony_ci                   LLVMValueRef dadx_ptr,
218bf215546Sopenharmony_ci                   LLVMValueRef dady_ptr)
219bf215546Sopenharmony_ci{
220bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
221bf215546Sopenharmony_ci   struct lp_build_context *setup_bld = &bld->setup_bld;
222bf215546Sopenharmony_ci   struct gallivm_state *gallivm = coeff_bld->gallivm;
223bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
224bf215546Sopenharmony_ci   unsigned attrib;
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
227bf215546Sopenharmony_ci      /*
228bf215546Sopenharmony_ci       * always fetch all 4 values for performance/simplicity
229bf215546Sopenharmony_ci       * Note: we do that here because it seems to generate better
230bf215546Sopenharmony_ci       * code. It generates a lot of moves initially but less
231bf215546Sopenharmony_ci       * moves later. As far as I can tell this looks like a
232bf215546Sopenharmony_ci       * llvm issue, instead of simply reloading the values from
233bf215546Sopenharmony_ci       * the passed in pointers it if it runs out of registers
234bf215546Sopenharmony_ci       * it spills/reloads them. Maybe some optimization passes
235bf215546Sopenharmony_ci       * would help.
236bf215546Sopenharmony_ci       * Might want to investigate this again later.
237bf215546Sopenharmony_ci       */
238bf215546Sopenharmony_ci      const enum lp_interp interp = bld->interp[attrib];
239bf215546Sopenharmony_ci      LLVMValueRef index = lp_build_const_int32(gallivm,
240bf215546Sopenharmony_ci                                attrib * TGSI_NUM_CHANNELS);
241bf215546Sopenharmony_ci      LLVMValueRef dadxaos = setup_bld->zero;
242bf215546Sopenharmony_ci      LLVMValueRef dadyaos = setup_bld->zero;
243bf215546Sopenharmony_ci      LLVMValueRef a0aos = setup_bld->zero;
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci      /* See: lp_state_fs.c / generate_fragment() / fs_elem_type */
246bf215546Sopenharmony_ci      LLVMTypeRef fs_elem_type = LLVMFloatTypeInContext(gallivm->context);
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci      switch (interp) {
249bf215546Sopenharmony_ci      case LP_INTERP_PERSPECTIVE:
250bf215546Sopenharmony_ci         FALLTHROUGH;
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci      case LP_INTERP_LINEAR:
253bf215546Sopenharmony_ci         dadxaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dadx_ptr, index, "");
254bf215546Sopenharmony_ci         dadyaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dady_ptr, index, "");
255bf215546Sopenharmony_ci         attrib_name(dadxaos, attrib, 0, ".dadxaos");
256bf215546Sopenharmony_ci         attrib_name(dadyaos, attrib, 0, ".dadyaos");
257bf215546Sopenharmony_ci         FALLTHROUGH;
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci      case LP_INTERP_CONSTANT:
260bf215546Sopenharmony_ci      case LP_INTERP_FACING:
261bf215546Sopenharmony_ci         a0aos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, a0_ptr, index, "");
262bf215546Sopenharmony_ci         attrib_name(a0aos, attrib, 0, ".a0aos");
263bf215546Sopenharmony_ci         break;
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci      case LP_INTERP_POSITION:
266bf215546Sopenharmony_ci         /* Nothing to do as the position coeffs are already setup in slot 0 */
267bf215546Sopenharmony_ci         continue;
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci      default:
270bf215546Sopenharmony_ci         assert(0);
271bf215546Sopenharmony_ci         break;
272bf215546Sopenharmony_ci      }
273bf215546Sopenharmony_ci      bld->a0aos[attrib] = a0aos;
274bf215546Sopenharmony_ci      bld->dadxaos[attrib] = dadxaos;
275bf215546Sopenharmony_ci      bld->dadyaos[attrib] = dadyaos;
276bf215546Sopenharmony_ci   }
277bf215546Sopenharmony_ci}
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci/**
280bf215546Sopenharmony_ci * Interpolate the shader input attribute values.
281bf215546Sopenharmony_ci * This is called for each (group of) quad(s).
282bf215546Sopenharmony_ci */
283bf215546Sopenharmony_cistatic void
284bf215546Sopenharmony_ciattribs_update_simple(struct lp_build_interp_soa_context *bld,
285bf215546Sopenharmony_ci                      struct gallivm_state *gallivm,
286bf215546Sopenharmony_ci                      LLVMValueRef loop_iter,
287bf215546Sopenharmony_ci                      LLVMValueRef mask_store,
288bf215546Sopenharmony_ci                      LLVMValueRef sample_id,
289bf215546Sopenharmony_ci                      int start,
290bf215546Sopenharmony_ci                      int end)
291bf215546Sopenharmony_ci{
292bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
293bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
294bf215546Sopenharmony_ci   struct lp_build_context *setup_bld = &bld->setup_bld;
295bf215546Sopenharmony_ci   LLVMValueRef oow = NULL;
296bf215546Sopenharmony_ci   unsigned attrib;
297bf215546Sopenharmony_ci   LLVMValueRef pixoffx;
298bf215546Sopenharmony_ci   LLVMValueRef pixoffy;
299bf215546Sopenharmony_ci   LLVMValueRef ptr;
300bf215546Sopenharmony_ci   LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   /* could do this with code-generated passed in pixel offsets too */
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   assert(loop_iter);
305bf215546Sopenharmony_ci   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, "");
306bf215546Sopenharmony_ci   pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
307bf215546Sopenharmony_ci   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, "");
308bf215546Sopenharmony_ci   pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   pixoffx = LLVMBuildFAdd(builder, pixoffx,
311bf215546Sopenharmony_ci                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
312bf215546Sopenharmony_ci   pixoffy = LLVMBuildFAdd(builder, pixoffy,
313bf215546Sopenharmony_ci                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   for (attrib = start; attrib < end; attrib++) {
316bf215546Sopenharmony_ci      const unsigned mask = bld->mask[attrib];
317bf215546Sopenharmony_ci      const enum lp_interp interp = bld->interp[attrib];
318bf215546Sopenharmony_ci      const enum tgsi_interpolate_loc loc = bld->interp_loc[attrib];
319bf215546Sopenharmony_ci      unsigned chan;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
322bf215546Sopenharmony_ci         if (mask & (1 << chan)) {
323bf215546Sopenharmony_ci            LLVMValueRef index;
324bf215546Sopenharmony_ci            LLVMValueRef dadx = coeff_bld->zero;
325bf215546Sopenharmony_ci            LLVMValueRef dady = coeff_bld->zero;
326bf215546Sopenharmony_ci            LLVMValueRef a = coeff_bld->zero;
327bf215546Sopenharmony_ci            LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci            index = lp_build_const_int32(gallivm, chan);
330bf215546Sopenharmony_ci            switch (interp) {
331bf215546Sopenharmony_ci            case LP_INTERP_PERSPECTIVE:
332bf215546Sopenharmony_ci               FALLTHROUGH;
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci            case LP_INTERP_LINEAR:
335bf215546Sopenharmony_ci               if (attrib == 0 && chan == 0) {
336bf215546Sopenharmony_ci                  dadx = coeff_bld->one;
337bf215546Sopenharmony_ci                  if (sample_id) {
338bf215546Sopenharmony_ci                     LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
339bf215546Sopenharmony_ci                     x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
340bf215546Sopenharmony_ci                     a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
341bf215546Sopenharmony_ci                  } else {
342bf215546Sopenharmony_ci                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
343bf215546Sopenharmony_ci                  }
344bf215546Sopenharmony_ci               }
345bf215546Sopenharmony_ci               else if (attrib == 0 && chan == 1) {
346bf215546Sopenharmony_ci                  dady = coeff_bld->one;
347bf215546Sopenharmony_ci                  if (sample_id) {
348bf215546Sopenharmony_ci                     LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
349bf215546Sopenharmony_ci                     y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
350bf215546Sopenharmony_ci                     y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
351bf215546Sopenharmony_ci                     a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
352bf215546Sopenharmony_ci                  } else {
353bf215546Sopenharmony_ci                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
354bf215546Sopenharmony_ci                  }
355bf215546Sopenharmony_ci               }
356bf215546Sopenharmony_ci               else {
357bf215546Sopenharmony_ci                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
358bf215546Sopenharmony_ci                                                    coeff_bld->type, bld->dadxaos[attrib],
359bf215546Sopenharmony_ci                                                    index);
360bf215546Sopenharmony_ci                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
361bf215546Sopenharmony_ci                                                    coeff_bld->type, bld->dadyaos[attrib],
362bf215546Sopenharmony_ci                                                    index);
363bf215546Sopenharmony_ci                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
364bf215546Sopenharmony_ci                                                 coeff_bld->type, bld->a0aos[attrib],
365bf215546Sopenharmony_ci                                                 index);
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci                  if (bld->coverage_samples > 1) {
368bf215546Sopenharmony_ci                     LLVMValueRef xoffset = pix_center_offset;
369bf215546Sopenharmony_ci                     LLVMValueRef yoffset = pix_center_offset;
370bf215546Sopenharmony_ci                     if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
371bf215546Sopenharmony_ci                        LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
372bf215546Sopenharmony_ci                        LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci                        x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
375bf215546Sopenharmony_ci                        y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
376bf215546Sopenharmony_ci                        xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
377bf215546Sopenharmony_ci                        yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
378bf215546Sopenharmony_ci                     } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
379bf215546Sopenharmony_ci                        calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
380bf215546Sopenharmony_ci                                              pix_center_offset, &xoffset, &yoffset);
381bf215546Sopenharmony_ci                     }
382bf215546Sopenharmony_ci                     chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
383bf215546Sopenharmony_ci                     chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
384bf215546Sopenharmony_ci                  }
385bf215546Sopenharmony_ci               }
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci               /*
388bf215546Sopenharmony_ci                * a = a0 + (x * dadx + y * dady)
389bf215546Sopenharmony_ci                */
390bf215546Sopenharmony_ci               a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
391bf215546Sopenharmony_ci               a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci               if (interp == LP_INTERP_PERSPECTIVE) {
394bf215546Sopenharmony_ci                  if (oow == NULL) {
395bf215546Sopenharmony_ci                     LLVMValueRef w = bld->attribs[0][3];
396bf215546Sopenharmony_ci                     assert(attrib != 0);
397bf215546Sopenharmony_ci                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
398bf215546Sopenharmony_ci                     oow = lp_build_rcp(coeff_bld, w);
399bf215546Sopenharmony_ci                  }
400bf215546Sopenharmony_ci                  a = lp_build_mul(coeff_bld, a, oow);
401bf215546Sopenharmony_ci               }
402bf215546Sopenharmony_ci               break;
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci            case LP_INTERP_CONSTANT:
405bf215546Sopenharmony_ci            case LP_INTERP_FACING:
406bf215546Sopenharmony_ci               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
407bf215546Sopenharmony_ci                                              coeff_bld->type, bld->a0aos[attrib],
408bf215546Sopenharmony_ci                                              index);
409bf215546Sopenharmony_ci               break;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci            case LP_INTERP_POSITION:
412bf215546Sopenharmony_ci               assert(attrib > 0);
413bf215546Sopenharmony_ci               a = bld->attribs[0][chan];
414bf215546Sopenharmony_ci               break;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci            default:
417bf215546Sopenharmony_ci               assert(0);
418bf215546Sopenharmony_ci               break;
419bf215546Sopenharmony_ci            }
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci            if ((attrib == 0) && (chan == 2)) {
422bf215546Sopenharmony_ci               /* add polygon-offset value, stored in the X component of a0 */
423bf215546Sopenharmony_ci               LLVMValueRef offset =
424bf215546Sopenharmony_ci                  lp_build_extract_broadcast(gallivm, setup_bld->type,
425bf215546Sopenharmony_ci                                             coeff_bld->type, bld->a0aos[0],
426bf215546Sopenharmony_ci                                             lp_build_const_int32(gallivm, 0));
427bf215546Sopenharmony_ci               a = LLVMBuildFAdd(builder, a, offset, "");
428bf215546Sopenharmony_ci            }
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_ci            bld->attribs[attrib][chan] = a;
431bf215546Sopenharmony_ci         }
432bf215546Sopenharmony_ci      }
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_cistatic LLVMValueRef
437bf215546Sopenharmony_cilp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
438bf215546Sopenharmony_ci                             struct gallivm_state *gallivm,
439bf215546Sopenharmony_ci                             unsigned attrib, unsigned chan,
440bf215546Sopenharmony_ci                             LLVMValueRef indir_index,
441bf215546Sopenharmony_ci                             LLVMValueRef pixoffx,
442bf215546Sopenharmony_ci                             LLVMValueRef pixoffy)
443bf215546Sopenharmony_ci{
444bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
445bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
446bf215546Sopenharmony_ci   const enum lp_interp interp = bld->interp[attrib];
447bf215546Sopenharmony_ci   LLVMValueRef dadx = coeff_bld->zero;
448bf215546Sopenharmony_ci   LLVMValueRef dady = coeff_bld->zero;
449bf215546Sopenharmony_ci   LLVMValueRef a = coeff_bld->zero;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci   indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
454bf215546Sopenharmony_ci   LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
455bf215546Sopenharmony_ci   index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   /* size up to byte indices */
458bf215546Sopenharmony_ci   index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   struct lp_type dst_type = coeff_bld->type;
461bf215546Sopenharmony_ci   dst_type.length = 1;
462bf215546Sopenharmony_ci   switch (interp) {
463bf215546Sopenharmony_ci   case LP_INTERP_PERSPECTIVE:
464bf215546Sopenharmony_ci      FALLTHROUGH;
465bf215546Sopenharmony_ci   case LP_INTERP_LINEAR:
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci      dadx = lp_build_gather(gallivm, coeff_bld->type.length,
468bf215546Sopenharmony_ci                             coeff_bld->type.width, dst_type,
469bf215546Sopenharmony_ci                             true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci      dady = lp_build_gather(gallivm, coeff_bld->type.length,
472bf215546Sopenharmony_ci                             coeff_bld->type.width, dst_type,
473bf215546Sopenharmony_ci                             true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci      a = lp_build_gather(gallivm, coeff_bld->type.length,
476bf215546Sopenharmony_ci                          coeff_bld->type.width, dst_type,
477bf215546Sopenharmony_ci                          true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci      /*
480bf215546Sopenharmony_ci       * a = a0 + (x * dadx + y * dady)
481bf215546Sopenharmony_ci       */
482bf215546Sopenharmony_ci      a = lp_build_fmuladd(builder, dadx, pixoffx, a);
483bf215546Sopenharmony_ci      a = lp_build_fmuladd(builder, dady, pixoffy, a);
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci      if (interp == LP_INTERP_PERSPECTIVE) {
486bf215546Sopenharmony_ci        LLVMValueRef w = bld->attribs[0][3];
487bf215546Sopenharmony_ci        assert(attrib != 0);
488bf215546Sopenharmony_ci        assert(bld->mask[0] & TGSI_WRITEMASK_W);
489bf215546Sopenharmony_ci        LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
490bf215546Sopenharmony_ci        a = lp_build_mul(coeff_bld, a, oow);
491bf215546Sopenharmony_ci      }
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci      break;
494bf215546Sopenharmony_ci   case LP_INTERP_CONSTANT:
495bf215546Sopenharmony_ci   case LP_INTERP_FACING:
496bf215546Sopenharmony_ci      a = lp_build_gather(gallivm, coeff_bld->type.length,
497bf215546Sopenharmony_ci                          coeff_bld->type.width, dst_type,
498bf215546Sopenharmony_ci                          true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
499bf215546Sopenharmony_ci      break;
500bf215546Sopenharmony_ci   default:
501bf215546Sopenharmony_ci      assert(0);
502bf215546Sopenharmony_ci      break;
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci   return a;
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ciLLVMValueRef
508bf215546Sopenharmony_cilp_build_interp_soa(struct lp_build_interp_soa_context *bld,
509bf215546Sopenharmony_ci                    struct gallivm_state *gallivm,
510bf215546Sopenharmony_ci                    LLVMValueRef loop_iter,
511bf215546Sopenharmony_ci                    LLVMValueRef mask_store,
512bf215546Sopenharmony_ci                    unsigned attrib, unsigned chan,
513bf215546Sopenharmony_ci                    enum tgsi_interpolate_loc loc,
514bf215546Sopenharmony_ci                    LLVMValueRef indir_index,
515bf215546Sopenharmony_ci                    LLVMValueRef offsets[2])
516bf215546Sopenharmony_ci{
517bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
518bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
519bf215546Sopenharmony_ci   struct lp_build_context *setup_bld = &bld->setup_bld;
520bf215546Sopenharmony_ci   LLVMValueRef pixoffx;
521bf215546Sopenharmony_ci   LLVMValueRef pixoffy;
522bf215546Sopenharmony_ci   LLVMValueRef ptr;
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   /* could do this with code-generated passed in pixel offsets too */
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   assert(loop_iter);
527bf215546Sopenharmony_ci   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, "");
528bf215546Sopenharmony_ci   pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
529bf215546Sopenharmony_ci   ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, "");
530bf215546Sopenharmony_ci   pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   pixoffx = LLVMBuildFAdd(builder, pixoffx,
533bf215546Sopenharmony_ci                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
534bf215546Sopenharmony_ci   pixoffy = LLVMBuildFAdd(builder, pixoffy,
535bf215546Sopenharmony_ci                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci   if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
540bf215546Sopenharmony_ci      if (bld->coverage_samples > 1) {
541bf215546Sopenharmony_ci         pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
542bf215546Sopenharmony_ci         pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
543bf215546Sopenharmony_ci      }
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci      if (offsets[0])
546bf215546Sopenharmony_ci         pixoffx = LLVMBuildFAdd(builder, pixoffx,
547bf215546Sopenharmony_ci                                 offsets[0], "");
548bf215546Sopenharmony_ci      if (offsets[1])
549bf215546Sopenharmony_ci         pixoffy = LLVMBuildFAdd(builder, pixoffy,
550bf215546Sopenharmony_ci                                 offsets[1], "");
551bf215546Sopenharmony_ci   } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
552bf215546Sopenharmony_ci      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
553bf215546Sopenharmony_ci      LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
556bf215546Sopenharmony_ci                                               LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
557bf215546Sopenharmony_ci      LLVMValueRef xoffset = lp_build_gather(gallivm,
558bf215546Sopenharmony_ci                                             bld->coeff_bld.type.length,
559bf215546Sopenharmony_ci                                             bld->coeff_bld.type.width,
560bf215546Sopenharmony_ci                                             lp_elem_type(bld->coeff_bld.type),
561bf215546Sopenharmony_ci                                             false,
562bf215546Sopenharmony_ci                                             base_ptr,
563bf215546Sopenharmony_ci                                             x_val_idx, true);
564bf215546Sopenharmony_ci      LLVMValueRef yoffset = lp_build_gather(gallivm,
565bf215546Sopenharmony_ci                                             bld->coeff_bld.type.length,
566bf215546Sopenharmony_ci                                             bld->coeff_bld.type.width,
567bf215546Sopenharmony_ci                                             lp_elem_type(bld->coeff_bld.type),
568bf215546Sopenharmony_ci                                             false,
569bf215546Sopenharmony_ci                                             base_ptr,
570bf215546Sopenharmony_ci                                             y_val_idx, true);
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci      if (bld->coverage_samples > 1) {
573bf215546Sopenharmony_ci         pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
574bf215546Sopenharmony_ci         pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
575bf215546Sopenharmony_ci      }
576bf215546Sopenharmony_ci   } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
577bf215546Sopenharmony_ci      LLVMValueRef centroid_x_offset, centroid_y_offset;
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci      /* for centroid find covered samples for this quad. */
580bf215546Sopenharmony_ci      /* if all samples are covered use pixel centers */
581bf215546Sopenharmony_ci      if (bld->coverage_samples > 1) {
582bf215546Sopenharmony_ci         calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
583bf215546Sopenharmony_ci                               pix_center_offset, &centroid_x_offset,
584bf215546Sopenharmony_ci                               &centroid_y_offset);
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci         pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
587bf215546Sopenharmony_ci         pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
588bf215546Sopenharmony_ci      }
589bf215546Sopenharmony_ci   }
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   // remap attrib properly.
592bf215546Sopenharmony_ci   attrib++;
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   if (indir_index)
595bf215546Sopenharmony_ci     return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
596bf215546Sopenharmony_ci                                         indir_index, pixoffx, pixoffy);
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   const enum lp_interp interp = bld->interp[attrib];
600bf215546Sopenharmony_ci   LLVMValueRef dadx = coeff_bld->zero;
601bf215546Sopenharmony_ci   LLVMValueRef dady = coeff_bld->zero;
602bf215546Sopenharmony_ci   LLVMValueRef a = coeff_bld->zero;
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   LLVMValueRef index = lp_build_const_int32(gallivm, chan);
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   switch (interp) {
607bf215546Sopenharmony_ci   case LP_INTERP_PERSPECTIVE:
608bf215546Sopenharmony_ci      FALLTHROUGH;
609bf215546Sopenharmony_ci   case LP_INTERP_LINEAR:
610bf215546Sopenharmony_ci      dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
611bf215546Sopenharmony_ci                                        coeff_bld->type, bld->dadxaos[attrib],
612bf215546Sopenharmony_ci                                        index);
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci      dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
615bf215546Sopenharmony_ci                                        coeff_bld->type, bld->dadyaos[attrib],
616bf215546Sopenharmony_ci                                        index);
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci      a = lp_build_extract_broadcast(gallivm, setup_bld->type,
619bf215546Sopenharmony_ci                                     coeff_bld->type, bld->a0aos[attrib],
620bf215546Sopenharmony_ci                                     index);
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci      /*
623bf215546Sopenharmony_ci       * a = a0 + (x * dadx + y * dady)
624bf215546Sopenharmony_ci       */
625bf215546Sopenharmony_ci      a = lp_build_fmuladd(builder, dadx, pixoffx, a);
626bf215546Sopenharmony_ci      a = lp_build_fmuladd(builder, dady, pixoffy, a);
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci      if (interp == LP_INTERP_PERSPECTIVE) {
629bf215546Sopenharmony_ci        LLVMValueRef w = bld->attribs[0][3];
630bf215546Sopenharmony_ci        assert(attrib != 0);
631bf215546Sopenharmony_ci        assert(bld->mask[0] & TGSI_WRITEMASK_W);
632bf215546Sopenharmony_ci        LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
633bf215546Sopenharmony_ci        a = lp_build_mul(coeff_bld, a, oow);
634bf215546Sopenharmony_ci      }
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci      break;
637bf215546Sopenharmony_ci   case LP_INTERP_CONSTANT:
638bf215546Sopenharmony_ci   case LP_INTERP_FACING:
639bf215546Sopenharmony_ci      a = lp_build_extract_broadcast(gallivm, setup_bld->type,
640bf215546Sopenharmony_ci                                     coeff_bld->type, bld->a0aos[attrib],
641bf215546Sopenharmony_ci                                     index);
642bf215546Sopenharmony_ci      break;
643bf215546Sopenharmony_ci   default:
644bf215546Sopenharmony_ci      assert(0);
645bf215546Sopenharmony_ci      break;
646bf215546Sopenharmony_ci   }
647bf215546Sopenharmony_ci   return a;
648bf215546Sopenharmony_ci}
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci/**
651bf215546Sopenharmony_ci * Generate the position vectors.
652bf215546Sopenharmony_ci *
653bf215546Sopenharmony_ci * Parameter x0, y0 are the integer values with upper left coordinates.
654bf215546Sopenharmony_ci */
655bf215546Sopenharmony_cistatic void
656bf215546Sopenharmony_cipos_init(struct lp_build_interp_soa_context *bld,
657bf215546Sopenharmony_ci         LLVMValueRef x0,
658bf215546Sopenharmony_ci         LLVMValueRef y0)
659bf215546Sopenharmony_ci{
660bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
661bf215546Sopenharmony_ci   struct lp_build_context *coeff_bld = &bld->coeff_bld;
662bf215546Sopenharmony_ci
663bf215546Sopenharmony_ci   bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
664bf215546Sopenharmony_ci   bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
665bf215546Sopenharmony_ci}
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci/**
669bf215546Sopenharmony_ci * Initialize fragment shader input attribute info.
670bf215546Sopenharmony_ci */
671bf215546Sopenharmony_civoid
672bf215546Sopenharmony_cilp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
673bf215546Sopenharmony_ci                         struct gallivm_state *gallivm,
674bf215546Sopenharmony_ci                         unsigned num_inputs,
675bf215546Sopenharmony_ci                         const struct lp_shader_input *inputs,
676bf215546Sopenharmony_ci                         boolean pixel_center_integer,
677bf215546Sopenharmony_ci                         unsigned coverage_samples,
678bf215546Sopenharmony_ci                         LLVMValueRef sample_pos_array,
679bf215546Sopenharmony_ci                         LLVMValueRef num_loop,
680bf215546Sopenharmony_ci                         LLVMBuilderRef builder,
681bf215546Sopenharmony_ci                         struct lp_type type,
682bf215546Sopenharmony_ci                         LLVMValueRef a0_ptr,
683bf215546Sopenharmony_ci                         LLVMValueRef dadx_ptr,
684bf215546Sopenharmony_ci                         LLVMValueRef dady_ptr,
685bf215546Sopenharmony_ci                         LLVMValueRef x0,
686bf215546Sopenharmony_ci                         LLVMValueRef y0)
687bf215546Sopenharmony_ci{
688bf215546Sopenharmony_ci   struct lp_type coeff_type;
689bf215546Sopenharmony_ci   struct lp_type setup_type;
690bf215546Sopenharmony_ci   unsigned attrib;
691bf215546Sopenharmony_ci   unsigned chan;
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci   memset(bld, 0, sizeof *bld);
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci   memset(&coeff_type, 0, sizeof coeff_type);
696bf215546Sopenharmony_ci   coeff_type.floating = TRUE;
697bf215546Sopenharmony_ci   coeff_type.sign = TRUE;
698bf215546Sopenharmony_ci   coeff_type.width = 32;
699bf215546Sopenharmony_ci   coeff_type.length = type.length;
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   memset(&setup_type, 0, sizeof setup_type);
702bf215546Sopenharmony_ci   setup_type.floating = TRUE;
703bf215546Sopenharmony_ci   setup_type.sign = TRUE;
704bf215546Sopenharmony_ci   setup_type.width = 32;
705bf215546Sopenharmony_ci   setup_type.length = TGSI_NUM_CHANNELS;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   /* XXX: we don't support interpolating into any other types */
709bf215546Sopenharmony_ci   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
712bf215546Sopenharmony_ci   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci   /* For convenience */
715bf215546Sopenharmony_ci   bld->pos = bld->attribs[0];
716bf215546Sopenharmony_ci   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   /* Position */
719bf215546Sopenharmony_ci   bld->mask[0] = TGSI_WRITEMASK_XYZW;
720bf215546Sopenharmony_ci   bld->interp[0] = LP_INTERP_LINEAR;
721bf215546Sopenharmony_ci   bld->interp_loc[0] = 0;
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci   /* Inputs */
724bf215546Sopenharmony_ci   for (attrib = 0; attrib < num_inputs; ++attrib) {
725bf215546Sopenharmony_ci      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
726bf215546Sopenharmony_ci      bld->interp[1 + attrib] = inputs[attrib].interp;
727bf215546Sopenharmony_ci      bld->interp_loc[1 + attrib] = inputs[attrib].location;
728bf215546Sopenharmony_ci   }
729bf215546Sopenharmony_ci   bld->num_attribs = 1 + num_inputs;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   /* needed for indirect */
732bf215546Sopenharmony_ci   bld->a0_ptr = a0_ptr;
733bf215546Sopenharmony_ci   bld->dadx_ptr = dadx_ptr;
734bf215546Sopenharmony_ci   bld->dady_ptr = dady_ptr;
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   /* Ensure all masked out input channels have a valid value */
737bf215546Sopenharmony_ci   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
738bf215546Sopenharmony_ci      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
739bf215546Sopenharmony_ci         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
740bf215546Sopenharmony_ci      }
741bf215546Sopenharmony_ci   }
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci   if (pixel_center_integer) {
744bf215546Sopenharmony_ci      bld->pos_offset = 0.0;
745bf215546Sopenharmony_ci   } else {
746bf215546Sopenharmony_ci      bld->pos_offset = 0.5;
747bf215546Sopenharmony_ci   }
748bf215546Sopenharmony_ci   bld->coverage_samples = coverage_samples;
749bf215546Sopenharmony_ci   bld->num_loop = num_loop;
750bf215546Sopenharmony_ci   bld->sample_pos_array = sample_pos_array;
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   pos_init(bld, x0, y0);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   /*
755bf215546Sopenharmony_ci    * Simple method (single step interpolation) may be slower if vector length
756bf215546Sopenharmony_ci    * is just 4, but the results are different (generally less accurate) with
757bf215546Sopenharmony_ci    * the other method, so always use more accurate version.
758bf215546Sopenharmony_ci    */
759bf215546Sopenharmony_ci   {
760bf215546Sopenharmony_ci      /* XXX this should use a global static table */
761bf215546Sopenharmony_ci      unsigned i;
762bf215546Sopenharmony_ci      unsigned num_loops = 16 / type.length;
763bf215546Sopenharmony_ci      LLVMValueRef pixoffx, pixoffy, index;
764bf215546Sopenharmony_ci      LLVMValueRef ptr;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci      bld->store_elem_type = lp_build_vec_type(gallivm, type);
767bf215546Sopenharmony_ci      bld->xoffset_store = lp_build_array_alloca(gallivm,
768bf215546Sopenharmony_ci                                                 bld->store_elem_type,
769bf215546Sopenharmony_ci                                                 lp_build_const_int32(gallivm, num_loops),
770bf215546Sopenharmony_ci                                                 "");
771bf215546Sopenharmony_ci      bld->yoffset_store = lp_build_array_alloca(gallivm,
772bf215546Sopenharmony_ci                                                 bld->store_elem_type,
773bf215546Sopenharmony_ci                                                 lp_build_const_int32(gallivm, num_loops),
774bf215546Sopenharmony_ci                                                 "");
775bf215546Sopenharmony_ci      for (i = 0; i < num_loops; i++) {
776bf215546Sopenharmony_ci         index = lp_build_const_int32(gallivm, i);
777bf215546Sopenharmony_ci         calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
778bf215546Sopenharmony_ci         ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &index, 1, "");
779bf215546Sopenharmony_ci         LLVMBuildStore(builder, pixoffx, ptr);
780bf215546Sopenharmony_ci         ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &index, 1, "");
781bf215546Sopenharmony_ci         LLVMBuildStore(builder, pixoffy, ptr);
782bf215546Sopenharmony_ci      }
783bf215546Sopenharmony_ci   }
784bf215546Sopenharmony_ci   coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
785bf215546Sopenharmony_ci}
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci/*
789bf215546Sopenharmony_ci * Advance the position and inputs to the given quad within the block.
790bf215546Sopenharmony_ci */
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_civoid
793bf215546Sopenharmony_cilp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
794bf215546Sopenharmony_ci                                      struct gallivm_state *gallivm,
795bf215546Sopenharmony_ci                                      LLVMValueRef quad_start_index,
796bf215546Sopenharmony_ci                                      LLVMValueRef mask_store,
797bf215546Sopenharmony_ci                                      LLVMValueRef sample_id)
798bf215546Sopenharmony_ci{
799bf215546Sopenharmony_ci   attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
800bf215546Sopenharmony_ci}
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_civoid
803bf215546Sopenharmony_cilp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
804bf215546Sopenharmony_ci                                   struct gallivm_state *gallivm,
805bf215546Sopenharmony_ci                                   LLVMValueRef quad_start_index,
806bf215546Sopenharmony_ci                                   LLVMValueRef sample_id)
807bf215546Sopenharmony_ci{
808bf215546Sopenharmony_ci   attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
809bf215546Sopenharmony_ci}
810bf215546Sopenharmony_ci
811