1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2010 VMware.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "util/u_math.h"
30bf215546Sopenharmony_ci#include "util/u_memory.h"
31bf215546Sopenharmony_ci#include "util/os_time.h"
32bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h"
33bf215546Sopenharmony_ci#include "gallivm/lp_bld_bitarit.h"
34bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h"
35bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h"
36bf215546Sopenharmony_ci#include "gallivm/lp_bld_init.h"
37bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h"
38bf215546Sopenharmony_ci#include "gallivm/lp_bld_intr.h"
39bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h"
40bf215546Sopenharmony_ci#include "gallivm/lp_bld_type.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci#include "lp_perf.h"
43bf215546Sopenharmony_ci#include "lp_debug.h"
44bf215546Sopenharmony_ci#include "lp_flush.h"
45bf215546Sopenharmony_ci#include "lp_screen.h"
46bf215546Sopenharmony_ci#include "lp_context.h"
47bf215546Sopenharmony_ci#include "lp_state.h"
48bf215546Sopenharmony_ci#include "lp_state_fs.h"
49bf215546Sopenharmony_ci#include "lp_state_setup.h"
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci
/**
 * Setup shader number (for debugging).
 *
 * File-scope statics are zero-initialized, so the counter starts at 0
 * without an explicit initializer.
 */
static unsigned setup_no;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci
/* Currently organized to interpolate full float[4] attributes even
 * when some elements are unused.  Later, vertex data can be packed more
 * closely.
 */
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_cistruct lp_setup_args
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   /* Function arguments:
65bf215546Sopenharmony_ci    */
66bf215546Sopenharmony_ci   LLVMValueRef v0;
67bf215546Sopenharmony_ci   LLVMValueRef v1;
68bf215546Sopenharmony_ci   LLVMValueRef v2;
69bf215546Sopenharmony_ci   LLVMValueRef facing;		/* boolean */
70bf215546Sopenharmony_ci   LLVMValueRef a0;
71bf215546Sopenharmony_ci   LLVMValueRef dadx;
72bf215546Sopenharmony_ci   LLVMValueRef dady;
73bf215546Sopenharmony_ci   LLVMValueRef key;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   /* Derived:
76bf215546Sopenharmony_ci    */
77bf215546Sopenharmony_ci   LLVMValueRef x0_center;
78bf215546Sopenharmony_ci   LLVMValueRef y0_center;
79bf215546Sopenharmony_ci   LLVMValueRef dy20_ooa;
80bf215546Sopenharmony_ci   LLVMValueRef dy01_ooa;
81bf215546Sopenharmony_ci   LLVMValueRef dx20_ooa;
82bf215546Sopenharmony_ci   LLVMValueRef dx01_ooa;
83bf215546Sopenharmony_ci   struct lp_build_context bld;
84bf215546Sopenharmony_ci};
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_cistatic void
88bf215546Sopenharmony_cistore_coef(struct gallivm_state *gallivm,
89bf215546Sopenharmony_ci           const struct lp_setup_args *args,
90bf215546Sopenharmony_ci           unsigned slot,
91bf215546Sopenharmony_ci           LLVMValueRef a0,
92bf215546Sopenharmony_ci           LLVMValueRef dadx,
93bf215546Sopenharmony_ci           LLVMValueRef dady)
94bf215546Sopenharmony_ci{
95bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
96bf215546Sopenharmony_ci   LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   LLVMBuildStore(builder,
99bf215546Sopenharmony_ci                  a0,
100bf215546Sopenharmony_ci                  LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   LLVMBuildStore(builder,
103bf215546Sopenharmony_ci                  dadx,
104bf215546Sopenharmony_ci                  LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   LLVMBuildStore(builder,
107bf215546Sopenharmony_ci                  dady,
108bf215546Sopenharmony_ci                  LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
109bf215546Sopenharmony_ci}
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_cistatic void
113bf215546Sopenharmony_ciemit_constant_coef4(struct gallivm_state *gallivm,
114bf215546Sopenharmony_ci                    const struct lp_setup_args *args,
115bf215546Sopenharmony_ci                    unsigned slot,
116bf215546Sopenharmony_ci                    LLVMValueRef vert)
117bf215546Sopenharmony_ci{
118bf215546Sopenharmony_ci   store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero);
119bf215546Sopenharmony_ci}
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci/**
123bf215546Sopenharmony_ci * Setup the fragment input attribute with the front-facing value.
124bf215546Sopenharmony_ci * \param frontface  is the triangle front facing?
125bf215546Sopenharmony_ci */
126bf215546Sopenharmony_cistatic void
127bf215546Sopenharmony_ciemit_facing_coef(struct gallivm_state *gallivm,
128bf215546Sopenharmony_ci                 struct lp_setup_args *args,
129bf215546Sopenharmony_ci                 unsigned slot)
130bf215546Sopenharmony_ci{
131bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
132bf215546Sopenharmony_ci   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
133bf215546Sopenharmony_ci   LLVMValueRef a0_0 = args->facing;
134bf215546Sopenharmony_ci   LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
135bf215546Sopenharmony_ci   LLVMValueRef a0, face_val;
136bf215546Sopenharmony_ci   const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
137bf215546Sopenharmony_ci                                       PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
138bf215546Sopenharmony_ci   /* Our face val is either 1 or 0 so we do
139bf215546Sopenharmony_ci    * face = (val * 2) - 1
140bf215546Sopenharmony_ci    * to make it 1 or -1
141bf215546Sopenharmony_ci    */
142bf215546Sopenharmony_ci   face_val =
143bf215546Sopenharmony_ci      LLVMBuildFAdd(builder,
144bf215546Sopenharmony_ci                    LLVMBuildFMul(builder, a0_0f,
145bf215546Sopenharmony_ci                                  lp_build_const_float(gallivm, 2.0),
146bf215546Sopenharmony_ci                                  ""),
147bf215546Sopenharmony_ci                    lp_build_const_float(gallivm, -1.0),
148bf215546Sopenharmony_ci                    "facing");
149bf215546Sopenharmony_ci   face_val = lp_build_broadcast_scalar(&args->bld, face_val);
150bf215546Sopenharmony_ci   a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero);
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_cistatic LLVMValueRef
157bf215546Sopenharmony_civert_attrib(struct gallivm_state *gallivm,
158bf215546Sopenharmony_ci            LLVMValueRef vert,
159bf215546Sopenharmony_ci            int attr,
160bf215546Sopenharmony_ci            int elem,
161bf215546Sopenharmony_ci            const char *name)
162bf215546Sopenharmony_ci{
163bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
164bf215546Sopenharmony_ci   LLVMValueRef idx[2];
165bf215546Sopenharmony_ci   idx[0] = lp_build_const_int32(gallivm, attr);
166bf215546Sopenharmony_ci   idx[1] = lp_build_const_int32(gallivm, elem);
167bf215546Sopenharmony_ci   return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
168bf215546Sopenharmony_ci}
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_cistatic void
172bf215546Sopenharmony_cilp_twoside(struct gallivm_state *gallivm,
173bf215546Sopenharmony_ci           struct lp_setup_args *args,
174bf215546Sopenharmony_ci           const struct lp_setup_variant_key *key,
175bf215546Sopenharmony_ci           int bcolor_slot,
176bf215546Sopenharmony_ci           LLVMValueRef attribv[3])
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
179bf215546Sopenharmony_ci   LLVMValueRef a0_back, a1_back, a2_back;
180bf215546Sopenharmony_ci   LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   LLVMValueRef facing = args->facing;
183bf215546Sopenharmony_ci   LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing,
184bf215546Sopenharmony_ci                                             lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
187bf215546Sopenharmony_ci   a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
188bf215546Sopenharmony_ci   a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* Possibly swap the front and back attrib values,
191bf215546Sopenharmony_ci    *
192bf215546Sopenharmony_ci    * Prefer select to if so we don't have to worry about phis or
193bf215546Sopenharmony_ci    * allocas.
194bf215546Sopenharmony_ci    */
195bf215546Sopenharmony_ci   attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
196bf215546Sopenharmony_ci   attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
197bf215546Sopenharmony_ci   attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
198bf215546Sopenharmony_ci}
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_cistatic LLVMValueRef
202bf215546Sopenharmony_cilp_do_offset_tri(struct gallivm_state *gallivm,
203bf215546Sopenharmony_ci                 struct lp_setup_args *args,
204bf215546Sopenharmony_ci                 const struct lp_setup_variant_key *key,
205bf215546Sopenharmony_ci                 LLVMValueRef inv_det,
206bf215546Sopenharmony_ci                 LLVMValueRef dxyz01,
207bf215546Sopenharmony_ci                 LLVMValueRef dxyz20,
208bf215546Sopenharmony_ci                 LLVMValueRef attribv[3])
209bf215546Sopenharmony_ci{
210bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
211bf215546Sopenharmony_ci   struct lp_build_context flt_scalar_bld;
212bf215546Sopenharmony_ci   struct lp_build_context int_scalar_bld;
213bf215546Sopenharmony_ci   struct lp_build_context *bld = &args->bld;
214bf215546Sopenharmony_ci   LLVMValueRef zoffset, mult;
215bf215546Sopenharmony_ci   LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
216bf215546Sopenharmony_ci   LLVMValueRef max, max_value, res12;
217bf215546Sopenharmony_ci   LLVMValueRef shuffles[4];
218bf215546Sopenharmony_ci   LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
219bf215546Sopenharmony_ci   LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
220bf215546Sopenharmony_ci   LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
221bf215546Sopenharmony_ci   LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
222bf215546Sopenharmony_ci   LLVMValueRef threei  = lp_build_const_int32(gallivm, 3);
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   /* (res12) = cross(e,f).xy */
225bf215546Sopenharmony_ci   shuffles[0] = twoi;
226bf215546Sopenharmony_ci   shuffles[1] = zeroi;
227bf215546Sopenharmony_ci   shuffles[2] = onei;
228bf215546Sopenharmony_ci   shuffles[3] = twoi;
229bf215546Sopenharmony_ci   dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   shuffles[0] = onei;
232bf215546Sopenharmony_ci   shuffles[1] = twoi;
233bf215546Sopenharmony_ci   shuffles[2] = twoi;
234bf215546Sopenharmony_ci   shuffles[3] = zeroi;
235bf215546Sopenharmony_ci   dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci   shuffles[0] = twoi;
240bf215546Sopenharmony_ci   shuffles[1] = threei;
241bf215546Sopenharmony_ci   shuffles[2] = LLVMGetUndef(shuf_type);
242bf215546Sopenharmony_ci   shuffles[3] = LLVMGetUndef(shuf_type);
243bf215546Sopenharmony_ci   dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
244bf215546Sopenharmony_ci                                        LLVMConstVector(shuffles, 4), "");
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
249bf215546Sopenharmony_ci   dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
250bf215546Sopenharmony_ci   dzdxdzdy = lp_build_abs(bld, dzdxdzdy);
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
253bf215546Sopenharmony_ci   dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
256bf215546Sopenharmony_ci   max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
257bf215546Sopenharmony_ci   max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   mult = LLVMBuildFMul(b, max_value,
260bf215546Sopenharmony_ci                        lp_build_const_float(gallivm,
261bf215546Sopenharmony_ci                                             key->pgon_offset_scale), "");
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   if (key->floating_point_depth) {
266bf215546Sopenharmony_ci      /*
267bf215546Sopenharmony_ci       * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) -
268bf215546Sopenharmony_ci       *           mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale
269bf215546Sopenharmony_ci       *
270bf215546Sopenharmony_ci       * NOTE: Assumes IEEE float32.
271bf215546Sopenharmony_ci       */
272bf215546Sopenharmony_ci      LLVMValueRef c23_shifted, exp_mask, bias, exp;
273bf215546Sopenharmony_ci      LLVMValueRef maxz_value, maxz0z1_value;
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci      lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci      c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
278bf215546Sopenharmony_ci      exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci      maxz0z1_value = lp_build_max(&flt_scalar_bld,
281bf215546Sopenharmony_ci                         lp_build_abs(&flt_scalar_bld,
282bf215546Sopenharmony_ci                            LLVMBuildExtractElement(b, attribv[0], twoi, "")),
283bf215546Sopenharmony_ci                         lp_build_abs(&flt_scalar_bld,
284bf215546Sopenharmony_ci                            LLVMBuildExtractElement(b, attribv[1], twoi, "")));
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci      maxz_value = lp_build_max(&flt_scalar_bld,
287bf215546Sopenharmony_ci                      lp_build_abs(&flt_scalar_bld,
288bf215546Sopenharmony_ci                         LLVMBuildExtractElement(b, attribv[2], twoi, "")),
289bf215546Sopenharmony_ci                      maxz0z1_value);
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci      exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
292bf215546Sopenharmony_ci      exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
293bf215546Sopenharmony_ci      exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
294bf215546Sopenharmony_ci      /* Clamping to zero means mrd will be zero for very small numbers,
295bf215546Sopenharmony_ci       * but specs do not indicate this should be prevented by clamping
296bf215546Sopenharmony_ci       * mrd to smallest normal number instead. */
297bf215546Sopenharmony_ci      exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
298bf215546Sopenharmony_ci      exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci      bias = LLVMBuildFMul(b, exp,
301bf215546Sopenharmony_ci                           lp_build_const_float(gallivm, key->pgon_offset_units),
302bf215546Sopenharmony_ci                           "bias");
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci      zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
305bf215546Sopenharmony_ci   } else {
306bf215546Sopenharmony_ci      /*
307bf215546Sopenharmony_ci       * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
308bf215546Sopenharmony_ci       */
309bf215546Sopenharmony_ci      zoffset = LLVMBuildFAdd(b,
310bf215546Sopenharmony_ci                              lp_build_const_float(gallivm, key->pgon_offset_units),
311bf215546Sopenharmony_ci                              mult, "zoffset");
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   if (key->pgon_offset_clamp > 0) {
315bf215546Sopenharmony_ci      zoffset = lp_build_min(&flt_scalar_bld,
316bf215546Sopenharmony_ci                             lp_build_const_float(gallivm, key->pgon_offset_clamp),
317bf215546Sopenharmony_ci                             zoffset);
318bf215546Sopenharmony_ci   } else if (key->pgon_offset_clamp < 0) {
319bf215546Sopenharmony_ci      zoffset = lp_build_max(&flt_scalar_bld,
320bf215546Sopenharmony_ci                             lp_build_const_float(gallivm, key->pgon_offset_clamp),
321bf215546Sopenharmony_ci                             zoffset);
322bf215546Sopenharmony_ci   }
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   return zoffset;
325bf215546Sopenharmony_ci}
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_cistatic void
329bf215546Sopenharmony_ciload_attribute(struct gallivm_state *gallivm,
330bf215546Sopenharmony_ci               struct lp_setup_args *args,
331bf215546Sopenharmony_ci               const struct lp_setup_variant_key *key,
332bf215546Sopenharmony_ci               unsigned vert_attr,
333bf215546Sopenharmony_ci               LLVMValueRef attribv[3])
334bf215546Sopenharmony_ci{
335bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
336bf215546Sopenharmony_ci   LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   /* Load the vertex data
339bf215546Sopenharmony_ci    */
340bf215546Sopenharmony_ci   attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
341bf215546Sopenharmony_ci   attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
342bf215546Sopenharmony_ci   attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   /* Potentially modify it according to twoside, etc:
345bf215546Sopenharmony_ci    */
346bf215546Sopenharmony_ci   if (key->twoside) {
347bf215546Sopenharmony_ci      if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
348bf215546Sopenharmony_ci         lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
349bf215546Sopenharmony_ci      else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
350bf215546Sopenharmony_ci         lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
351bf215546Sopenharmony_ci   }
352bf215546Sopenharmony_ci}
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci/*
356bf215546Sopenharmony_ci * FIXME: interpolation is always done wrt fb origin (0/0).
357bf215546Sopenharmony_ci * However, if some (small) tri is far away from the origin and gradients
358bf215546Sopenharmony_ci * are large, this can lead to HUGE errors, since the a0 value calculated
359bf215546Sopenharmony_ci * here can get very large (with the actual values inside the triangle way
360bf215546Sopenharmony_ci * smaller), leading to complete loss of accuracy. This could be prevented
361bf215546Sopenharmony_ci * by using some point inside (or at corner) of the tri as interpolation
362bf215546Sopenharmony_ci * origin, or just use barycentric interpolation (which GL suggests and is
363bf215546Sopenharmony_ci * what real hw does - you can get the barycentric coordinates from the
364bf215546Sopenharmony_ci * edge functions in rasterization in principle (though we skip these
365bf215546Sopenharmony_ci * sometimes completely in case of tris covering a block fully,
366bf215546Sopenharmony_ci * which obviously wouldn't work)).
367bf215546Sopenharmony_ci */
368bf215546Sopenharmony_cistatic void
369bf215546Sopenharmony_cicalc_coef4(struct gallivm_state *gallivm,
370bf215546Sopenharmony_ci           struct lp_setup_args *args,
371bf215546Sopenharmony_ci           LLVMValueRef a0,
372bf215546Sopenharmony_ci           LLVMValueRef a1,
373bf215546Sopenharmony_ci           LLVMValueRef a2,
374bf215546Sopenharmony_ci           LLVMValueRef out[3])
375bf215546Sopenharmony_ci{
376bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
377bf215546Sopenharmony_ci   LLVMValueRef attr_0;
378bf215546Sopenharmony_ci   LLVMValueRef dy20_ooa = args->dy20_ooa;
379bf215546Sopenharmony_ci   LLVMValueRef dy01_ooa = args->dy01_ooa;
380bf215546Sopenharmony_ci   LLVMValueRef dx20_ooa = args->dx20_ooa;
381bf215546Sopenharmony_ci   LLVMValueRef dx01_ooa = args->dx01_ooa;
382bf215546Sopenharmony_ci   LLVMValueRef x0_center = args->x0_center;
383bf215546Sopenharmony_ci   LLVMValueRef y0_center = args->y0_center;
384bf215546Sopenharmony_ci   LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
385bf215546Sopenharmony_ci   LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   /* Calculate dadx (vec4f)
388bf215546Sopenharmony_ci    */
389bf215546Sopenharmony_ci   LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
390bf215546Sopenharmony_ci   LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
391bf215546Sopenharmony_ci   LLVMValueRef dadx          = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci   /* Calculate dady (vec4f)
394bf215546Sopenharmony_ci    */
395bf215546Sopenharmony_ci   LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
396bf215546Sopenharmony_ci   LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
397bf215546Sopenharmony_ci   LLVMValueRef dady          = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   /* Calculate a0 - the attribute value at the origin
400bf215546Sopenharmony_ci    */
401bf215546Sopenharmony_ci   LLVMValueRef dadx_x0    = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
402bf215546Sopenharmony_ci   LLVMValueRef dady_y0    = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
403bf215546Sopenharmony_ci   LLVMValueRef attr_v0    = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
404bf215546Sopenharmony_ci   attr_0                  = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   out[0] = attr_0;
407bf215546Sopenharmony_ci   out[1] = dadx;
408bf215546Sopenharmony_ci   out[2] = dady;
409bf215546Sopenharmony_ci}
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_cistatic void
413bf215546Sopenharmony_ciemit_coef4(struct gallivm_state *gallivm,
414bf215546Sopenharmony_ci           struct lp_setup_args *args,
415bf215546Sopenharmony_ci           unsigned slot,
416bf215546Sopenharmony_ci           LLVMValueRef a0,
417bf215546Sopenharmony_ci           LLVMValueRef a1,
418bf215546Sopenharmony_ci           LLVMValueRef a2)
419bf215546Sopenharmony_ci{
420bf215546Sopenharmony_ci   LLVMValueRef coeffs[3];
421bf215546Sopenharmony_ci   calc_coef4(gallivm, args, a0, a1, a2, coeffs);
422bf215546Sopenharmony_ci   store_coef(gallivm, args, slot, coeffs[0], coeffs[1], coeffs[2]);
423bf215546Sopenharmony_ci}
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_cistatic void
427bf215546Sopenharmony_ciemit_linear_coef(struct gallivm_state *gallivm,
428bf215546Sopenharmony_ci                 struct lp_setup_args *args,
429bf215546Sopenharmony_ci                 unsigned slot,
430bf215546Sopenharmony_ci                 LLVMValueRef attribv[3])
431bf215546Sopenharmony_ci{
432bf215546Sopenharmony_ci   /* nothing to do anymore */
433bf215546Sopenharmony_ci   emit_coef4(gallivm, args, slot, attribv[0], attribv[1], attribv[2]);
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci/**
438bf215546Sopenharmony_ci * Compute a0, dadx and dady for a perspective-corrected interpolant,
439bf215546Sopenharmony_ci * for a triangle.
440bf215546Sopenharmony_ci * We basically multiply the vertex value by 1/w before computing
441bf215546Sopenharmony_ci * the plane coefficients (a0, dadx, dady).
442bf215546Sopenharmony_ci * Later, when we compute the value at a particular fragment position we'll
443bf215546Sopenharmony_ci * divide the interpolated value by the interpolated W at that fragment.
444bf215546Sopenharmony_ci */
445bf215546Sopenharmony_cistatic void
446bf215546Sopenharmony_ciapply_perspective_corr(struct gallivm_state *gallivm,
447bf215546Sopenharmony_ci                       struct lp_setup_args *args,
448bf215546Sopenharmony_ci                       unsigned slot,
449bf215546Sopenharmony_ci                       LLVMValueRef attribv[3])
450bf215546Sopenharmony_ci{
451bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci   /* premultiply by 1/w  (v[0][3] is always 1/w):
454bf215546Sopenharmony_ci    */
455bf215546Sopenharmony_ci   LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld,
456bf215546Sopenharmony_ci                            vert_attrib(gallivm, args->v0, 0, 3, "v0_oow"));
457bf215546Sopenharmony_ci   LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld,
458bf215546Sopenharmony_ci                            vert_attrib(gallivm, args->v1, 0, 3, "v1_oow"));
459bf215546Sopenharmony_ci   LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld,
460bf215546Sopenharmony_ci                            vert_attrib(gallivm, args->v2, 0, 3, "v2_oow"));
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
463bf215546Sopenharmony_ci   attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
464bf215546Sopenharmony_ci   attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
465bf215546Sopenharmony_ci}
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci/**
469bf215546Sopenharmony_ci * Compute the inputs-> dadx, dady, a0 values.
470bf215546Sopenharmony_ci */
471bf215546Sopenharmony_cistatic void
472bf215546Sopenharmony_ciemit_tri_coef(struct gallivm_state *gallivm,
473bf215546Sopenharmony_ci              const struct lp_setup_variant_key *key,
474bf215546Sopenharmony_ci              struct lp_setup_args *args)
475bf215546Sopenharmony_ci{
476bf215546Sopenharmony_ci   LLVMValueRef attribs[3];
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   /* setup interpolation for all the remaining attributes */
479bf215546Sopenharmony_ci   for (unsigned slot = 0; slot < key->num_inputs; slot++) {
480bf215546Sopenharmony_ci      switch (key->inputs[slot].interp) {
481bf215546Sopenharmony_ci      case LP_INTERP_CONSTANT:
482bf215546Sopenharmony_ci         load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
483bf215546Sopenharmony_ci         if (key->flatshade_first) {
484bf215546Sopenharmony_ci            emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
485bf215546Sopenharmony_ci         } else {
486bf215546Sopenharmony_ci            emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
487bf215546Sopenharmony_ci         }
488bf215546Sopenharmony_ci         break;
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci      case LP_INTERP_LINEAR:
491bf215546Sopenharmony_ci         load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
492bf215546Sopenharmony_ci         emit_linear_coef(gallivm, args, slot+1, attribs);
493bf215546Sopenharmony_ci         break;
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci      case LP_INTERP_PERSPECTIVE:
496bf215546Sopenharmony_ci         load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
497bf215546Sopenharmony_ci         apply_perspective_corr(gallivm, args, slot+1, attribs);
498bf215546Sopenharmony_ci         emit_linear_coef(gallivm, args, slot+1, attribs);
499bf215546Sopenharmony_ci         break;
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci      case LP_INTERP_POSITION:
502bf215546Sopenharmony_ci         /*
503bf215546Sopenharmony_ci          * The generated pixel interpolators will pick up the coeffs from
504bf215546Sopenharmony_ci          * slot 0.
505bf215546Sopenharmony_ci          */
506bf215546Sopenharmony_ci         break;
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci      case LP_INTERP_FACING:
509bf215546Sopenharmony_ci         emit_facing_coef(gallivm, args, slot+1);
510bf215546Sopenharmony_ci         break;
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci      default:
513bf215546Sopenharmony_ci         assert(0);
514bf215546Sopenharmony_ci      }
515bf215546Sopenharmony_ci   }
516bf215546Sopenharmony_ci}
517bf215546Sopenharmony_ci
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci/* XXX: generic code:
520bf215546Sopenharmony_ci */
521bf215546Sopenharmony_cistatic void
522bf215546Sopenharmony_ciset_noalias(LLVMBuilderRef builder,
523bf215546Sopenharmony_ci            LLVMValueRef function,
524bf215546Sopenharmony_ci            const LLVMTypeRef *arg_types,
525bf215546Sopenharmony_ci            int nr_args)
526bf215546Sopenharmony_ci{
527bf215546Sopenharmony_ci   for (int i = 0; i < nr_args; ++i) {
528bf215546Sopenharmony_ci      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
529bf215546Sopenharmony_ci         lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
530bf215546Sopenharmony_ci      }
531bf215546Sopenharmony_ci   }
532bf215546Sopenharmony_ci}
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_cistatic void
536bf215546Sopenharmony_ciinit_args(struct gallivm_state *gallivm,
537bf215546Sopenharmony_ci          const struct lp_setup_variant_key *key,
538bf215546Sopenharmony_ci          struct lp_setup_args *args)
539bf215546Sopenharmony_ci{
540bf215546Sopenharmony_ci   LLVMBuilderRef b = gallivm->builder;
541bf215546Sopenharmony_ci   LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
542bf215546Sopenharmony_ci   LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
543bf215546Sopenharmony_ci   LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
544bf215546Sopenharmony_ci   LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
545bf215546Sopenharmony_ci   LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
546bf215546Sopenharmony_ci   LLVMValueRef e, f, ef, ooa;
547bf215546Sopenharmony_ci   LLVMValueRef shuffles[4], shuf10;
548bf215546Sopenharmony_ci   LLVMValueRef attr_pos[3];
549bf215546Sopenharmony_ci   LLVMValueRef polygon_offset;
550bf215546Sopenharmony_ci   struct lp_type typef4 = lp_type_float_vec(32, 128);
551bf215546Sopenharmony_ci   struct lp_build_context bld;
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   lp_build_context_init(&bld, gallivm, typef4);
554bf215546Sopenharmony_ci   args->bld = bld;
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci   /* The internal position input is in slot zero:
557bf215546Sopenharmony_ci    */
558bf215546Sopenharmony_ci   load_attribute(gallivm, args, key, 0, attr_pos);
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci   pixel_center = lp_build_const_vec(gallivm, typef4,
561bf215546Sopenharmony_ci                                     (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0);
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   /*
564bf215546Sopenharmony_ci    * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
565bf215546Sopenharmony_ci    * also offset_tri uses actually xyz in them
566bf215546Sopenharmony_ci    */
567bf215546Sopenharmony_ci   xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci   dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
570bf215546Sopenharmony_ci   dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci   shuffles[0] = onei;
573bf215546Sopenharmony_ci   shuffles[1] = zeroi;
574bf215546Sopenharmony_ci   shuffles[2] = LLVMGetUndef(shuf_type);
575bf215546Sopenharmony_ci   shuffles[3] = LLVMGetUndef(shuf_type);
576bf215546Sopenharmony_ci   shuf10 = LLVMConstVector(shuffles, 4);
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci   ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
581bf215546Sopenharmony_ci   e = LLVMBuildExtractElement(b, ef, zeroi, "");
582bf215546Sopenharmony_ci   f = LLVMBuildExtractElement(b, ef, onei, "");
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   ooa  = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   ooa = lp_build_broadcast_scalar(&bld, ooa);
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   /* tri offset calc shares a lot of arithmetic, do it here */
589bf215546Sopenharmony_ci   if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
590bf215546Sopenharmony_ci      polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
591bf215546Sopenharmony_ci   } else {
592bf215546Sopenharmony_ci      polygon_offset = lp_build_const_float(gallivm, 0.0f);
593bf215546Sopenharmony_ci   }
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
596bf215546Sopenharmony_ci   dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   args->dy20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
599bf215546Sopenharmony_ci   args->dy01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   args->dx20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
602bf215546Sopenharmony_ci   args->dx01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
605bf215546Sopenharmony_ci   args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci   LLVMValueRef coeffs[3];
608bf215546Sopenharmony_ci   calc_coef4(gallivm, args, attr_pos[0], attr_pos[1], attr_pos[2], coeffs);
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci   /* This is a bit sneaky:
611bf215546Sopenharmony_ci    * Because we observe that the X component of A0 is otherwise unused,
612bf215546Sopenharmony_ci    * we can overwrite it with the computed polygon-offset value, to make
613bf215546Sopenharmony_ci    * sure it's available in the fragment shader without having to change
614bf215546Sopenharmony_ci    * the interface (which is error-prone).
615bf215546Sopenharmony_ci    */
616bf215546Sopenharmony_ci   coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset,
617bf215546Sopenharmony_ci                                      lp_build_const_int32(gallivm, 0), "");
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   store_coef(gallivm, args, 0, coeffs[0], coeffs[1], coeffs[2]);
620bf215546Sopenharmony_ci}
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci/**
624bf215546Sopenharmony_ci * Generate the runtime callable function for the coefficient calculation.
625bf215546Sopenharmony_ci *
626bf215546Sopenharmony_ci */
627bf215546Sopenharmony_cistatic struct lp_setup_variant *
628bf215546Sopenharmony_cigenerate_setup_variant(struct lp_setup_variant_key *key,
629bf215546Sopenharmony_ci                       struct llvmpipe_context *lp)
630bf215546Sopenharmony_ci{
631bf215546Sopenharmony_ci   int64_t t0 = 0, t1;
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   if (0)
634bf215546Sopenharmony_ci      goto fail;
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   struct lp_setup_variant *variant = CALLOC_STRUCT(lp_setup_variant);
637bf215546Sopenharmony_ci   if (!variant)
638bf215546Sopenharmony_ci      goto fail;
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   variant->no = setup_no++;
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   char func_name[64];
643bf215546Sopenharmony_ci   snprintf(func_name, sizeof(func_name), "setup_variant_%u",
644bf215546Sopenharmony_ci            variant->no);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   struct gallivm_state *gallivm;
647bf215546Sopenharmony_ci   variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL);
648bf215546Sopenharmony_ci   if (!variant->gallivm) {
649bf215546Sopenharmony_ci      goto fail;
650bf215546Sopenharmony_ci   }
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci   if (LP_DEBUG & DEBUG_COUNTERS) {
655bf215546Sopenharmony_ci      t0 = os_time_get();
656bf215546Sopenharmony_ci   }
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci   memcpy(&variant->key, key, key->size);
659bf215546Sopenharmony_ci   variant->list_item_global.base = variant;
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   /* Currently always deal with full 4-wide vertex attributes from
662bf215546Sopenharmony_ci    * the vertices.
663bf215546Sopenharmony_ci    */
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   LLVMTypeRef vec4f_type =
666bf215546Sopenharmony_ci      LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   LLVMTypeRef arg_types[8];
669bf215546Sopenharmony_ci   arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
670bf215546Sopenharmony_ci   arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
671bf215546Sopenharmony_ci   arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
672bf215546Sopenharmony_ci   arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
673bf215546Sopenharmony_ci   arg_types[4] = LLVMPointerType(vec4f_type, 0);	/* a0, aligned */
674bf215546Sopenharmony_ci   arg_types[5] = LLVMPointerType(vec4f_type, 0);	/* dadx, aligned */
675bf215546Sopenharmony_ci   arg_types[6] = LLVMPointerType(vec4f_type, 0);	/* dady, aligned */
676bf215546Sopenharmony_ci   arg_types[7] = LLVMPointerType(vec4f_type, 0);	/* key (placeholder) */
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci   LLVMTypeRef func_type =
679bf215546Sopenharmony_ci      LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
680bf215546Sopenharmony_ci                       arg_types, ARRAY_SIZE(arg_types), 0);
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci   variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
683bf215546Sopenharmony_ci   if (!variant->function)
684bf215546Sopenharmony_ci      goto fail;
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci   struct lp_setup_args args;
689bf215546Sopenharmony_ci   args.v0       = LLVMGetParam(variant->function, 0);
690bf215546Sopenharmony_ci   args.v1       = LLVMGetParam(variant->function, 1);
691bf215546Sopenharmony_ci   args.v2       = LLVMGetParam(variant->function, 2);
692bf215546Sopenharmony_ci   args.facing   = LLVMGetParam(variant->function, 3);
693bf215546Sopenharmony_ci   args.a0       = LLVMGetParam(variant->function, 4);
694bf215546Sopenharmony_ci   args.dadx     = LLVMGetParam(variant->function, 5);
695bf215546Sopenharmony_ci   args.dady     = LLVMGetParam(variant->function, 6);
696bf215546Sopenharmony_ci   args.key      = LLVMGetParam(variant->function, 7);
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci   lp_build_name(args.v0, "in_v0");
699bf215546Sopenharmony_ci   lp_build_name(args.v1, "in_v1");
700bf215546Sopenharmony_ci   lp_build_name(args.v2, "in_v2");
701bf215546Sopenharmony_ci   lp_build_name(args.facing, "in_facing");
702bf215546Sopenharmony_ci   lp_build_name(args.a0, "out_a0");
703bf215546Sopenharmony_ci   lp_build_name(args.dadx, "out_dadx");
704bf215546Sopenharmony_ci   lp_build_name(args.dady, "out_dady");
705bf215546Sopenharmony_ci   lp_build_name(args.key, "key");
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci   /*
708bf215546Sopenharmony_ci    * Function body
709bf215546Sopenharmony_ci    */
710bf215546Sopenharmony_ci   LLVMBasicBlockRef block =
711bf215546Sopenharmony_ci      LLVMAppendBasicBlockInContext(gallivm->context,
712bf215546Sopenharmony_ci                                    variant->function, "entry");
713bf215546Sopenharmony_ci   LLVMPositionBuilderAtEnd(builder, block);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
716bf215546Sopenharmony_ci   init_args(gallivm, &variant->key, &args);
717bf215546Sopenharmony_ci   emit_tri_coef(gallivm, &variant->key, &args);
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci   LLVMBuildRetVoid(builder);
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   gallivm_verify_function(gallivm, variant->function);
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci   gallivm_compile_module(gallivm);
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci   variant->jit_function = (lp_jit_setup_triangle)
726bf215546Sopenharmony_ci      gallivm_jit_function(gallivm, variant->function);
727bf215546Sopenharmony_ci   if (!variant->jit_function)
728bf215546Sopenharmony_ci      goto fail;
729bf215546Sopenharmony_ci
730bf215546Sopenharmony_ci   gallivm_free_ir(variant->gallivm);
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci   /*
733bf215546Sopenharmony_ci    * Update timing information:
734bf215546Sopenharmony_ci    */
735bf215546Sopenharmony_ci   if (LP_DEBUG & DEBUG_COUNTERS) {
736bf215546Sopenharmony_ci      t1 = os_time_get();
737bf215546Sopenharmony_ci      LP_COUNT_ADD(llvm_compile_time, t1 - t0);
738bf215546Sopenharmony_ci      LP_COUNT_ADD(nr_llvm_compiles, 1);
739bf215546Sopenharmony_ci   }
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   return variant;
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_cifail:
744bf215546Sopenharmony_ci   if (variant) {
745bf215546Sopenharmony_ci      if (variant->gallivm) {
746bf215546Sopenharmony_ci         gallivm_destroy(variant->gallivm);
747bf215546Sopenharmony_ci      }
748bf215546Sopenharmony_ci      FREE(variant);
749bf215546Sopenharmony_ci   }
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   return NULL;
752bf215546Sopenharmony_ci}
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci
755bf215546Sopenharmony_cistatic void
756bf215546Sopenharmony_cilp_make_setup_variant_key(const struct llvmpipe_context *lp,
757bf215546Sopenharmony_ci                          struct lp_setup_variant_key *key)
758bf215546Sopenharmony_ci{
759bf215546Sopenharmony_ci   const struct lp_fragment_shader *fs = lp->fs;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   assert(sizeof key->inputs[0] == sizeof(uint));
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   key->num_inputs = fs->info.base.num_inputs;
764bf215546Sopenharmony_ci   key->flatshade_first = lp->rasterizer->flatshade_first;
765bf215546Sopenharmony_ci   key->pixel_center_half = lp->rasterizer->half_pixel_center;
766bf215546Sopenharmony_ci   key->multisample = lp->rasterizer->multisample;
767bf215546Sopenharmony_ci   key->twoside = lp->rasterizer->light_twoside;
768bf215546Sopenharmony_ci   key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]);
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   key->color_slot = lp->color_slot[0];
771bf215546Sopenharmony_ci   key->bcolor_slot = lp->bcolor_slot[0];
772bf215546Sopenharmony_ci   key->spec_slot = lp->color_slot[1];
773bf215546Sopenharmony_ci   key->bspec_slot = lp->bcolor_slot[1];
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci   /*
776bf215546Sopenharmony_ci    * If depth is floating point, depth bias is calculated with respect
777bf215546Sopenharmony_ci    * to the primitive's maximum Z value. Retain the original depth bias
778bf215546Sopenharmony_ci    * value until that stage.
779bf215546Sopenharmony_ci    */
780bf215546Sopenharmony_ci   key->floating_point_depth = lp->floating_point_depth;
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   if (key->floating_point_depth) {
783bf215546Sopenharmony_ci      key->pgon_offset_units = (float) lp->rasterizer->offset_units;
784bf215546Sopenharmony_ci   } else {
785bf215546Sopenharmony_ci      key->pgon_offset_units =
786bf215546Sopenharmony_ci         (float) (lp->rasterizer->offset_units * lp->mrd * 2);
787bf215546Sopenharmony_ci   }
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci   key->pgon_offset_scale = lp->rasterizer->offset_scale;
790bf215546Sopenharmony_ci   key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
791bf215546Sopenharmony_ci   key->uses_constant_interp = 0;
792bf215546Sopenharmony_ci   key->pad = 0;
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_ci   for (unsigned i = 0; i < key->num_inputs; i++) {
797bf215546Sopenharmony_ci      if (key->inputs[i].interp == LP_INTERP_COLOR) {
798bf215546Sopenharmony_ci         if (lp->rasterizer->flatshade)
799bf215546Sopenharmony_ci            key->inputs[i].interp = LP_INTERP_CONSTANT;
800bf215546Sopenharmony_ci         else
801bf215546Sopenharmony_ci            key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
802bf215546Sopenharmony_ci      }
803bf215546Sopenharmony_ci      if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
804bf215546Sopenharmony_ci         key->uses_constant_interp = 1;
805bf215546Sopenharmony_ci      }
806bf215546Sopenharmony_ci   }
807bf215546Sopenharmony_ci}
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci
810bf215546Sopenharmony_cistatic void
811bf215546Sopenharmony_ciremove_setup_variant(struct llvmpipe_context *lp,
812bf215546Sopenharmony_ci                     struct lp_setup_variant *variant)
813bf215546Sopenharmony_ci{
814bf215546Sopenharmony_ci   if (gallivm_debug & GALLIVM_DEBUG_IR) {
815bf215546Sopenharmony_ci      debug_printf("llvmpipe: del setup_variant #%u total %u\n",
816bf215546Sopenharmony_ci                   variant->no, lp->nr_setup_variants);
817bf215546Sopenharmony_ci   }
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   if (variant->gallivm) {
820bf215546Sopenharmony_ci      gallivm_destroy(variant->gallivm);
821bf215546Sopenharmony_ci   }
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci   list_del(&variant->list_item_global.list);
824bf215546Sopenharmony_ci   lp->nr_setup_variants--;
825bf215546Sopenharmony_ci   FREE(variant);
826bf215546Sopenharmony_ci}
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci/* When the number of setup variants exceeds a threshold, cull a
830bf215546Sopenharmony_ci * fraction (currently a quarter) of them.
831bf215546Sopenharmony_ci */
832bf215546Sopenharmony_cistatic void
833bf215546Sopenharmony_cicull_setup_variants(struct llvmpipe_context *lp)
834bf215546Sopenharmony_ci{
835bf215546Sopenharmony_ci   struct pipe_context *pipe = &lp->pipe;
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci   /*
838bf215546Sopenharmony_ci    * XXX: we need to flush the context until we have some sort of reference
839bf215546Sopenharmony_ci    * counting in fragment shaders as they may still be binned
840bf215546Sopenharmony_ci    * Flushing alone might not be sufficient we need to wait on it too.
841bf215546Sopenharmony_ci    */
842bf215546Sopenharmony_ci   llvmpipe_finish(pipe, __FUNCTION__);
843bf215546Sopenharmony_ci
844bf215546Sopenharmony_ci   for (int i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
845bf215546Sopenharmony_ci      struct lp_setup_variant_list_item *item;
846bf215546Sopenharmony_ci      if (list_is_empty(&lp->setup_variants_list.list)) {
847bf215546Sopenharmony_ci         break;
848bf215546Sopenharmony_ci      }
849bf215546Sopenharmony_ci      item = list_last_entry(&lp->setup_variants_list.list,
850bf215546Sopenharmony_ci                             struct lp_setup_variant_list_item, list);
851bf215546Sopenharmony_ci      assert(item);
852bf215546Sopenharmony_ci      assert(item->base);
853bf215546Sopenharmony_ci      remove_setup_variant(lp, item->base);
854bf215546Sopenharmony_ci   }
855bf215546Sopenharmony_ci}
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci/**
859bf215546Sopenharmony_ci * Update fragment/vertex shader linkage state.  This is called just
860bf215546Sopenharmony_ci * prior to drawing something when some fragment-related state has
861bf215546Sopenharmony_ci * changed.
862bf215546Sopenharmony_ci */
863bf215546Sopenharmony_civoid
864bf215546Sopenharmony_cillvmpipe_update_setup(struct llvmpipe_context *lp)
865bf215546Sopenharmony_ci{
866bf215546Sopenharmony_ci   struct lp_setup_variant_key *key = &lp->setup_variant.key;
867bf215546Sopenharmony_ci   struct lp_setup_variant *variant = NULL;
868bf215546Sopenharmony_ci   struct lp_setup_variant_list_item *li;
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci   lp_make_setup_variant_key(lp, key);
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   LIST_FOR_EACH_ENTRY(li, &lp->setup_variants_list.list, list) {
873bf215546Sopenharmony_ci      if (li->base->key.size == key->size &&
874bf215546Sopenharmony_ci         memcmp(&li->base->key, key, key->size) == 0) {
875bf215546Sopenharmony_ci         variant = li->base;
876bf215546Sopenharmony_ci         break;
877bf215546Sopenharmony_ci      }
878bf215546Sopenharmony_ci   }
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   if (variant) {
881bf215546Sopenharmony_ci      list_move_to(&variant->list_item_global.list, &lp->setup_variants_list.list);
882bf215546Sopenharmony_ci   } else {
883bf215546Sopenharmony_ci      if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
884bf215546Sopenharmony_ci         cull_setup_variants(lp);
885bf215546Sopenharmony_ci      }
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci      variant = generate_setup_variant(key, lp);
888bf215546Sopenharmony_ci      if (variant) {
889bf215546Sopenharmony_ci         list_add(&variant->list_item_global.list, &lp->setup_variants_list.list);
890bf215546Sopenharmony_ci         lp->nr_setup_variants++;
891bf215546Sopenharmony_ci      }
892bf215546Sopenharmony_ci   }
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci   lp_setup_set_setup_variant(lp->setup, variant);
895bf215546Sopenharmony_ci}
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci
898bf215546Sopenharmony_civoid
899bf215546Sopenharmony_cilp_delete_setup_variants(struct llvmpipe_context *lp)
900bf215546Sopenharmony_ci{
901bf215546Sopenharmony_ci   struct lp_setup_variant_list_item *li, *next;
902bf215546Sopenharmony_ci   LIST_FOR_EACH_ENTRY_SAFE(li, next, &lp->setup_variants_list.list, list) {
903bf215546Sopenharmony_ci      remove_setup_variant(lp, li->base);
904bf215546Sopenharmony_ci   }
905bf215546Sopenharmony_ci}
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_civoid
909bf215546Sopenharmony_cilp_dump_setup_coef(const struct lp_setup_variant_key *key,
910bf215546Sopenharmony_ci                   const float (*sa0)[4],
911bf215546Sopenharmony_ci                   const float (*sdadx)[4],
912bf215546Sopenharmony_ci                   const float (*sdady)[4])
913bf215546Sopenharmony_ci{
914bf215546Sopenharmony_ci   for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
915bf215546Sopenharmony_ci      float a0   = sa0  [0][i];
916bf215546Sopenharmony_ci      float dadx = sdadx[0][i];
917bf215546Sopenharmony_ci      float dady = sdady[0][i];
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci      debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
920bf215546Sopenharmony_ci                   "xyzw"[i], a0, dadx, dady);
921bf215546Sopenharmony_ci   }
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   for (int slot = 0; slot < key->num_inputs; slot++) {
924bf215546Sopenharmony_ci      unsigned usage_mask = key->inputs[slot].usage_mask;
925bf215546Sopenharmony_ci      for (int i = 0; i < TGSI_NUM_CHANNELS; i++) {
926bf215546Sopenharmony_ci         if (usage_mask & (1 << i)) {
927bf215546Sopenharmony_ci            float a0   = sa0  [1 + slot][i];
928bf215546Sopenharmony_ci            float dadx = sdadx[1 + slot][i];
929bf215546Sopenharmony_ci            float dady = sdady[1 + slot][i];
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci            debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
932bf215546Sopenharmony_ci                         slot, "xyzw"[i], a0, dadx, dady);
933bf215546Sopenharmony_ci         }
934bf215546Sopenharmony_ci      }
935bf215546Sopenharmony_ci   }
936bf215546Sopenharmony_ci}
937