1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2010 VMware. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "util/u_math.h" 30bf215546Sopenharmony_ci#include "util/u_memory.h" 31bf215546Sopenharmony_ci#include "util/os_time.h" 32bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h" 33bf215546Sopenharmony_ci#include "gallivm/lp_bld_bitarit.h" 34bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h" 35bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h" 36bf215546Sopenharmony_ci#include "gallivm/lp_bld_init.h" 37bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h" 38bf215546Sopenharmony_ci#include "gallivm/lp_bld_intr.h" 39bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h" 40bf215546Sopenharmony_ci#include "gallivm/lp_bld_type.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "lp_perf.h" 43bf215546Sopenharmony_ci#include "lp_debug.h" 44bf215546Sopenharmony_ci#include "lp_flush.h" 45bf215546Sopenharmony_ci#include "lp_screen.h" 46bf215546Sopenharmony_ci#include "lp_context.h" 47bf215546Sopenharmony_ci#include "lp_state.h" 48bf215546Sopenharmony_ci#include "lp_state_fs.h" 49bf215546Sopenharmony_ci#include "lp_state_setup.h" 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci/** Setup shader number (for debugging) */ 53bf215546Sopenharmony_cistatic unsigned setup_no = 0; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci/* currently organized to interpolate full float[4] attributes even 57bf215546Sopenharmony_ci * when some elements are unused. Later, can pack vertex data more 58bf215546Sopenharmony_ci * closely. 59bf215546Sopenharmony_ci */ 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistruct lp_setup_args 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci /* Function arguments: 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci LLVMValueRef v0; 67bf215546Sopenharmony_ci LLVMValueRef v1; 68bf215546Sopenharmony_ci LLVMValueRef v2; 69bf215546Sopenharmony_ci LLVMValueRef facing; /* boolean */ 70bf215546Sopenharmony_ci LLVMValueRef a0; 71bf215546Sopenharmony_ci LLVMValueRef dadx; 72bf215546Sopenharmony_ci LLVMValueRef dady; 73bf215546Sopenharmony_ci LLVMValueRef key; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* Derived: 76bf215546Sopenharmony_ci */ 77bf215546Sopenharmony_ci LLVMValueRef x0_center; 78bf215546Sopenharmony_ci LLVMValueRef y0_center; 79bf215546Sopenharmony_ci LLVMValueRef dy20_ooa; 80bf215546Sopenharmony_ci LLVMValueRef dy01_ooa; 81bf215546Sopenharmony_ci LLVMValueRef dx20_ooa; 82bf215546Sopenharmony_ci LLVMValueRef dx01_ooa; 83bf215546Sopenharmony_ci struct lp_build_context bld; 84bf215546Sopenharmony_ci}; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistatic void 88bf215546Sopenharmony_cistore_coef(struct gallivm_state *gallivm, 89bf215546Sopenharmony_ci const struct lp_setup_args *args, 90bf215546Sopenharmony_ci unsigned slot, 91bf215546Sopenharmony_ci LLVMValueRef a0, 92bf215546Sopenharmony_ci LLVMValueRef dadx, 93bf215546Sopenharmony_ci LLVMValueRef dady) 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 96bf215546Sopenharmony_ci LLVMValueRef idx = lp_build_const_int32(gallivm, slot); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci LLVMBuildStore(builder, 99bf215546Sopenharmony_ci a0, 100bf215546Sopenharmony_ci LLVMBuildGEP(builder, args->a0, &idx, 1, "")); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci LLVMBuildStore(builder, 103bf215546Sopenharmony_ci dadx, 104bf215546Sopenharmony_ci LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci LLVMBuildStore(builder, 107bf215546Sopenharmony_ci dady, 108bf215546Sopenharmony_ci LLVMBuildGEP(builder, args->dady, &idx, 1, "")); 109bf215546Sopenharmony_ci} 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_cistatic void 113bf215546Sopenharmony_ciemit_constant_coef4(struct gallivm_state *gallivm, 114bf215546Sopenharmony_ci const struct lp_setup_args *args, 115bf215546Sopenharmony_ci unsigned slot, 116bf215546Sopenharmony_ci LLVMValueRef vert) 117bf215546Sopenharmony_ci{ 118bf215546Sopenharmony_ci store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero); 119bf215546Sopenharmony_ci} 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci/** 123bf215546Sopenharmony_ci * Setup the fragment input attribute with the front-facing value. 124bf215546Sopenharmony_ci * \param frontface is the triangle front facing? 125bf215546Sopenharmony_ci */ 126bf215546Sopenharmony_cistatic void 127bf215546Sopenharmony_ciemit_facing_coef(struct gallivm_state *gallivm, 128bf215546Sopenharmony_ci struct lp_setup_args *args, 129bf215546Sopenharmony_ci unsigned slot) 130bf215546Sopenharmony_ci{ 131bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 132bf215546Sopenharmony_ci LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 133bf215546Sopenharmony_ci LLVMValueRef a0_0 = args->facing; 134bf215546Sopenharmony_ci LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); 135bf215546Sopenharmony_ci LLVMValueRef a0, face_val; 136bf215546Sopenharmony_ci const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, 137bf215546Sopenharmony_ci PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 }; 138bf215546Sopenharmony_ci /* Our face val is either 1 or 0 so we do 139bf215546Sopenharmony_ci * face = (val * 2) - 1 140bf215546Sopenharmony_ci * to make it 1 or -1 141bf215546Sopenharmony_ci */ 142bf215546Sopenharmony_ci face_val = 143bf215546Sopenharmony_ci LLVMBuildFAdd(builder, 144bf215546Sopenharmony_ci LLVMBuildFMul(builder, a0_0f, 145bf215546Sopenharmony_ci lp_build_const_float(gallivm, 2.0), 146bf215546Sopenharmony_ci ""), 147bf215546Sopenharmony_ci lp_build_const_float(gallivm, -1.0), 148bf215546Sopenharmony_ci "facing"); 149bf215546Sopenharmony_ci face_val = lp_build_broadcast_scalar(&args->bld, face_val); 150bf215546Sopenharmony_ci a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero); 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic LLVMValueRef 157bf215546Sopenharmony_civert_attrib(struct gallivm_state *gallivm, 158bf215546Sopenharmony_ci LLVMValueRef vert, 159bf215546Sopenharmony_ci int attr, 160bf215546Sopenharmony_ci int elem, 161bf215546Sopenharmony_ci const char *name) 162bf215546Sopenharmony_ci{ 163bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 164bf215546Sopenharmony_ci LLVMValueRef idx[2]; 165bf215546Sopenharmony_ci idx[0] = lp_build_const_int32(gallivm, attr); 166bf215546Sopenharmony_ci idx[1] = lp_build_const_int32(gallivm, elem); 167bf215546Sopenharmony_ci return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_cistatic void 172bf215546Sopenharmony_cilp_twoside(struct gallivm_state *gallivm, 173bf215546Sopenharmony_ci struct lp_setup_args *args, 174bf215546Sopenharmony_ci const struct lp_setup_variant_key *key, 175bf215546Sopenharmony_ci int bcolor_slot, 176bf215546Sopenharmony_ci LLVMValueRef attribv[3]) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 179bf215546Sopenharmony_ci LLVMValueRef a0_back, a1_back, a2_back; 180bf215546Sopenharmony_ci LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci LLVMValueRef facing = args->facing; 183bf215546Sopenharmony_ci LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, 184bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back"); 187bf215546Sopenharmony_ci a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); 188bf215546Sopenharmony_ci a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back"); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* Possibly swap the front and back attrib values, 191bf215546Sopenharmony_ci * 192bf215546Sopenharmony_ci * Prefer select to if so we don't have to worry about phis or 193bf215546Sopenharmony_ci * allocas. 194bf215546Sopenharmony_ci */ 195bf215546Sopenharmony_ci attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); 196bf215546Sopenharmony_ci attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); 197bf215546Sopenharmony_ci attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic LLVMValueRef 202bf215546Sopenharmony_cilp_do_offset_tri(struct gallivm_state *gallivm, 203bf215546Sopenharmony_ci struct lp_setup_args *args, 204bf215546Sopenharmony_ci const struct lp_setup_variant_key *key, 205bf215546Sopenharmony_ci LLVMValueRef inv_det, 206bf215546Sopenharmony_ci LLVMValueRef dxyz01, 207bf215546Sopenharmony_ci LLVMValueRef dxyz20, 208bf215546Sopenharmony_ci LLVMValueRef attribv[3]) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 211bf215546Sopenharmony_ci struct lp_build_context flt_scalar_bld; 212bf215546Sopenharmony_ci struct lp_build_context int_scalar_bld; 213bf215546Sopenharmony_ci struct lp_build_context *bld = &args->bld; 214bf215546Sopenharmony_ci LLVMValueRef zoffset, mult; 215bf215546Sopenharmony_ci LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; 216bf215546Sopenharmony_ci LLVMValueRef max, max_value, res12; 217bf215546Sopenharmony_ci LLVMValueRef shuffles[4]; 218bf215546Sopenharmony_ci LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 219bf215546Sopenharmony_ci LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 220bf215546Sopenharmony_ci LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 221bf215546Sopenharmony_ci LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); 222bf215546Sopenharmony_ci LLVMValueRef threei = lp_build_const_int32(gallivm, 3); 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci /* (res12) = cross(e,f).xy */ 225bf215546Sopenharmony_ci shuffles[0] = twoi; 226bf215546Sopenharmony_ci shuffles[1] = zeroi; 227bf215546Sopenharmony_ci shuffles[2] = onei; 228bf215546Sopenharmony_ci shuffles[3] = twoi; 229bf215546Sopenharmony_ci dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci shuffles[0] = onei; 232bf215546Sopenharmony_ci shuffles[1] = twoi; 233bf215546Sopenharmony_ci shuffles[2] = twoi; 234bf215546Sopenharmony_ci shuffles[3] = zeroi; 235bf215546Sopenharmony_ci dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci shuffles[0] = twoi; 240bf215546Sopenharmony_ci shuffles[1] = threei; 241bf215546Sopenharmony_ci shuffles[2] = LLVMGetUndef(shuf_type); 242bf215546Sopenharmony_ci shuffles[3] = LLVMGetUndef(shuf_type); 243bf215546Sopenharmony_ci dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, 244bf215546Sopenharmony_ci LLVMConstVector(shuffles, 4), ""); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ 249bf215546Sopenharmony_ci dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); 250bf215546Sopenharmony_ci dzdxdzdy = lp_build_abs(bld, dzdxdzdy); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); 253bf215546Sopenharmony_ci dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */ 256bf215546Sopenharmony_ci max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); 257bf215546Sopenharmony_ci max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci mult = LLVMBuildFMul(b, max_value, 260bf215546Sopenharmony_ci lp_build_const_float(gallivm, 261bf215546Sopenharmony_ci key->pgon_offset_scale), ""); 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32)); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci if (key->floating_point_depth) { 266bf215546Sopenharmony_ci /* 267bf215546Sopenharmony_ci * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) - 268bf215546Sopenharmony_ci * mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale 269bf215546Sopenharmony_ci * 270bf215546Sopenharmony_ci * NOTE: Assumes IEEE float32. 271bf215546Sopenharmony_ci */ 272bf215546Sopenharmony_ci LLVMValueRef c23_shifted, exp_mask, bias, exp; 273bf215546Sopenharmony_ci LLVMValueRef maxz_value, maxz0z1_value; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32)); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci c23_shifted = lp_build_const_int32(gallivm, 23 << 23); 278bf215546Sopenharmony_ci exp_mask = lp_build_const_int32(gallivm, 0xff << 23); 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci maxz0z1_value = lp_build_max(&flt_scalar_bld, 281bf215546Sopenharmony_ci lp_build_abs(&flt_scalar_bld, 282bf215546Sopenharmony_ci LLVMBuildExtractElement(b, attribv[0], twoi, "")), 283bf215546Sopenharmony_ci lp_build_abs(&flt_scalar_bld, 284bf215546Sopenharmony_ci LLVMBuildExtractElement(b, attribv[1], twoi, ""))); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci maxz_value = lp_build_max(&flt_scalar_bld, 287bf215546Sopenharmony_ci lp_build_abs(&flt_scalar_bld, 288bf215546Sopenharmony_ci LLVMBuildExtractElement(b, attribv[2], twoi, "")), 289bf215546Sopenharmony_ci maxz0z1_value); 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); 292bf215546Sopenharmony_ci exp = lp_build_and(&int_scalar_bld, exp, exp_mask); 293bf215546Sopenharmony_ci exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted); 294bf215546Sopenharmony_ci /* Clamping to zero means mrd will be zero for very small numbers, 295bf215546Sopenharmony_ci * but specs do not indicate this should be prevented by clamping 296bf215546Sopenharmony_ci * mrd to smallest normal number instead. */ 297bf215546Sopenharmony_ci exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero); 298bf215546Sopenharmony_ci exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, ""); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci bias = LLVMBuildFMul(b, exp, 301bf215546Sopenharmony_ci lp_build_const_float(gallivm, key->pgon_offset_units), 302bf215546Sopenharmony_ci "bias"); 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset"); 305bf215546Sopenharmony_ci } else { 306bf215546Sopenharmony_ci /* 307bf215546Sopenharmony_ci * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale 308bf215546Sopenharmony_ci */ 309bf215546Sopenharmony_ci zoffset = LLVMBuildFAdd(b, 310bf215546Sopenharmony_ci lp_build_const_float(gallivm, key->pgon_offset_units), 311bf215546Sopenharmony_ci mult, "zoffset"); 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci if (key->pgon_offset_clamp > 0) { 315bf215546Sopenharmony_ci zoffset = lp_build_min(&flt_scalar_bld, 316bf215546Sopenharmony_ci lp_build_const_float(gallivm, key->pgon_offset_clamp), 317bf215546Sopenharmony_ci zoffset); 318bf215546Sopenharmony_ci } else if (key->pgon_offset_clamp < 0) { 319bf215546Sopenharmony_ci zoffset = lp_build_max(&flt_scalar_bld, 320bf215546Sopenharmony_ci lp_build_const_float(gallivm, key->pgon_offset_clamp), 321bf215546Sopenharmony_ci zoffset); 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci return zoffset; 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_cistatic void 329bf215546Sopenharmony_ciload_attribute(struct gallivm_state *gallivm, 330bf215546Sopenharmony_ci struct lp_setup_args *args, 331bf215546Sopenharmony_ci const struct lp_setup_variant_key *key, 332bf215546Sopenharmony_ci unsigned vert_attr, 333bf215546Sopenharmony_ci LLVMValueRef attribv[3]) 334bf215546Sopenharmony_ci{ 335bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 336bf215546Sopenharmony_ci LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* Load the vertex data 339bf215546Sopenharmony_ci */ 340bf215546Sopenharmony_ci attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); 341bf215546Sopenharmony_ci attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); 342bf215546Sopenharmony_ci attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci /* Potentially modify it according to twoside, etc: 345bf215546Sopenharmony_ci */ 346bf215546Sopenharmony_ci if (key->twoside) { 347bf215546Sopenharmony_ci if (vert_attr == key->color_slot && key->bcolor_slot >= 0) 348bf215546Sopenharmony_ci lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); 349bf215546Sopenharmony_ci else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) 350bf215546Sopenharmony_ci lp_twoside(gallivm, args, key, key->bspec_slot, attribv); 351bf215546Sopenharmony_ci } 352bf215546Sopenharmony_ci} 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci/* 356bf215546Sopenharmony_ci * FIXME: interpolation is always done wrt fb origin (0/0). 357bf215546Sopenharmony_ci * However, if some (small) tri is far away from the origin and gradients 358bf215546Sopenharmony_ci * are large, this can lead to HUGE errors, since the a0 value calculated 359bf215546Sopenharmony_ci * here can get very large (with the actual values inside the triangle way 360bf215546Sopenharmony_ci * smaller), leading to complete loss of accuracy. This could be prevented 361bf215546Sopenharmony_ci * by using some point inside (or at corner) of the tri as interpolation 362bf215546Sopenharmony_ci * origin, or just use barycentric interpolation (which GL suggests and is 363bf215546Sopenharmony_ci * what real hw does - you can get the barycentric coordinates from the 364bf215546Sopenharmony_ci * edge functions in rasterization in principle (though we skip these 365bf215546Sopenharmony_ci * sometimes completely in case of tris covering a block fully, 366bf215546Sopenharmony_ci * which obviously wouldn't work)). 367bf215546Sopenharmony_ci */ 368bf215546Sopenharmony_cistatic void 369bf215546Sopenharmony_cicalc_coef4(struct gallivm_state *gallivm, 370bf215546Sopenharmony_ci struct lp_setup_args *args, 371bf215546Sopenharmony_ci LLVMValueRef a0, 372bf215546Sopenharmony_ci LLVMValueRef a1, 373bf215546Sopenharmony_ci LLVMValueRef a2, 374bf215546Sopenharmony_ci LLVMValueRef out[3]) 375bf215546Sopenharmony_ci{ 376bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 377bf215546Sopenharmony_ci LLVMValueRef attr_0; 378bf215546Sopenharmony_ci LLVMValueRef dy20_ooa = args->dy20_ooa; 379bf215546Sopenharmony_ci LLVMValueRef dy01_ooa = args->dy01_ooa; 380bf215546Sopenharmony_ci LLVMValueRef dx20_ooa = args->dx20_ooa; 381bf215546Sopenharmony_ci LLVMValueRef dx01_ooa = args->dx01_ooa; 382bf215546Sopenharmony_ci LLVMValueRef x0_center = args->x0_center; 383bf215546Sopenharmony_ci LLVMValueRef y0_center = args->y0_center; 384bf215546Sopenharmony_ci LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); 385bf215546Sopenharmony_ci LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci /* Calculate dadx (vec4f) 388bf215546Sopenharmony_ci */ 389bf215546Sopenharmony_ci LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); 390bf215546Sopenharmony_ci LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); 391bf215546Sopenharmony_ci LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci /* Calculate dady (vec4f) 394bf215546Sopenharmony_ci */ 395bf215546Sopenharmony_ci LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); 396bf215546Sopenharmony_ci LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); 397bf215546Sopenharmony_ci LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci /* Calculate a0 - the attribute value at the origin 400bf215546Sopenharmony_ci */ 401bf215546Sopenharmony_ci LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); 402bf215546Sopenharmony_ci LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); 403bf215546Sopenharmony_ci LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); 404bf215546Sopenharmony_ci attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci out[0] = attr_0; 407bf215546Sopenharmony_ci out[1] = dadx; 408bf215546Sopenharmony_ci out[2] = dady; 409bf215546Sopenharmony_ci} 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_cistatic void 413bf215546Sopenharmony_ciemit_coef4(struct gallivm_state *gallivm, 414bf215546Sopenharmony_ci struct lp_setup_args *args, 415bf215546Sopenharmony_ci unsigned slot, 416bf215546Sopenharmony_ci LLVMValueRef a0, 417bf215546Sopenharmony_ci LLVMValueRef a1, 418bf215546Sopenharmony_ci LLVMValueRef a2) 419bf215546Sopenharmony_ci{ 420bf215546Sopenharmony_ci LLVMValueRef coeffs[3]; 421bf215546Sopenharmony_ci calc_coef4(gallivm, args, a0, a1, a2, coeffs); 422bf215546Sopenharmony_ci store_coef(gallivm, args, slot, coeffs[0], coeffs[1], coeffs[2]); 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_cistatic void 427bf215546Sopenharmony_ciemit_linear_coef(struct gallivm_state *gallivm, 428bf215546Sopenharmony_ci struct lp_setup_args *args, 429bf215546Sopenharmony_ci unsigned slot, 430bf215546Sopenharmony_ci LLVMValueRef attribv[3]) 431bf215546Sopenharmony_ci{ 432bf215546Sopenharmony_ci /* nothing to do anymore */ 433bf215546Sopenharmony_ci emit_coef4(gallivm, args, slot, attribv[0], attribv[1], attribv[2]); 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci/** 438bf215546Sopenharmony_ci * Compute a0, dadx and dady for a perspective-corrected interpolant, 439bf215546Sopenharmony_ci * for a triangle. 440bf215546Sopenharmony_ci * We basically multiply the vertex value by 1/w before computing 441bf215546Sopenharmony_ci * the plane coefficients (a0, dadx, dady). 442bf215546Sopenharmony_ci * Later, when we compute the value at a particular fragment position we'll 443bf215546Sopenharmony_ci * divide the interpolated value by the interpolated W at that fragment. 444bf215546Sopenharmony_ci */ 445bf215546Sopenharmony_cistatic void 446bf215546Sopenharmony_ciapply_perspective_corr(struct gallivm_state *gallivm, 447bf215546Sopenharmony_ci struct lp_setup_args *args, 448bf215546Sopenharmony_ci unsigned slot, 449bf215546Sopenharmony_ci LLVMValueRef attribv[3]) 450bf215546Sopenharmony_ci{ 451bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci /* premultiply by 1/w (v[0][3] is always 1/w): 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld, 456bf215546Sopenharmony_ci vert_attrib(gallivm, args->v0, 0, 3, "v0_oow")); 457bf215546Sopenharmony_ci LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld, 458bf215546Sopenharmony_ci vert_attrib(gallivm, args->v1, 0, 3, "v1_oow")); 459bf215546Sopenharmony_ci LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld, 460bf215546Sopenharmony_ci vert_attrib(gallivm, args->v2, 0, 3, "v2_oow")); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); 463bf215546Sopenharmony_ci attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); 464bf215546Sopenharmony_ci attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); 465bf215546Sopenharmony_ci} 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci/** 469bf215546Sopenharmony_ci * Compute the inputs-> dadx, dady, a0 values. 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_cistatic void 472bf215546Sopenharmony_ciemit_tri_coef(struct gallivm_state *gallivm, 473bf215546Sopenharmony_ci const struct lp_setup_variant_key *key, 474bf215546Sopenharmony_ci struct lp_setup_args *args) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci LLVMValueRef attribs[3]; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci /* setup interpolation for all the remaining attributes */ 479bf215546Sopenharmony_ci for (unsigned slot = 0; slot < key->num_inputs; slot++) { 480bf215546Sopenharmony_ci switch (key->inputs[slot].interp) { 481bf215546Sopenharmony_ci case LP_INTERP_CONSTANT: 482bf215546Sopenharmony_ci load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 483bf215546Sopenharmony_ci if (key->flatshade_first) { 484bf215546Sopenharmony_ci emit_constant_coef4(gallivm, args, slot+1, attribs[0]); 485bf215546Sopenharmony_ci } else { 486bf215546Sopenharmony_ci emit_constant_coef4(gallivm, args, slot+1, attribs[2]); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci break; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci case LP_INTERP_LINEAR: 491bf215546Sopenharmony_ci load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 492bf215546Sopenharmony_ci emit_linear_coef(gallivm, args, slot+1, attribs); 493bf215546Sopenharmony_ci break; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci case LP_INTERP_PERSPECTIVE: 496bf215546Sopenharmony_ci load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 497bf215546Sopenharmony_ci apply_perspective_corr(gallivm, args, slot+1, attribs); 498bf215546Sopenharmony_ci emit_linear_coef(gallivm, args, slot+1, attribs); 499bf215546Sopenharmony_ci break; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci case LP_INTERP_POSITION: 502bf215546Sopenharmony_ci /* 503bf215546Sopenharmony_ci * The generated pixel interpolators will pick up the coeffs from 504bf215546Sopenharmony_ci * slot 0. 505bf215546Sopenharmony_ci */ 506bf215546Sopenharmony_ci break; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci case LP_INTERP_FACING: 509bf215546Sopenharmony_ci emit_facing_coef(gallivm, args, slot+1); 510bf215546Sopenharmony_ci break; 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci default: 513bf215546Sopenharmony_ci assert(0); 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci } 516bf215546Sopenharmony_ci} 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci/* XXX: generic code: 520bf215546Sopenharmony_ci */ 521bf215546Sopenharmony_cistatic void 522bf215546Sopenharmony_ciset_noalias(LLVMBuilderRef builder, 523bf215546Sopenharmony_ci LLVMValueRef function, 524bf215546Sopenharmony_ci const LLVMTypeRef *arg_types, 525bf215546Sopenharmony_ci int nr_args) 526bf215546Sopenharmony_ci{ 527bf215546Sopenharmony_ci for (int i = 0; i < nr_args; ++i) { 528bf215546Sopenharmony_ci if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { 529bf215546Sopenharmony_ci lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci} 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_cistatic void 536bf215546Sopenharmony_ciinit_args(struct gallivm_state *gallivm, 537bf215546Sopenharmony_ci const struct lp_setup_variant_key *key, 538bf215546Sopenharmony_ci struct lp_setup_args *args) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci LLVMBuilderRef b = gallivm->builder; 541bf215546Sopenharmony_ci LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 542bf215546Sopenharmony_ci LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); 543bf215546Sopenharmony_ci LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 544bf215546Sopenharmony_ci LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 545bf215546Sopenharmony_ci LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; 546bf215546Sopenharmony_ci LLVMValueRef e, f, ef, ooa; 547bf215546Sopenharmony_ci LLVMValueRef shuffles[4], shuf10; 548bf215546Sopenharmony_ci LLVMValueRef attr_pos[3]; 549bf215546Sopenharmony_ci LLVMValueRef polygon_offset; 550bf215546Sopenharmony_ci struct lp_type typef4 = lp_type_float_vec(32, 128); 551bf215546Sopenharmony_ci struct lp_build_context bld; 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci lp_build_context_init(&bld, gallivm, typef4); 554bf215546Sopenharmony_ci args->bld = bld; 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci /* The internal position input is in slot zero: 557bf215546Sopenharmony_ci */ 558bf215546Sopenharmony_ci load_attribute(gallivm, args, key, 0, attr_pos); 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci pixel_center = lp_build_const_vec(gallivm, typef4, 561bf215546Sopenharmony_ci (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* 564bf215546Sopenharmony_ci * xy are first two elems in v0a/v1a/v2a but just use vec4 arit 565bf215546Sopenharmony_ci * also offset_tri uses actually xyz in them 566bf215546Sopenharmony_ci */ 567bf215546Sopenharmony_ci xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); 570bf215546Sopenharmony_ci dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci shuffles[0] = onei; 573bf215546Sopenharmony_ci shuffles[1] = zeroi; 574bf215546Sopenharmony_ci shuffles[2] = LLVMGetUndef(shuf_type); 575bf215546Sopenharmony_ci shuffles[3] = LLVMGetUndef(shuf_type); 576bf215546Sopenharmony_ci shuf10 = LLVMConstVector(shuffles, 4); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); 581bf215546Sopenharmony_ci e = LLVMBuildExtractElement(b, ef, zeroi, ""); 582bf215546Sopenharmony_ci f = LLVMBuildExtractElement(b, ef, onei, ""); 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci ooa = lp_build_broadcast_scalar(&bld, ooa); 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* tri offset calc shares a lot of arithmetic, do it here */ 589bf215546Sopenharmony_ci if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { 590bf215546Sopenharmony_ci polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); 591bf215546Sopenharmony_ci } else { 592bf215546Sopenharmony_ci polygon_offset = lp_build_const_float(gallivm, 0.0f); 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); 596bf215546Sopenharmony_ci dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); 599bf215546Sopenharmony_ci args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); 602bf215546Sopenharmony_ci args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); 605bf215546Sopenharmony_ci args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci LLVMValueRef coeffs[3]; 608bf215546Sopenharmony_ci calc_coef4(gallivm, args, attr_pos[0], attr_pos[1], attr_pos[2], coeffs); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci /* This is a bit sneaky: 611bf215546Sopenharmony_ci * Because we observe that the X component of A0 is otherwise unused, 612bf215546Sopenharmony_ci * we can overwrite it with the computed polygon-offset value, to make 613bf215546Sopenharmony_ci * sure it's available in the fragment shader without having to change 614bf215546Sopenharmony_ci * the interface (which is error-prone). 615bf215546Sopenharmony_ci */ 616bf215546Sopenharmony_ci coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset, 617bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 0), ""); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci store_coef(gallivm, args, 0, coeffs[0], coeffs[1], coeffs[2]); 620bf215546Sopenharmony_ci} 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci/** 624bf215546Sopenharmony_ci * Generate the runtime callable function for the coefficient calculation. 625bf215546Sopenharmony_ci * 626bf215546Sopenharmony_ci */ 627bf215546Sopenharmony_cistatic struct lp_setup_variant * 628bf215546Sopenharmony_cigenerate_setup_variant(struct lp_setup_variant_key *key, 629bf215546Sopenharmony_ci struct llvmpipe_context *lp) 630bf215546Sopenharmony_ci{ 631bf215546Sopenharmony_ci int64_t t0 = 0, t1; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci if (0) 634bf215546Sopenharmony_ci goto fail; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci struct lp_setup_variant *variant = CALLOC_STRUCT(lp_setup_variant); 637bf215546Sopenharmony_ci if (!variant) 638bf215546Sopenharmony_ci goto fail; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci variant->no = setup_no++; 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci char func_name[64]; 643bf215546Sopenharmony_ci snprintf(func_name, sizeof(func_name), "setup_variant_%u", 644bf215546Sopenharmony_ci variant->no); 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci struct gallivm_state *gallivm; 647bf215546Sopenharmony_ci variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL); 648bf215546Sopenharmony_ci if (!variant->gallivm) { 649bf215546Sopenharmony_ci goto fail; 650bf215546Sopenharmony_ci } 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci if (LP_DEBUG & DEBUG_COUNTERS) { 655bf215546Sopenharmony_ci t0 = os_time_get(); 656bf215546Sopenharmony_ci } 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci memcpy(&variant->key, key, key->size); 659bf215546Sopenharmony_ci variant->list_item_global.base = variant; 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci /* Currently always deal with full 4-wide vertex attributes from 662bf215546Sopenharmony_ci * the vertices. 663bf215546Sopenharmony_ci */ 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci LLVMTypeRef vec4f_type = 666bf215546Sopenharmony_ci LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci LLVMTypeRef arg_types[8]; 669bf215546Sopenharmony_ci arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ 670bf215546Sopenharmony_ci arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ 671bf215546Sopenharmony_ci arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ 672bf215546Sopenharmony_ci arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ 673bf215546Sopenharmony_ci arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ 674bf215546Sopenharmony_ci arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ 675bf215546Sopenharmony_ci arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ 676bf215546Sopenharmony_ci arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key (placeholder) */ 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci LLVMTypeRef func_type = 679bf215546Sopenharmony_ci LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), 680bf215546Sopenharmony_ci arg_types, ARRAY_SIZE(arg_types), 0); 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); 683bf215546Sopenharmony_ci if (!variant->function) 684bf215546Sopenharmony_ci goto fail; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci struct lp_setup_args args; 689bf215546Sopenharmony_ci args.v0 = LLVMGetParam(variant->function, 0); 690bf215546Sopenharmony_ci args.v1 = LLVMGetParam(variant->function, 1); 691bf215546Sopenharmony_ci args.v2 = LLVMGetParam(variant->function, 2); 692bf215546Sopenharmony_ci args.facing = LLVMGetParam(variant->function, 3); 693bf215546Sopenharmony_ci args.a0 = LLVMGetParam(variant->function, 4); 694bf215546Sopenharmony_ci args.dadx = LLVMGetParam(variant->function, 5); 695bf215546Sopenharmony_ci args.dady = LLVMGetParam(variant->function, 6); 696bf215546Sopenharmony_ci args.key = LLVMGetParam(variant->function, 7); 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci lp_build_name(args.v0, "in_v0"); 699bf215546Sopenharmony_ci lp_build_name(args.v1, "in_v1"); 700bf215546Sopenharmony_ci lp_build_name(args.v2, "in_v2"); 701bf215546Sopenharmony_ci lp_build_name(args.facing, "in_facing"); 702bf215546Sopenharmony_ci lp_build_name(args.a0, "out_a0"); 703bf215546Sopenharmony_ci lp_build_name(args.dadx, "out_dadx"); 704bf215546Sopenharmony_ci lp_build_name(args.dady, "out_dady"); 705bf215546Sopenharmony_ci lp_build_name(args.key, "key"); 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci /* 708bf215546Sopenharmony_ci * Function body 709bf215546Sopenharmony_ci */ 710bf215546Sopenharmony_ci LLVMBasicBlockRef block = 711bf215546Sopenharmony_ci LLVMAppendBasicBlockInContext(gallivm->context, 712bf215546Sopenharmony_ci variant->function, "entry"); 713bf215546Sopenharmony_ci LLVMPositionBuilderAtEnd(builder, block); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); 716bf215546Sopenharmony_ci init_args(gallivm, &variant->key, &args); 717bf215546Sopenharmony_ci emit_tri_coef(gallivm, &variant->key, &args); 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci LLVMBuildRetVoid(builder); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci gallivm_verify_function(gallivm, variant->function); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci gallivm_compile_module(gallivm); 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci variant->jit_function = (lp_jit_setup_triangle) 726bf215546Sopenharmony_ci gallivm_jit_function(gallivm, variant->function); 727bf215546Sopenharmony_ci if (!variant->jit_function) 728bf215546Sopenharmony_ci goto fail; 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci gallivm_free_ir(variant->gallivm); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci /* 733bf215546Sopenharmony_ci * Update timing information: 734bf215546Sopenharmony_ci */ 735bf215546Sopenharmony_ci if (LP_DEBUG & DEBUG_COUNTERS) { 736bf215546Sopenharmony_ci t1 = os_time_get(); 737bf215546Sopenharmony_ci LP_COUNT_ADD(llvm_compile_time, t1 - t0); 738bf215546Sopenharmony_ci LP_COUNT_ADD(nr_llvm_compiles, 1); 739bf215546Sopenharmony_ci } 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci return variant; 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_cifail: 744bf215546Sopenharmony_ci if (variant) { 745bf215546Sopenharmony_ci if (variant->gallivm) { 746bf215546Sopenharmony_ci gallivm_destroy(variant->gallivm); 747bf215546Sopenharmony_ci } 748bf215546Sopenharmony_ci FREE(variant); 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci return NULL; 752bf215546Sopenharmony_ci} 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_cistatic void 756bf215546Sopenharmony_cilp_make_setup_variant_key(const struct llvmpipe_context *lp, 757bf215546Sopenharmony_ci struct lp_setup_variant_key *key) 758bf215546Sopenharmony_ci{ 759bf215546Sopenharmony_ci const struct lp_fragment_shader *fs = lp->fs; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci assert(sizeof key->inputs[0] == sizeof(uint)); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci key->num_inputs = fs->info.base.num_inputs; 764bf215546Sopenharmony_ci key->flatshade_first = lp->rasterizer->flatshade_first; 765bf215546Sopenharmony_ci key->pixel_center_half = lp->rasterizer->half_pixel_center; 766bf215546Sopenharmony_ci key->multisample = lp->rasterizer->multisample; 767bf215546Sopenharmony_ci key->twoside = lp->rasterizer->light_twoside; 768bf215546Sopenharmony_ci key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci key->color_slot = lp->color_slot[0]; 771bf215546Sopenharmony_ci key->bcolor_slot = lp->bcolor_slot[0]; 772bf215546Sopenharmony_ci key->spec_slot = lp->color_slot[1]; 773bf215546Sopenharmony_ci key->bspec_slot = lp->bcolor_slot[1]; 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci /* 776bf215546Sopenharmony_ci * If depth is floating point, depth bias is calculated with respect 777bf215546Sopenharmony_ci * to the primitive's maximum Z value. Retain the original depth bias 778bf215546Sopenharmony_ci * value until that stage. 779bf215546Sopenharmony_ci */ 780bf215546Sopenharmony_ci key->floating_point_depth = lp->floating_point_depth; 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci if (key->floating_point_depth) { 783bf215546Sopenharmony_ci key->pgon_offset_units = (float) lp->rasterizer->offset_units; 784bf215546Sopenharmony_ci } else { 785bf215546Sopenharmony_ci key->pgon_offset_units = 786bf215546Sopenharmony_ci (float) (lp->rasterizer->offset_units * lp->mrd * 2); 787bf215546Sopenharmony_ci } 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci key->pgon_offset_scale = lp->rasterizer->offset_scale; 790bf215546Sopenharmony_ci key->pgon_offset_clamp = lp->rasterizer->offset_clamp; 791bf215546Sopenharmony_ci key->uses_constant_interp = 0; 792bf215546Sopenharmony_ci key->pad = 0; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci for (unsigned i = 0; i < key->num_inputs; i++) { 797bf215546Sopenharmony_ci if (key->inputs[i].interp == LP_INTERP_COLOR) { 798bf215546Sopenharmony_ci if (lp->rasterizer->flatshade) 799bf215546Sopenharmony_ci key->inputs[i].interp = LP_INTERP_CONSTANT; 800bf215546Sopenharmony_ci else 801bf215546Sopenharmony_ci key->inputs[i].interp = LP_INTERP_PERSPECTIVE; 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci if (key->inputs[i].interp == LP_INTERP_CONSTANT) { 804bf215546Sopenharmony_ci key->uses_constant_interp = 1; 805bf215546Sopenharmony_ci } 806bf215546Sopenharmony_ci } 807bf215546Sopenharmony_ci} 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_cistatic void 811bf215546Sopenharmony_ciremove_setup_variant(struct llvmpipe_context *lp, 812bf215546Sopenharmony_ci struct lp_setup_variant *variant) 813bf215546Sopenharmony_ci{ 814bf215546Sopenharmony_ci if (gallivm_debug & GALLIVM_DEBUG_IR) { 815bf215546Sopenharmony_ci debug_printf("llvmpipe: del setup_variant #%u total %u\n", 816bf215546Sopenharmony_ci variant->no, lp->nr_setup_variants); 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci if (variant->gallivm) { 820bf215546Sopenharmony_ci gallivm_destroy(variant->gallivm); 821bf215546Sopenharmony_ci } 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci list_del(&variant->list_item_global.list); 824bf215546Sopenharmony_ci lp->nr_setup_variants--; 825bf215546Sopenharmony_ci FREE(variant); 826bf215546Sopenharmony_ci} 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci/* When the number of setup variants exceeds a threshold, cull a 830bf215546Sopenharmony_ci * fraction (currently a quarter) of them. 831bf215546Sopenharmony_ci */ 832bf215546Sopenharmony_cistatic void 833bf215546Sopenharmony_cicull_setup_variants(struct llvmpipe_context *lp) 834bf215546Sopenharmony_ci{ 835bf215546Sopenharmony_ci struct pipe_context *pipe = &lp->pipe; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci /* 838bf215546Sopenharmony_ci * XXX: we need to flush the context until we have some sort of reference 839bf215546Sopenharmony_ci * counting in fragment shaders as they may still be binned 840bf215546Sopenharmony_ci * Flushing alone might not be sufficient we need to wait on it too. 841bf215546Sopenharmony_ci */ 842bf215546Sopenharmony_ci llvmpipe_finish(pipe, __FUNCTION__); 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci for (int i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { 845bf215546Sopenharmony_ci struct lp_setup_variant_list_item *item; 846bf215546Sopenharmony_ci if (list_is_empty(&lp->setup_variants_list.list)) { 847bf215546Sopenharmony_ci break; 848bf215546Sopenharmony_ci } 849bf215546Sopenharmony_ci item = list_last_entry(&lp->setup_variants_list.list, 850bf215546Sopenharmony_ci struct lp_setup_variant_list_item, list); 851bf215546Sopenharmony_ci assert(item); 852bf215546Sopenharmony_ci assert(item->base); 853bf215546Sopenharmony_ci remove_setup_variant(lp, item->base); 854bf215546Sopenharmony_ci } 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci/** 859bf215546Sopenharmony_ci * Update fragment/vertex shader linkage state. This is called just 860bf215546Sopenharmony_ci * prior to drawing something when some fragment-related state has 861bf215546Sopenharmony_ci * changed. 862bf215546Sopenharmony_ci */ 863bf215546Sopenharmony_civoid 864bf215546Sopenharmony_cillvmpipe_update_setup(struct llvmpipe_context *lp) 865bf215546Sopenharmony_ci{ 866bf215546Sopenharmony_ci struct lp_setup_variant_key *key = &lp->setup_variant.key; 867bf215546Sopenharmony_ci struct lp_setup_variant *variant = NULL; 868bf215546Sopenharmony_ci struct lp_setup_variant_list_item *li; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci lp_make_setup_variant_key(lp, key); 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci LIST_FOR_EACH_ENTRY(li, &lp->setup_variants_list.list, list) { 873bf215546Sopenharmony_ci if (li->base->key.size == key->size && 874bf215546Sopenharmony_ci memcmp(&li->base->key, key, key->size) == 0) { 875bf215546Sopenharmony_ci variant = li->base; 876bf215546Sopenharmony_ci break; 877bf215546Sopenharmony_ci } 878bf215546Sopenharmony_ci } 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci if (variant) { 881bf215546Sopenharmony_ci list_move_to(&variant->list_item_global.list, &lp->setup_variants_list.list); 882bf215546Sopenharmony_ci } else { 883bf215546Sopenharmony_ci if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { 884bf215546Sopenharmony_ci cull_setup_variants(lp); 885bf215546Sopenharmony_ci } 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci variant = generate_setup_variant(key, lp); 888bf215546Sopenharmony_ci if (variant) { 889bf215546Sopenharmony_ci list_add(&variant->list_item_global.list, &lp->setup_variants_list.list); 890bf215546Sopenharmony_ci lp->nr_setup_variants++; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci } 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci lp_setup_set_setup_variant(lp->setup, variant); 895bf215546Sopenharmony_ci} 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_civoid 899bf215546Sopenharmony_cilp_delete_setup_variants(struct llvmpipe_context *lp) 900bf215546Sopenharmony_ci{ 901bf215546Sopenharmony_ci struct lp_setup_variant_list_item *li, *next; 902bf215546Sopenharmony_ci LIST_FOR_EACH_ENTRY_SAFE(li, next, &lp->setup_variants_list.list, list) { 903bf215546Sopenharmony_ci remove_setup_variant(lp, li->base); 904bf215546Sopenharmony_ci } 905bf215546Sopenharmony_ci} 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_civoid 909bf215546Sopenharmony_cilp_dump_setup_coef(const struct lp_setup_variant_key *key, 910bf215546Sopenharmony_ci const float (*sa0)[4], 911bf215546Sopenharmony_ci const float (*sdadx)[4], 912bf215546Sopenharmony_ci const float (*sdady)[4]) 913bf215546Sopenharmony_ci{ 914bf215546Sopenharmony_ci for (int i = 0; i < TGSI_NUM_CHANNELS; i++) { 915bf215546Sopenharmony_ci float a0 = sa0 [0][i]; 916bf215546Sopenharmony_ci float dadx = sdadx[0][i]; 917bf215546Sopenharmony_ci float dady = sdady[0][i]; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", 920bf215546Sopenharmony_ci "xyzw"[i], a0, dadx, dady); 921bf215546Sopenharmony_ci } 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci for (int slot = 0; slot < key->num_inputs; slot++) { 924bf215546Sopenharmony_ci unsigned usage_mask = key->inputs[slot].usage_mask; 925bf215546Sopenharmony_ci for (int i = 0; i < TGSI_NUM_CHANNELS; i++) { 926bf215546Sopenharmony_ci if (usage_mask & (1 << i)) { 927bf215546Sopenharmony_ci float a0 = sa0 [1 + slot][i]; 928bf215546Sopenharmony_ci float dadx = sdadx[1 + slot][i]; 929bf215546Sopenharmony_ci float dady = sdady[1 + slot][i]; 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", 932bf215546Sopenharmony_ci slot, "xyzw"[i], a0, dadx, dady); 933bf215546Sopenharmony_ci } 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci} 937