1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc. 4bf215546Sopenharmony_ci * Copyright 2007-2008 VMware, Inc. 5bf215546Sopenharmony_ci * All Rights Reserved. 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 9bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 10bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 11bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 12bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 13bf215546Sopenharmony_ci * the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 16bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 17bf215546Sopenharmony_ci * of the Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci **************************************************************************/ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/** 30bf215546Sopenharmony_ci * @file 31bf215546Sopenharmony_ci * Position and shader input interpolation. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com> 34bf215546Sopenharmony_ci */ 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h" 37bf215546Sopenharmony_ci#include "util/compiler.h" 38bf215546Sopenharmony_ci#include "util/u_debug.h" 39bf215546Sopenharmony_ci#include "util/u_memory.h" 40bf215546Sopenharmony_ci#include "util/u_math.h" 41bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h" 42bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h" 43bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h" 44bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h" 45bf215546Sopenharmony_ci#include "gallivm/lp_bld_swizzle.h" 46bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h" 47bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h" 48bf215546Sopenharmony_ci#include "gallivm/lp_bld_struct.h" 49bf215546Sopenharmony_ci#include "gallivm/lp_bld_gather.h" 50bf215546Sopenharmony_ci#include "lp_bld_interp.h" 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci/* 54bf215546Sopenharmony_ci * The shader JIT function operates on blocks of quads. 55bf215546Sopenharmony_ci * Each block has 2x2 quads and each quad has 2x2 pixels. 
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads:
 * 0 1 4 5
 * 2 3 6 7
 * is stored as g0..g7
 */


/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_ci#define PERSPECTIVE_DIVIDE_PER_QUAD 0 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_cistatic const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3}; 118bf215546Sopenharmony_cistatic const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistatic void 122bf215546Sopenharmony_ciattrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci if(attrib == 0) 125bf215546Sopenharmony_ci lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix); 126bf215546Sopenharmony_ci else 127bf215546Sopenharmony_ci lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic void 131bf215546Sopenharmony_cicalc_offsets(struct lp_build_context *coeff_bld, 132bf215546Sopenharmony_ci unsigned quad_start_index, 133bf215546Sopenharmony_ci LLVMValueRef *pixoffx, 134bf215546Sopenharmony_ci LLVMValueRef *pixoffy) 135bf215546Sopenharmony_ci{ 136bf215546Sopenharmony_ci unsigned i; 137bf215546Sopenharmony_ci unsigned num_pix = coeff_bld->type.length; 138bf215546Sopenharmony_ci struct gallivm_state *gallivm = coeff_bld->gallivm; 139bf215546Sopenharmony_ci LLVMBuilderRef builder = coeff_bld->gallivm->builder; 140bf215546Sopenharmony_ci LLVMValueRef nr, pixxf, pixyf; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci *pixoffx = coeff_bld->undef; 143bf215546Sopenharmony_ci *pixoffy = coeff_bld->undef; 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci for (i = 0; i < num_pix; i++) { 146bf215546Sopenharmony_ci nr = lp_build_const_int32(gallivm, i); 147bf215546Sopenharmony_ci pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + 148bf215546Sopenharmony_ci (quad_start_index & 1) * 2); 
149bf215546Sopenharmony_ci pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + 150bf215546Sopenharmony_ci (quad_start_index & 2)); 151bf215546Sopenharmony_ci *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, ""); 152bf215546Sopenharmony_ci *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, ""); 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic void 157bf215546Sopenharmony_cicalc_centroid_offsets(struct lp_build_interp_soa_context *bld, 158bf215546Sopenharmony_ci struct gallivm_state *gallivm, 159bf215546Sopenharmony_ci LLVMValueRef loop_iter, 160bf215546Sopenharmony_ci LLVMValueRef mask_store, 161bf215546Sopenharmony_ci LLVMValueRef pix_center_offset, 162bf215546Sopenharmony_ci LLVMValueRef *centroid_x, LLVMValueRef *centroid_y) 163bf215546Sopenharmony_ci{ 164bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 165bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 166bf215546Sopenharmony_ci LLVMValueRef s_mask_and = NULL; 167bf215546Sopenharmony_ci LLVMValueRef centroid_x_offset = pix_center_offset; 168bf215546Sopenharmony_ci LLVMValueRef centroid_y_offset = pix_center_offset; 169bf215546Sopenharmony_ci for (int s = bld->coverage_samples - 1; s >= 0; s--) { 170bf215546Sopenharmony_ci LLVMValueRef sample_cov; 171bf215546Sopenharmony_ci LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), ""); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, ""); 174bf215546Sopenharmony_ci sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx); 175bf215546Sopenharmony_ci if (s == bld->coverage_samples - 1) 176bf215546Sopenharmony_ci s_mask_and = sample_cov; 177bf215546Sopenharmony_ci else 178bf215546Sopenharmony_ci s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, ""); 179bf215546Sopenharmony_ci 
180bf215546Sopenharmony_ci LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2); 181bf215546Sopenharmony_ci LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx); 184bf215546Sopenharmony_ci y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx); 185bf215546Sopenharmony_ci x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx); 186bf215546Sopenharmony_ci y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx); 187bf215546Sopenharmony_ci centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset); 188bf215546Sopenharmony_ci centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset); 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset); 191bf215546Sopenharmony_ci *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset); 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci/* Note: this assumes the pointer to elem_type is in address space 0 */ 195bf215546Sopenharmony_cistatic LLVMValueRef 196bf215546Sopenharmony_ciload_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type, LLVMValueRef ptr, const char *name) { 197bf215546Sopenharmony_ci ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(elem_type, 0), name); 198bf215546Sopenharmony_ci return LLVMBuildLoad2(builder, elem_type, ptr, name); 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic LLVMValueRef 202bf215546Sopenharmony_ciindexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type, 203bf215546Sopenharmony_ci LLVMTypeRef elem_type, LLVMValueRef ptr, LLVMValueRef index, const char *name) { 204bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, gep_type, ptr, &index, 1, name); 
205bf215546Sopenharmony_ci return load_casted(builder, elem_type, ptr, name); 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci/* Much easier, and significantly less instructions in the per-stamp 209bf215546Sopenharmony_ci * part (less than half) but overall more instructions so a loss if 210bf215546Sopenharmony_ci * most quads are active. Might be a win though with larger vectors. 211bf215546Sopenharmony_ci * No ability to do per-quad divide (doable but not implemented) 212bf215546Sopenharmony_ci * Could be made to work with passed in pixel offsets (i.e. active quad merging). 213bf215546Sopenharmony_ci */ 214bf215546Sopenharmony_cistatic void 215bf215546Sopenharmony_cicoeffs_init_simple(struct lp_build_interp_soa_context *bld, 216bf215546Sopenharmony_ci LLVMValueRef a0_ptr, 217bf215546Sopenharmony_ci LLVMValueRef dadx_ptr, 218bf215546Sopenharmony_ci LLVMValueRef dady_ptr) 219bf215546Sopenharmony_ci{ 220bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 221bf215546Sopenharmony_ci struct lp_build_context *setup_bld = &bld->setup_bld; 222bf215546Sopenharmony_ci struct gallivm_state *gallivm = coeff_bld->gallivm; 223bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 224bf215546Sopenharmony_ci unsigned attrib; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci for (attrib = 0; attrib < bld->num_attribs; ++attrib) { 227bf215546Sopenharmony_ci /* 228bf215546Sopenharmony_ci * always fetch all 4 values for performance/simplicity 229bf215546Sopenharmony_ci * Note: we do that here because it seems to generate better 230bf215546Sopenharmony_ci * code. It generates a lot of moves initially but less 231bf215546Sopenharmony_ci * moves later. As far as I can tell this looks like a 232bf215546Sopenharmony_ci * llvm issue, instead of simply reloading the values from 233bf215546Sopenharmony_ci * the passed in pointers it if it runs out of registers 234bf215546Sopenharmony_ci * it spills/reloads them. 
Maybe some optimization passes 235bf215546Sopenharmony_ci * would help. 236bf215546Sopenharmony_ci * Might want to investigate this again later. 237bf215546Sopenharmony_ci */ 238bf215546Sopenharmony_ci const enum lp_interp interp = bld->interp[attrib]; 239bf215546Sopenharmony_ci LLVMValueRef index = lp_build_const_int32(gallivm, 240bf215546Sopenharmony_ci attrib * TGSI_NUM_CHANNELS); 241bf215546Sopenharmony_ci LLVMValueRef dadxaos = setup_bld->zero; 242bf215546Sopenharmony_ci LLVMValueRef dadyaos = setup_bld->zero; 243bf215546Sopenharmony_ci LLVMValueRef a0aos = setup_bld->zero; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci /* See: lp_state_fs.c / generate_fragment() / fs_elem_type */ 246bf215546Sopenharmony_ci LLVMTypeRef fs_elem_type = LLVMFloatTypeInContext(gallivm->context); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci switch (interp) { 249bf215546Sopenharmony_ci case LP_INTERP_PERSPECTIVE: 250bf215546Sopenharmony_ci FALLTHROUGH; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci case LP_INTERP_LINEAR: 253bf215546Sopenharmony_ci dadxaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dadx_ptr, index, ""); 254bf215546Sopenharmony_ci dadyaos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, dady_ptr, index, ""); 255bf215546Sopenharmony_ci attrib_name(dadxaos, attrib, 0, ".dadxaos"); 256bf215546Sopenharmony_ci attrib_name(dadyaos, attrib, 0, ".dadyaos"); 257bf215546Sopenharmony_ci FALLTHROUGH; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci case LP_INTERP_CONSTANT: 260bf215546Sopenharmony_ci case LP_INTERP_FACING: 261bf215546Sopenharmony_ci a0aos = indexed_load(builder, fs_elem_type, setup_bld->vec_type, a0_ptr, index, ""); 262bf215546Sopenharmony_ci attrib_name(a0aos, attrib, 0, ".a0aos"); 263bf215546Sopenharmony_ci break; 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci case LP_INTERP_POSITION: 266bf215546Sopenharmony_ci /* Nothing to do as the position coeffs are already setup in slot 0 */ 
267bf215546Sopenharmony_ci continue; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci default: 270bf215546Sopenharmony_ci assert(0); 271bf215546Sopenharmony_ci break; 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci bld->a0aos[attrib] = a0aos; 274bf215546Sopenharmony_ci bld->dadxaos[attrib] = dadxaos; 275bf215546Sopenharmony_ci bld->dadyaos[attrib] = dadyaos; 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci} 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci/** 280bf215546Sopenharmony_ci * Interpolate the shader input attribute values. 281bf215546Sopenharmony_ci * This is called for each (group of) quad(s). 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_cistatic void 284bf215546Sopenharmony_ciattribs_update_simple(struct lp_build_interp_soa_context *bld, 285bf215546Sopenharmony_ci struct gallivm_state *gallivm, 286bf215546Sopenharmony_ci LLVMValueRef loop_iter, 287bf215546Sopenharmony_ci LLVMValueRef mask_store, 288bf215546Sopenharmony_ci LLVMValueRef sample_id, 289bf215546Sopenharmony_ci int start, 290bf215546Sopenharmony_ci int end) 291bf215546Sopenharmony_ci{ 292bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 293bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 294bf215546Sopenharmony_ci struct lp_build_context *setup_bld = &bld->setup_bld; 295bf215546Sopenharmony_ci LLVMValueRef oow = NULL; 296bf215546Sopenharmony_ci unsigned attrib; 297bf215546Sopenharmony_ci LLVMValueRef pixoffx; 298bf215546Sopenharmony_ci LLVMValueRef pixoffy; 299bf215546Sopenharmony_ci LLVMValueRef ptr; 300bf215546Sopenharmony_ci LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci /* could do this with code-generated passed in pixel offsets too */ 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci assert(loop_iter); 305bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, 
""); 306bf215546Sopenharmony_ci pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, ""); 307bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, ""); 308bf215546Sopenharmony_ci pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, ""); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, 311bf215546Sopenharmony_ci lp_build_broadcast_scalar(coeff_bld, bld->x), ""); 312bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, 313bf215546Sopenharmony_ci lp_build_broadcast_scalar(coeff_bld, bld->y), ""); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci for (attrib = start; attrib < end; attrib++) { 316bf215546Sopenharmony_ci const unsigned mask = bld->mask[attrib]; 317bf215546Sopenharmony_ci const enum lp_interp interp = bld->interp[attrib]; 318bf215546Sopenharmony_ci const enum tgsi_interpolate_loc loc = bld->interp_loc[attrib]; 319bf215546Sopenharmony_ci unsigned chan; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 322bf215546Sopenharmony_ci if (mask & (1 << chan)) { 323bf215546Sopenharmony_ci LLVMValueRef index; 324bf215546Sopenharmony_ci LLVMValueRef dadx = coeff_bld->zero; 325bf215546Sopenharmony_ci LLVMValueRef dady = coeff_bld->zero; 326bf215546Sopenharmony_ci LLVMValueRef a = coeff_bld->zero; 327bf215546Sopenharmony_ci LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci index = lp_build_const_int32(gallivm, chan); 330bf215546Sopenharmony_ci switch (interp) { 331bf215546Sopenharmony_ci case LP_INTERP_PERSPECTIVE: 332bf215546Sopenharmony_ci FALLTHROUGH; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci case LP_INTERP_LINEAR: 335bf215546Sopenharmony_ci if (attrib == 0 && chan == 0) { 336bf215546Sopenharmony_ci dadx = coeff_bld->one; 337bf215546Sopenharmony_ci if (sample_id) { 338bf215546Sopenharmony_ci LLVMValueRef 
x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), ""); 339bf215546Sopenharmony_ci x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx); 340bf215546Sopenharmony_ci a = lp_build_broadcast_scalar(coeff_bld, x_val_idx); 341bf215546Sopenharmony_ci } else { 342bf215546Sopenharmony_ci a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); 343bf215546Sopenharmony_ci } 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci else if (attrib == 0 && chan == 1) { 346bf215546Sopenharmony_ci dady = coeff_bld->one; 347bf215546Sopenharmony_ci if (sample_id) { 348bf215546Sopenharmony_ci LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), ""); 349bf215546Sopenharmony_ci y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), ""); 350bf215546Sopenharmony_ci y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx); 351bf215546Sopenharmony_ci a = lp_build_broadcast_scalar(coeff_bld, y_val_idx); 352bf215546Sopenharmony_ci } else { 353bf215546Sopenharmony_ci a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci else { 357bf215546Sopenharmony_ci dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, 358bf215546Sopenharmony_ci coeff_bld->type, bld->dadxaos[attrib], 359bf215546Sopenharmony_ci index); 360bf215546Sopenharmony_ci dady = lp_build_extract_broadcast(gallivm, setup_bld->type, 361bf215546Sopenharmony_ci coeff_bld->type, bld->dadyaos[attrib], 362bf215546Sopenharmony_ci index); 363bf215546Sopenharmony_ci a = lp_build_extract_broadcast(gallivm, setup_bld->type, 364bf215546Sopenharmony_ci coeff_bld->type, bld->a0aos[attrib], 365bf215546Sopenharmony_ci index); 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci if (bld->coverage_samples > 1) { 368bf215546Sopenharmony_ci LLVMValueRef xoffset = pix_center_offset; 
369bf215546Sopenharmony_ci LLVMValueRef yoffset = pix_center_offset; 370bf215546Sopenharmony_ci if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) { 371bf215546Sopenharmony_ci LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), ""); 372bf215546Sopenharmony_ci LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), ""); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx); 375bf215546Sopenharmony_ci y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx); 376bf215546Sopenharmony_ci xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx); 377bf215546Sopenharmony_ci yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx); 378bf215546Sopenharmony_ci } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) { 379bf215546Sopenharmony_ci calc_centroid_offsets(bld, gallivm, loop_iter, mask_store, 380bf215546Sopenharmony_ci pix_center_offset, &xoffset, &yoffset); 381bf215546Sopenharmony_ci } 382bf215546Sopenharmony_ci chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset); 383bf215546Sopenharmony_ci chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset); 384bf215546Sopenharmony_ci } 385bf215546Sopenharmony_ci } 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci /* 388bf215546Sopenharmony_ci * a = a0 + (x * dadx + y * dady) 389bf215546Sopenharmony_ci */ 390bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a); 391bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dady, chan_pixoffy, a); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci if (interp == LP_INTERP_PERSPECTIVE) { 394bf215546Sopenharmony_ci if (oow == NULL) { 395bf215546Sopenharmony_ci LLVMValueRef w = bld->attribs[0][3]; 396bf215546Sopenharmony_ci assert(attrib != 0); 397bf215546Sopenharmony_ci assert(bld->mask[0] & TGSI_WRITEMASK_W); 
398bf215546Sopenharmony_ci oow = lp_build_rcp(coeff_bld, w); 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci a = lp_build_mul(coeff_bld, a, oow); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci break; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci case LP_INTERP_CONSTANT: 405bf215546Sopenharmony_ci case LP_INTERP_FACING: 406bf215546Sopenharmony_ci a = lp_build_extract_broadcast(gallivm, setup_bld->type, 407bf215546Sopenharmony_ci coeff_bld->type, bld->a0aos[attrib], 408bf215546Sopenharmony_ci index); 409bf215546Sopenharmony_ci break; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci case LP_INTERP_POSITION: 412bf215546Sopenharmony_ci assert(attrib > 0); 413bf215546Sopenharmony_ci a = bld->attribs[0][chan]; 414bf215546Sopenharmony_ci break; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci default: 417bf215546Sopenharmony_ci assert(0); 418bf215546Sopenharmony_ci break; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci if ((attrib == 0) && (chan == 2)) { 422bf215546Sopenharmony_ci /* add polygon-offset value, stored in the X component of a0 */ 423bf215546Sopenharmony_ci LLVMValueRef offset = 424bf215546Sopenharmony_ci lp_build_extract_broadcast(gallivm, setup_bld->type, 425bf215546Sopenharmony_ci coeff_bld->type, bld->a0aos[0], 426bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 0)); 427bf215546Sopenharmony_ci a = LLVMBuildFAdd(builder, a, offset, ""); 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci bld->attribs[attrib][chan] = a; 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_cistatic LLVMValueRef 437bf215546Sopenharmony_cilp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld, 438bf215546Sopenharmony_ci struct gallivm_state *gallivm, 439bf215546Sopenharmony_ci unsigned attrib, unsigned chan, 440bf215546Sopenharmony_ci LLVMValueRef 
indir_index, 441bf215546Sopenharmony_ci LLVMValueRef pixoffx, 442bf215546Sopenharmony_ci LLVMValueRef pixoffy) 443bf215546Sopenharmony_ci{ 444bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 445bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 446bf215546Sopenharmony_ci const enum lp_interp interp = bld->interp[attrib]; 447bf215546Sopenharmony_ci LLVMValueRef dadx = coeff_bld->zero; 448bf215546Sopenharmony_ci LLVMValueRef dady = coeff_bld->zero; 449bf215546Sopenharmony_ci LLVMValueRef a = coeff_bld->zero; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), ""); 454bf215546Sopenharmony_ci LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), ""); 455bf215546Sopenharmony_ci index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), ""); 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci /* size up to byte indices */ 458bf215546Sopenharmony_ci index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), ""); 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci struct lp_type dst_type = coeff_bld->type; 461bf215546Sopenharmony_ci dst_type.length = 1; 462bf215546Sopenharmony_ci switch (interp) { 463bf215546Sopenharmony_ci case LP_INTERP_PERSPECTIVE: 464bf215546Sopenharmony_ci FALLTHROUGH; 465bf215546Sopenharmony_ci case LP_INTERP_LINEAR: 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci dadx = lp_build_gather(gallivm, coeff_bld->type.length, 468bf215546Sopenharmony_ci coeff_bld->type.width, dst_type, 469bf215546Sopenharmony_ci true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false); 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci dady = 
lp_build_gather(gallivm, coeff_bld->type.length, 472bf215546Sopenharmony_ci coeff_bld->type.width, dst_type, 473bf215546Sopenharmony_ci true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci a = lp_build_gather(gallivm, coeff_bld->type.length, 476bf215546Sopenharmony_ci coeff_bld->type.width, dst_type, 477bf215546Sopenharmony_ci true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false); 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci /* 480bf215546Sopenharmony_ci * a = a0 + (x * dadx + y * dady) 481bf215546Sopenharmony_ci */ 482bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dadx, pixoffx, a); 483bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dady, pixoffy, a); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci if (interp == LP_INTERP_PERSPECTIVE) { 486bf215546Sopenharmony_ci LLVMValueRef w = bld->attribs[0][3]; 487bf215546Sopenharmony_ci assert(attrib != 0); 488bf215546Sopenharmony_ci assert(bld->mask[0] & TGSI_WRITEMASK_W); 489bf215546Sopenharmony_ci LLVMValueRef oow = lp_build_rcp(coeff_bld, w); 490bf215546Sopenharmony_ci a = lp_build_mul(coeff_bld, a, oow); 491bf215546Sopenharmony_ci } 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci break; 494bf215546Sopenharmony_ci case LP_INTERP_CONSTANT: 495bf215546Sopenharmony_ci case LP_INTERP_FACING: 496bf215546Sopenharmony_ci a = lp_build_gather(gallivm, coeff_bld->type.length, 497bf215546Sopenharmony_ci coeff_bld->type.width, dst_type, 498bf215546Sopenharmony_ci true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false); 499bf215546Sopenharmony_ci break; 500bf215546Sopenharmony_ci default: 501bf215546Sopenharmony_ci assert(0); 502bf215546Sopenharmony_ci break; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci return a; 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ciLLVMValueRef 508bf215546Sopenharmony_cilp_build_interp_soa(struct lp_build_interp_soa_context 
*bld, 509bf215546Sopenharmony_ci struct gallivm_state *gallivm, 510bf215546Sopenharmony_ci LLVMValueRef loop_iter, 511bf215546Sopenharmony_ci LLVMValueRef mask_store, 512bf215546Sopenharmony_ci unsigned attrib, unsigned chan, 513bf215546Sopenharmony_ci enum tgsi_interpolate_loc loc, 514bf215546Sopenharmony_ci LLVMValueRef indir_index, 515bf215546Sopenharmony_ci LLVMValueRef offsets[2]) 516bf215546Sopenharmony_ci{ 517bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 518bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 519bf215546Sopenharmony_ci struct lp_build_context *setup_bld = &bld->setup_bld; 520bf215546Sopenharmony_ci LLVMValueRef pixoffx; 521bf215546Sopenharmony_ci LLVMValueRef pixoffy; 522bf215546Sopenharmony_ci LLVMValueRef ptr; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci /* could do this with code-generated passed in pixel offsets too */ 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci assert(loop_iter); 527bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &loop_iter, 1, ""); 528bf215546Sopenharmony_ci pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, ""); 529bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &loop_iter, 1, ""); 530bf215546Sopenharmony_ci pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, ""); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, 533bf215546Sopenharmony_ci lp_build_broadcast_scalar(coeff_bld, bld->x), ""); 534bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, 535bf215546Sopenharmony_ci lp_build_broadcast_scalar(coeff_bld, bld->y), ""); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci if (loc == TGSI_INTERPOLATE_LOC_CENTER) { 540bf215546Sopenharmony_ci if 
(bld->coverage_samples > 1) { 541bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, ""); 542bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, ""); 543bf215546Sopenharmony_ci } 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci if (offsets[0]) 546bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, 547bf215546Sopenharmony_ci offsets[0], ""); 548bf215546Sopenharmony_ci if (offsets[1]) 549bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, 550bf215546Sopenharmony_ci offsets[1], ""); 551bf215546Sopenharmony_ci } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) { 552bf215546Sopenharmony_ci LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), ""); 553bf215546Sopenharmony_ci LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), ""); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array, 556bf215546Sopenharmony_ci LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), ""); 557bf215546Sopenharmony_ci LLVMValueRef xoffset = lp_build_gather(gallivm, 558bf215546Sopenharmony_ci bld->coeff_bld.type.length, 559bf215546Sopenharmony_ci bld->coeff_bld.type.width, 560bf215546Sopenharmony_ci lp_elem_type(bld->coeff_bld.type), 561bf215546Sopenharmony_ci false, 562bf215546Sopenharmony_ci base_ptr, 563bf215546Sopenharmony_ci x_val_idx, true); 564bf215546Sopenharmony_ci LLVMValueRef yoffset = lp_build_gather(gallivm, 565bf215546Sopenharmony_ci bld->coeff_bld.type.length, 566bf215546Sopenharmony_ci bld->coeff_bld.type.width, 567bf215546Sopenharmony_ci lp_elem_type(bld->coeff_bld.type), 568bf215546Sopenharmony_ci false, 569bf215546Sopenharmony_ci base_ptr, 570bf215546Sopenharmony_ci y_val_idx, true); 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci if 
(bld->coverage_samples > 1) { 573bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, ""); 574bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, ""); 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) { 577bf215546Sopenharmony_ci LLVMValueRef centroid_x_offset, centroid_y_offset; 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci /* for centroid find covered samples for this quad. */ 580bf215546Sopenharmony_ci /* if all samples are covered use pixel centers */ 581bf215546Sopenharmony_ci if (bld->coverage_samples > 1) { 582bf215546Sopenharmony_ci calc_centroid_offsets(bld, gallivm, loop_iter, mask_store, 583bf215546Sopenharmony_ci pix_center_offset, ¢roid_x_offset, 584bf215546Sopenharmony_ci ¢roid_y_offset); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, ""); 587bf215546Sopenharmony_ci pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, ""); 588bf215546Sopenharmony_ci } 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci // remap attrib properly. 
592bf215546Sopenharmony_ci attrib++; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci if (indir_index) 595bf215546Sopenharmony_ci return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan, 596bf215546Sopenharmony_ci indir_index, pixoffx, pixoffy); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci const enum lp_interp interp = bld->interp[attrib]; 600bf215546Sopenharmony_ci LLVMValueRef dadx = coeff_bld->zero; 601bf215546Sopenharmony_ci LLVMValueRef dady = coeff_bld->zero; 602bf215546Sopenharmony_ci LLVMValueRef a = coeff_bld->zero; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci LLVMValueRef index = lp_build_const_int32(gallivm, chan); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci switch (interp) { 607bf215546Sopenharmony_ci case LP_INTERP_PERSPECTIVE: 608bf215546Sopenharmony_ci FALLTHROUGH; 609bf215546Sopenharmony_ci case LP_INTERP_LINEAR: 610bf215546Sopenharmony_ci dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, 611bf215546Sopenharmony_ci coeff_bld->type, bld->dadxaos[attrib], 612bf215546Sopenharmony_ci index); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci dady = lp_build_extract_broadcast(gallivm, setup_bld->type, 615bf215546Sopenharmony_ci coeff_bld->type, bld->dadyaos[attrib], 616bf215546Sopenharmony_ci index); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci a = lp_build_extract_broadcast(gallivm, setup_bld->type, 619bf215546Sopenharmony_ci coeff_bld->type, bld->a0aos[attrib], 620bf215546Sopenharmony_ci index); 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci /* 623bf215546Sopenharmony_ci * a = a0 + (x * dadx + y * dady) 624bf215546Sopenharmony_ci */ 625bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dadx, pixoffx, a); 626bf215546Sopenharmony_ci a = lp_build_fmuladd(builder, dady, pixoffy, a); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci if (interp == LP_INTERP_PERSPECTIVE) { 629bf215546Sopenharmony_ci LLVMValueRef w = bld->attribs[0][3]; 
630bf215546Sopenharmony_ci assert(attrib != 0); 631bf215546Sopenharmony_ci assert(bld->mask[0] & TGSI_WRITEMASK_W); 632bf215546Sopenharmony_ci LLVMValueRef oow = lp_build_rcp(coeff_bld, w); 633bf215546Sopenharmony_ci a = lp_build_mul(coeff_bld, a, oow); 634bf215546Sopenharmony_ci } 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci break; 637bf215546Sopenharmony_ci case LP_INTERP_CONSTANT: 638bf215546Sopenharmony_ci case LP_INTERP_FACING: 639bf215546Sopenharmony_ci a = lp_build_extract_broadcast(gallivm, setup_bld->type, 640bf215546Sopenharmony_ci coeff_bld->type, bld->a0aos[attrib], 641bf215546Sopenharmony_ci index); 642bf215546Sopenharmony_ci break; 643bf215546Sopenharmony_ci default: 644bf215546Sopenharmony_ci assert(0); 645bf215546Sopenharmony_ci break; 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci return a; 648bf215546Sopenharmony_ci} 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci/** 651bf215546Sopenharmony_ci * Generate the position vectors. 652bf215546Sopenharmony_ci * 653bf215546Sopenharmony_ci * Parameter x0, y0 are the integer values with upper left coordinates. 654bf215546Sopenharmony_ci */ 655bf215546Sopenharmony_cistatic void 656bf215546Sopenharmony_cipos_init(struct lp_build_interp_soa_context *bld, 657bf215546Sopenharmony_ci LLVMValueRef x0, 658bf215546Sopenharmony_ci LLVMValueRef y0) 659bf215546Sopenharmony_ci{ 660bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder; 661bf215546Sopenharmony_ci struct lp_build_context *coeff_bld = &bld->coeff_bld; 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, ""); 664bf215546Sopenharmony_ci bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, ""); 665bf215546Sopenharmony_ci} 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci/** 669bf215546Sopenharmony_ci * Initialize fragment shader input attribute info. 
670bf215546Sopenharmony_ci */ 671bf215546Sopenharmony_civoid 672bf215546Sopenharmony_cilp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, 673bf215546Sopenharmony_ci struct gallivm_state *gallivm, 674bf215546Sopenharmony_ci unsigned num_inputs, 675bf215546Sopenharmony_ci const struct lp_shader_input *inputs, 676bf215546Sopenharmony_ci boolean pixel_center_integer, 677bf215546Sopenharmony_ci unsigned coverage_samples, 678bf215546Sopenharmony_ci LLVMValueRef sample_pos_array, 679bf215546Sopenharmony_ci LLVMValueRef num_loop, 680bf215546Sopenharmony_ci LLVMBuilderRef builder, 681bf215546Sopenharmony_ci struct lp_type type, 682bf215546Sopenharmony_ci LLVMValueRef a0_ptr, 683bf215546Sopenharmony_ci LLVMValueRef dadx_ptr, 684bf215546Sopenharmony_ci LLVMValueRef dady_ptr, 685bf215546Sopenharmony_ci LLVMValueRef x0, 686bf215546Sopenharmony_ci LLVMValueRef y0) 687bf215546Sopenharmony_ci{ 688bf215546Sopenharmony_ci struct lp_type coeff_type; 689bf215546Sopenharmony_ci struct lp_type setup_type; 690bf215546Sopenharmony_ci unsigned attrib; 691bf215546Sopenharmony_ci unsigned chan; 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci memset(bld, 0, sizeof *bld); 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci memset(&coeff_type, 0, sizeof coeff_type); 696bf215546Sopenharmony_ci coeff_type.floating = TRUE; 697bf215546Sopenharmony_ci coeff_type.sign = TRUE; 698bf215546Sopenharmony_ci coeff_type.width = 32; 699bf215546Sopenharmony_ci coeff_type.length = type.length; 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci memset(&setup_type, 0, sizeof setup_type); 702bf215546Sopenharmony_ci setup_type.floating = TRUE; 703bf215546Sopenharmony_ci setup_type.sign = TRUE; 704bf215546Sopenharmony_ci setup_type.width = 32; 705bf215546Sopenharmony_ci setup_type.length = TGSI_NUM_CHANNELS; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci /* XXX: we don't support interpolating into any other types */ 709bf215546Sopenharmony_ci 
assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type); 712bf215546Sopenharmony_ci lp_build_context_init(&bld->setup_bld, gallivm, setup_type); 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci /* For convenience */ 715bf215546Sopenharmony_ci bld->pos = bld->attribs[0]; 716bf215546Sopenharmony_ci bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1]; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci /* Position */ 719bf215546Sopenharmony_ci bld->mask[0] = TGSI_WRITEMASK_XYZW; 720bf215546Sopenharmony_ci bld->interp[0] = LP_INTERP_LINEAR; 721bf215546Sopenharmony_ci bld->interp_loc[0] = 0; 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci /* Inputs */ 724bf215546Sopenharmony_ci for (attrib = 0; attrib < num_inputs; ++attrib) { 725bf215546Sopenharmony_ci bld->mask[1 + attrib] = inputs[attrib].usage_mask; 726bf215546Sopenharmony_ci bld->interp[1 + attrib] = inputs[attrib].interp; 727bf215546Sopenharmony_ci bld->interp_loc[1 + attrib] = inputs[attrib].location; 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci bld->num_attribs = 1 + num_inputs; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci /* needed for indirect */ 732bf215546Sopenharmony_ci bld->a0_ptr = a0_ptr; 733bf215546Sopenharmony_ci bld->dadx_ptr = dadx_ptr; 734bf215546Sopenharmony_ci bld->dady_ptr = dady_ptr; 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci /* Ensure all masked out input channels have a valid value */ 737bf215546Sopenharmony_ci for (attrib = 0; attrib < bld->num_attribs; ++attrib) { 738bf215546Sopenharmony_ci for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 739bf215546Sopenharmony_ci bld->attribs[attrib][chan] = bld->coeff_bld.undef; 740bf215546Sopenharmony_ci } 741bf215546Sopenharmony_ci } 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci if (pixel_center_integer) { 744bf215546Sopenharmony_ci bld->pos_offset = 0.0; 
745bf215546Sopenharmony_ci } else { 746bf215546Sopenharmony_ci bld->pos_offset = 0.5; 747bf215546Sopenharmony_ci } 748bf215546Sopenharmony_ci bld->coverage_samples = coverage_samples; 749bf215546Sopenharmony_ci bld->num_loop = num_loop; 750bf215546Sopenharmony_ci bld->sample_pos_array = sample_pos_array; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci pos_init(bld, x0, y0); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci /* 755bf215546Sopenharmony_ci * Simple method (single step interpolation) may be slower if vector length 756bf215546Sopenharmony_ci * is just 4, but the results are different (generally less accurate) with 757bf215546Sopenharmony_ci * the other method, so always use more accurate version. 758bf215546Sopenharmony_ci */ 759bf215546Sopenharmony_ci { 760bf215546Sopenharmony_ci /* XXX this should use a global static table */ 761bf215546Sopenharmony_ci unsigned i; 762bf215546Sopenharmony_ci unsigned num_loops = 16 / type.length; 763bf215546Sopenharmony_ci LLVMValueRef pixoffx, pixoffy, index; 764bf215546Sopenharmony_ci LLVMValueRef ptr; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci bld->store_elem_type = lp_build_vec_type(gallivm, type); 767bf215546Sopenharmony_ci bld->xoffset_store = lp_build_array_alloca(gallivm, 768bf215546Sopenharmony_ci bld->store_elem_type, 769bf215546Sopenharmony_ci lp_build_const_int32(gallivm, num_loops), 770bf215546Sopenharmony_ci ""); 771bf215546Sopenharmony_ci bld->yoffset_store = lp_build_array_alloca(gallivm, 772bf215546Sopenharmony_ci bld->store_elem_type, 773bf215546Sopenharmony_ci lp_build_const_int32(gallivm, num_loops), 774bf215546Sopenharmony_ci ""); 775bf215546Sopenharmony_ci for (i = 0; i < num_loops; i++) { 776bf215546Sopenharmony_ci index = lp_build_const_int32(gallivm, i); 777bf215546Sopenharmony_ci calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy); 778bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store, &index, 1, ""); 
779bf215546Sopenharmony_ci LLVMBuildStore(builder, pixoffx, ptr); 780bf215546Sopenharmony_ci ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store, &index, 1, ""); 781bf215546Sopenharmony_ci LLVMBuildStore(builder, pixoffy, ptr); 782bf215546Sopenharmony_ci } 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr); 785bf215546Sopenharmony_ci} 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci/* 789bf215546Sopenharmony_ci * Advance the position and inputs to the given quad within the block. 790bf215546Sopenharmony_ci */ 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_civoid 793bf215546Sopenharmony_cilp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, 794bf215546Sopenharmony_ci struct gallivm_state *gallivm, 795bf215546Sopenharmony_ci LLVMValueRef quad_start_index, 796bf215546Sopenharmony_ci LLVMValueRef mask_store, 797bf215546Sopenharmony_ci LLVMValueRef sample_id) 798bf215546Sopenharmony_ci{ 799bf215546Sopenharmony_ci attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs); 800bf215546Sopenharmony_ci} 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_civoid 803bf215546Sopenharmony_cilp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, 804bf215546Sopenharmony_ci struct gallivm_state *gallivm, 805bf215546Sopenharmony_ci LLVMValueRef quad_start_index, 806bf215546Sopenharmony_ci LLVMValueRef sample_id) 807bf215546Sopenharmony_ci{ 808bf215546Sopenharmony_ci attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1); 809bf215546Sopenharmony_ci} 810bf215546Sopenharmony_ci 811