1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2009-2010 VMware, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** 29bf215546Sopenharmony_ci * @file 30bf215546Sopenharmony_ci * Depth/stencil testing to LLVM IR translation. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * To be done accurately/efficiently the depth/stencil test must be done with 33bf215546Sopenharmony_ci * the same type/format of the depth/stencil buffer, which implies massaging 34bf215546Sopenharmony_ci * the incoming depths to fit into place. Using a more straightforward 35bf215546Sopenharmony_ci * type/format for depth/stencil values internally and only convert when 36bf215546Sopenharmony_ci * flushing would avoid this, but it would most likely result in depth fighting 37bf215546Sopenharmony_ci * artifacts. 38bf215546Sopenharmony_ci * 39bf215546Sopenharmony_ci * Since we're using linear layout for everything, but we need to deal with 40bf215546Sopenharmony_ci * 2x2 quads, we need to load/store multiple values and swizzle them into 41bf215546Sopenharmony_ci * place (we could avoid this by doing depth/stencil testing in linear format, 42bf215546Sopenharmony_ci * which would be easy for late depth/stencil test as we could do that after 43bf215546Sopenharmony_ci * the fragment shader loop just as we do for color buffers, but more tricky 44bf215546Sopenharmony_ci * for early depth test as we'd need both masks and interpolated depth in 45bf215546Sopenharmony_ci * linear format). 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * 48bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com> 49bf215546Sopenharmony_ci * @author Brian Paul <jfonseca@vmware.com> 50bf215546Sopenharmony_ci */ 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci#include "pipe/p_state.h" 53bf215546Sopenharmony_ci#include "util/format/u_format.h" 54bf215546Sopenharmony_ci#include "util/u_cpu_detect.h" 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci#include "gallivm/lp_bld_type.h" 57bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h" 58bf215546Sopenharmony_ci#include "gallivm/lp_bld_bitarit.h" 59bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h" 60bf215546Sopenharmony_ci#include "gallivm/lp_bld_conv.h" 61bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h" 62bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h" 63bf215546Sopenharmony_ci#include "gallivm/lp_bld_intr.h" 64bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h" 65bf215546Sopenharmony_ci#include "gallivm/lp_bld_swizzle.h" 66bf215546Sopenharmony_ci#include "gallivm/lp_bld_pack.h" 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci#include "lp_bld_depth.h" 69bf215546Sopenharmony_ci#include "lp_state_fs.h" 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci/** Used to select fields from pipe_stencil_state */ 73bf215546Sopenharmony_cienum stencil_op { 74bf215546Sopenharmony_ci S_FAIL_OP, 75bf215546Sopenharmony_ci Z_FAIL_OP, 76bf215546Sopenharmony_ci Z_PASS_OP 77bf215546Sopenharmony_ci}; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci/** 82bf215546Sopenharmony_ci * Do the stencil test comparison (compare FB stencil values against ref value). 83bf215546Sopenharmony_ci * This will be used twice when generating two-sided stencil code. 84bf215546Sopenharmony_ci * \param stencil the front/back stencil state 85bf215546Sopenharmony_ci * \param stencilRef the stencil reference value, replicated as a vector 86bf215546Sopenharmony_ci * \param stencilVals vector of stencil values from framebuffer 87bf215546Sopenharmony_ci * \return vector mask of pass/fail values (~0 or 0) 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_cistatic LLVMValueRef 90bf215546Sopenharmony_cilp_build_stencil_test_single(struct lp_build_context *bld, 91bf215546Sopenharmony_ci const struct pipe_stencil_state *stencil, 92bf215546Sopenharmony_ci LLVMValueRef stencilRef, 93bf215546Sopenharmony_ci LLVMValueRef stencilVals) 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 96bf215546Sopenharmony_ci const unsigned stencilMax = 255; /* XXX fix */ 97bf215546Sopenharmony_ci struct lp_type type = bld->type; 98bf215546Sopenharmony_ci LLVMValueRef res; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci /* 101bf215546Sopenharmony_ci * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values 102bf215546Sopenharmony_ci * are between 0..255 so ensure we generate the fastest comparisons for 103bf215546Sopenharmony_ci * wider elements. 104bf215546Sopenharmony_ci */ 105bf215546Sopenharmony_ci if (type.width <= 8) { 106bf215546Sopenharmony_ci assert(!type.sign); 107bf215546Sopenharmony_ci } else { 108bf215546Sopenharmony_ci assert(type.sign); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci assert(stencil->enabled); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci if (stencil->valuemask != stencilMax) { 114bf215546Sopenharmony_ci /* compute stencilRef = stencilRef & valuemask */ 115bf215546Sopenharmony_ci LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); 116bf215546Sopenharmony_ci stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); 117bf215546Sopenharmony_ci /* compute stencilVals = stencilVals & valuemask */ 118bf215546Sopenharmony_ci stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci return res; 124bf215546Sopenharmony_ci} 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci/** 128bf215546Sopenharmony_ci * Do the one or two-sided stencil test comparison. 129bf215546Sopenharmony_ci * \sa lp_build_stencil_test_single 130bf215546Sopenharmony_ci * \param front_facing an integer vector mask, indicating front (~0) or back 131bf215546Sopenharmony_ci * (0) facing polygon. If NULL, assume front-facing. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_cistatic LLVMValueRef 134bf215546Sopenharmony_cilp_build_stencil_test(struct lp_build_context *bld, 135bf215546Sopenharmony_ci const struct pipe_stencil_state stencil[2], 136bf215546Sopenharmony_ci LLVMValueRef stencilRefs[2], 137bf215546Sopenharmony_ci LLVMValueRef stencilVals, 138bf215546Sopenharmony_ci LLVMValueRef front_facing) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci LLVMValueRef res; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci assert(stencil[0].enabled); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* do front face test */ 145bf215546Sopenharmony_ci res = lp_build_stencil_test_single(bld, &stencil[0], 146bf215546Sopenharmony_ci stencilRefs[0], stencilVals); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (stencil[1].enabled && front_facing != NULL) { 149bf215546Sopenharmony_ci /* do back face test */ 150bf215546Sopenharmony_ci LLVMValueRef back_res; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci back_res = lp_build_stencil_test_single(bld, &stencil[1], 153bf215546Sopenharmony_ci stencilRefs[1], stencilVals); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci res = lp_build_select(bld, front_facing, res, back_res); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci return res; 159bf215546Sopenharmony_ci} 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci/** 163bf215546Sopenharmony_ci * Apply the stencil operator (add/sub/keep/etc) to the given vector 164bf215546Sopenharmony_ci * of stencil values. 165bf215546Sopenharmony_ci * \return new stencil values vector 166bf215546Sopenharmony_ci */ 167bf215546Sopenharmony_cistatic LLVMValueRef 168bf215546Sopenharmony_cilp_build_stencil_op_single(struct lp_build_context *bld, 169bf215546Sopenharmony_ci const struct pipe_stencil_state *stencil, 170bf215546Sopenharmony_ci enum stencil_op op, 171bf215546Sopenharmony_ci LLVMValueRef stencilRef, 172bf215546Sopenharmony_ci LLVMValueRef stencilVals) 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci{ 175bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 176bf215546Sopenharmony_ci struct lp_type type = bld->type; 177bf215546Sopenharmony_ci LLVMValueRef res; 178bf215546Sopenharmony_ci LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); 179bf215546Sopenharmony_ci unsigned stencil_op; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci assert(type.sign); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci switch (op) { 184bf215546Sopenharmony_ci case S_FAIL_OP: 185bf215546Sopenharmony_ci stencil_op = stencil->fail_op; 186bf215546Sopenharmony_ci break; 187bf215546Sopenharmony_ci case Z_FAIL_OP: 188bf215546Sopenharmony_ci stencil_op = stencil->zfail_op; 189bf215546Sopenharmony_ci break; 190bf215546Sopenharmony_ci case Z_PASS_OP: 191bf215546Sopenharmony_ci stencil_op = stencil->zpass_op; 192bf215546Sopenharmony_ci break; 193bf215546Sopenharmony_ci default: 194bf215546Sopenharmony_ci assert(0 && "Invalid stencil_op mode"); 195bf215546Sopenharmony_ci stencil_op = PIPE_STENCIL_OP_KEEP; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci switch (stencil_op) { 199bf215546Sopenharmony_ci case PIPE_STENCIL_OP_KEEP: 200bf215546Sopenharmony_ci res = stencilVals; 201bf215546Sopenharmony_ci /* we can return early for this case */ 202bf215546Sopenharmony_ci return res; 203bf215546Sopenharmony_ci case PIPE_STENCIL_OP_ZERO: 204bf215546Sopenharmony_ci res = bld->zero; 205bf215546Sopenharmony_ci break; 206bf215546Sopenharmony_ci case PIPE_STENCIL_OP_REPLACE: 207bf215546Sopenharmony_ci res = stencilRef; 208bf215546Sopenharmony_ci break; 209bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR: 210bf215546Sopenharmony_ci res = lp_build_add(bld, stencilVals, bld->one); 211bf215546Sopenharmony_ci res = lp_build_min(bld, res, max); 212bf215546Sopenharmony_ci break; 213bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR: 214bf215546Sopenharmony_ci res = lp_build_sub(bld, stencilVals, bld->one); 215bf215546Sopenharmony_ci res = lp_build_max(bld, res, bld->zero); 216bf215546Sopenharmony_ci break; 217bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR_WRAP: 218bf215546Sopenharmony_ci res = lp_build_add(bld, stencilVals, bld->one); 219bf215546Sopenharmony_ci res = LLVMBuildAnd(builder, res, max, ""); 220bf215546Sopenharmony_ci break; 221bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR_WRAP: 222bf215546Sopenharmony_ci res = lp_build_sub(bld, stencilVals, bld->one); 223bf215546Sopenharmony_ci res = LLVMBuildAnd(builder, res, max, ""); 224bf215546Sopenharmony_ci break; 225bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INVERT: 226bf215546Sopenharmony_ci res = LLVMBuildNot(builder, stencilVals, ""); 227bf215546Sopenharmony_ci res = LLVMBuildAnd(builder, res, max, ""); 228bf215546Sopenharmony_ci break; 229bf215546Sopenharmony_ci default: 230bf215546Sopenharmony_ci assert(0 && "bad stencil op mode"); 231bf215546Sopenharmony_ci res = bld->undef; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci return res; 235bf215546Sopenharmony_ci} 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci/** 239bf215546Sopenharmony_ci * Do the one or two-sided stencil test op/update. 240bf215546Sopenharmony_ci */ 241bf215546Sopenharmony_cistatic LLVMValueRef 242bf215546Sopenharmony_cilp_build_stencil_op(struct lp_build_context *bld, 243bf215546Sopenharmony_ci const struct pipe_stencil_state stencil[2], 244bf215546Sopenharmony_ci enum stencil_op op, 245bf215546Sopenharmony_ci LLVMValueRef stencilRefs[2], 246bf215546Sopenharmony_ci LLVMValueRef stencilVals, 247bf215546Sopenharmony_ci LLVMValueRef mask, 248bf215546Sopenharmony_ci LLVMValueRef front_facing) 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 252bf215546Sopenharmony_ci LLVMValueRef res; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci assert(stencil[0].enabled); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* do front face op */ 257bf215546Sopenharmony_ci res = lp_build_stencil_op_single(bld, &stencil[0], op, 258bf215546Sopenharmony_ci stencilRefs[0], stencilVals); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci if (stencil[1].enabled && front_facing != NULL) { 261bf215546Sopenharmony_ci /* do back face op */ 262bf215546Sopenharmony_ci LLVMValueRef back_res; 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci back_res = lp_build_stencil_op_single(bld, &stencil[1], op, 265bf215546Sopenharmony_ci stencilRefs[1], stencilVals); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci res = lp_build_select(bld, front_facing, res, back_res); 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (stencil[0].writemask != 0xff || 271bf215546Sopenharmony_ci (stencil[1].enabled && front_facing != NULL && 272bf215546Sopenharmony_ci stencil[1].writemask != 0xff)) { 273bf215546Sopenharmony_ci /* mask &= stencil[0].writemask */ 274bf215546Sopenharmony_ci LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, 275bf215546Sopenharmony_ci stencil[0].writemask); 276bf215546Sopenharmony_ci if (stencil[1].enabled && 277bf215546Sopenharmony_ci stencil[1].writemask != stencil[0].writemask && 278bf215546Sopenharmony_ci front_facing != NULL) { 279bf215546Sopenharmony_ci LLVMValueRef back_writemask = 280bf215546Sopenharmony_ci lp_build_const_int_vec(bld->gallivm, bld->type, 281bf215546Sopenharmony_ci stencil[1].writemask); 282bf215546Sopenharmony_ci writemask = lp_build_select(bld, front_facing, 283bf215546Sopenharmony_ci writemask, back_writemask); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci mask = LLVMBuildAnd(builder, mask, writemask, ""); 287bf215546Sopenharmony_ci /* res = (res & mask) | (stencilVals & ~mask) */ 288bf215546Sopenharmony_ci res = lp_build_select_bitwise(bld, mask, res, stencilVals); 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci else { 291bf215546Sopenharmony_ci /* res = mask ? res : stencilVals */ 292bf215546Sopenharmony_ci res = lp_build_select(bld, mask, res, stencilVals); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci return res; 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci/** 301bf215546Sopenharmony_ci * Return a type that matches the depth/stencil format. 302bf215546Sopenharmony_ci */ 303bf215546Sopenharmony_cistruct lp_type 304bf215546Sopenharmony_cilp_depth_type(const struct util_format_description *format_desc, 305bf215546Sopenharmony_ci unsigned length) 306bf215546Sopenharmony_ci{ 307bf215546Sopenharmony_ci struct lp_type type; 308bf215546Sopenharmony_ci unsigned z_swizzle; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 311bf215546Sopenharmony_ci assert(format_desc->block.width == 1); 312bf215546Sopenharmony_ci assert(format_desc->block.height == 1); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci memset(&type, 0, sizeof type); 315bf215546Sopenharmony_ci type.width = format_desc->block.bits; 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci z_swizzle = format_desc->swizzle[0]; 318bf215546Sopenharmony_ci if (z_swizzle < 4) { 319bf215546Sopenharmony_ci if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { 320bf215546Sopenharmony_ci type.floating = TRUE; 321bf215546Sopenharmony_ci assert(z_swizzle == 0); 322bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].size == 32); 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci else if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 325bf215546Sopenharmony_ci assert(format_desc->block.bits <= 32); 326bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].normalized); 327bf215546Sopenharmony_ci if (format_desc->channel[z_swizzle].size < format_desc->block.bits) { 328bf215546Sopenharmony_ci /* Prefer signed integers when possible, as SSE has less support 329bf215546Sopenharmony_ci * for unsigned comparison; 330bf215546Sopenharmony_ci */ 331bf215546Sopenharmony_ci type.sign = TRUE; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci else 335bf215546Sopenharmony_ci assert(0); 336bf215546Sopenharmony_ci } 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci type.length = length; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci return type; 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci/** 345bf215546Sopenharmony_ci * Compute bitmask and bit shift to apply to the incoming fragment Z values 346bf215546Sopenharmony_ci * and the Z buffer values needed before doing the Z comparison. 347bf215546Sopenharmony_ci * 348bf215546Sopenharmony_ci * Note that we leave the Z bits in the position that we find them 349bf215546Sopenharmony_ci * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us 350bf215546Sopenharmony_ci * get by with fewer bit twiddling steps. 351bf215546Sopenharmony_ci */ 352bf215546Sopenharmony_cistatic boolean 353bf215546Sopenharmony_ciget_z_shift_and_mask(const struct util_format_description *format_desc, 354bf215546Sopenharmony_ci unsigned *shift, unsigned *width, unsigned *mask) 355bf215546Sopenharmony_ci{ 356bf215546Sopenharmony_ci unsigned total_bits; 357bf215546Sopenharmony_ci unsigned z_swizzle; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 360bf215546Sopenharmony_ci assert(format_desc->block.width == 1); 361bf215546Sopenharmony_ci assert(format_desc->block.height == 1); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci /* 64bit d/s format is special already extracted 32 bits */ 364bf215546Sopenharmony_ci total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci z_swizzle = format_desc->swizzle[0]; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci if (z_swizzle == PIPE_SWIZZLE_NONE) 369bf215546Sopenharmony_ci return FALSE; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci *width = format_desc->channel[z_swizzle].size; 372bf215546Sopenharmony_ci /* & 31 is for the same reason as the 32-bit limit above */ 373bf215546Sopenharmony_ci *shift = format_desc->channel[z_swizzle].shift & 31; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci if (*width == total_bits) { 376bf215546Sopenharmony_ci *mask = 0xffffffff; 377bf215546Sopenharmony_ci } else { 378bf215546Sopenharmony_ci *mask = ((1 << *width) - 1) << *shift; 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci return TRUE; 382bf215546Sopenharmony_ci} 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci/** 386bf215546Sopenharmony_ci * Compute bitmask and bit shift to apply to the framebuffer pixel values 387bf215546Sopenharmony_ci * to put the stencil bits in the least significant position. 388bf215546Sopenharmony_ci * (i.e. 0x000000ff) 389bf215546Sopenharmony_ci */ 390bf215546Sopenharmony_cistatic boolean 391bf215546Sopenharmony_ciget_s_shift_and_mask(const struct util_format_description *format_desc, 392bf215546Sopenharmony_ci unsigned *shift, unsigned *mask) 393bf215546Sopenharmony_ci{ 394bf215546Sopenharmony_ci const unsigned s_swizzle = format_desc->swizzle[1]; 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci if (s_swizzle == PIPE_SWIZZLE_NONE) 397bf215546Sopenharmony_ci return FALSE; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci /* just special case 64bit d/s format */ 400bf215546Sopenharmony_ci if (format_desc->block.bits > 32) { 401bf215546Sopenharmony_ci /* XXX big-endian? */ 402bf215546Sopenharmony_ci assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); 403bf215546Sopenharmony_ci *shift = 0; 404bf215546Sopenharmony_ci *mask = 0xff; 405bf215546Sopenharmony_ci return TRUE; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci *shift = format_desc->channel[s_swizzle].shift; 409bf215546Sopenharmony_ci const unsigned sz = format_desc->channel[s_swizzle].size; 410bf215546Sopenharmony_ci *mask = (1U << sz) - 1U; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci return TRUE; 413bf215546Sopenharmony_ci} 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci/** 417bf215546Sopenharmony_ci * Perform the occlusion test and increase the counter. 418bf215546Sopenharmony_ci * Test the depth mask. Add the number of channel which has none zero mask 419bf215546Sopenharmony_ci * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. 420bf215546Sopenharmony_ci * The counter will add 4. 421bf215546Sopenharmony_ci * TODO: could get that out of the fs loop. 422bf215546Sopenharmony_ci * 423bf215546Sopenharmony_ci * \param type holds element type of the mask vector. 424bf215546Sopenharmony_ci * \param maskvalue is the depth test mask. 425bf215546Sopenharmony_ci * \param counter is a pointer of the uint32 counter. 426bf215546Sopenharmony_ci */ 427bf215546Sopenharmony_civoid 428bf215546Sopenharmony_cilp_build_occlusion_count(struct gallivm_state *gallivm, 429bf215546Sopenharmony_ci struct lp_type type, 430bf215546Sopenharmony_ci LLVMValueRef maskvalue, 431bf215546Sopenharmony_ci LLVMValueRef counter) 432bf215546Sopenharmony_ci{ 433bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 434bf215546Sopenharmony_ci LLVMContextRef context = gallivm->context; 435bf215546Sopenharmony_ci LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); 436bf215546Sopenharmony_ci LLVMValueRef count, newcount; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci assert(type.length <= 16); 439bf215546Sopenharmony_ci assert(type.floating); 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (util_get_cpu_caps()->has_sse && type.length == 4) { 442bf215546Sopenharmony_ci const char *movmskintr = "llvm.x86.sse.movmsk.ps"; 443bf215546Sopenharmony_ci const char *popcntintr = "llvm.ctpop.i32"; 444bf215546Sopenharmony_ci LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, 445bf215546Sopenharmony_ci lp_build_vec_type(gallivm, type), ""); 446bf215546Sopenharmony_ci bits = lp_build_intrinsic_unary(builder, movmskintr, 447bf215546Sopenharmony_ci LLVMInt32TypeInContext(context), bits); 448bf215546Sopenharmony_ci count = lp_build_intrinsic_unary(builder, popcntintr, 449bf215546Sopenharmony_ci LLVMInt32TypeInContext(context), bits); 450bf215546Sopenharmony_ci count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci else if (util_get_cpu_caps()->has_avx && type.length == 8) { 453bf215546Sopenharmony_ci const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; 454bf215546Sopenharmony_ci const char *popcntintr = "llvm.ctpop.i32"; 455bf215546Sopenharmony_ci LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, 456bf215546Sopenharmony_ci lp_build_vec_type(gallivm, type), ""); 457bf215546Sopenharmony_ci bits = lp_build_intrinsic_unary(builder, movmskintr, 458bf215546Sopenharmony_ci LLVMInt32TypeInContext(context), bits); 459bf215546Sopenharmony_ci count = lp_build_intrinsic_unary(builder, popcntintr, 460bf215546Sopenharmony_ci LLVMInt32TypeInContext(context), bits); 461bf215546Sopenharmony_ci count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); 462bf215546Sopenharmony_ci } 463bf215546Sopenharmony_ci else { 464bf215546Sopenharmony_ci LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); 465bf215546Sopenharmony_ci LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); 466bf215546Sopenharmony_ci LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); 467bf215546Sopenharmony_ci LLVMValueRef shufflev, countd; 468bf215546Sopenharmony_ci LLVMValueRef shuffles[16]; 469bf215546Sopenharmony_ci const char *popcntintr = NULL; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci for (unsigned i = 0; i < type.length; i++) { 474bf215546Sopenharmony_ci#if UTIL_ARCH_LITTLE_ENDIAN 475bf215546Sopenharmony_ci shuffles[i] = lp_build_const_int32(gallivm, 4*i); 476bf215546Sopenharmony_ci#else 477bf215546Sopenharmony_ci shuffles[i] = lp_build_const_int32(gallivm, (4*i) + 3); 478bf215546Sopenharmony_ci#endif 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci shufflev = LLVMConstVector(shuffles, type.length); 482bf215546Sopenharmony_ci countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); 483bf215546Sopenharmony_ci countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* 486bf215546Sopenharmony_ci * XXX FIXME 487bf215546Sopenharmony_ci * this is bad on cpus without popcount (on x86 supported by intel 488bf215546Sopenharmony_ci * nehalem, amd barcelona, and up - not tied to sse42). 489bf215546Sopenharmony_ci * Would be much faster to just sum the 4 elements of the vector with 490bf215546Sopenharmony_ci * some horizontal add (shuffle/add/shuffle/add after the initial and). 491bf215546Sopenharmony_ci */ 492bf215546Sopenharmony_ci switch (type.length) { 493bf215546Sopenharmony_ci case 4: 494bf215546Sopenharmony_ci popcntintr = "llvm.ctpop.i32"; 495bf215546Sopenharmony_ci break; 496bf215546Sopenharmony_ci case 8: 497bf215546Sopenharmony_ci popcntintr = "llvm.ctpop.i64"; 498bf215546Sopenharmony_ci break; 499bf215546Sopenharmony_ci case 16: 500bf215546Sopenharmony_ci popcntintr = "llvm.ctpop.i128"; 501bf215546Sopenharmony_ci break; 502bf215546Sopenharmony_ci default: 503bf215546Sopenharmony_ci assert(0); 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci if (type.length > 8) { 508bf215546Sopenharmony_ci count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), ""); 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci else if (type.length < 8) { 511bf215546Sopenharmony_ci count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); 512bf215546Sopenharmony_ci } 513bf215546Sopenharmony_ci } 514bf215546Sopenharmony_ci newcount = LLVMBuildLoad2(builder, LLVMTypeOf(count), counter, "origcount"); 515bf215546Sopenharmony_ci newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); 516bf215546Sopenharmony_ci LLVMBuildStore(builder, newcount, counter); 517bf215546Sopenharmony_ci} 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci/** 521bf215546Sopenharmony_ci * Load depth/stencil values. 522bf215546Sopenharmony_ci * The stored values are linear, swizzle them. 523bf215546Sopenharmony_ci * 524bf215546Sopenharmony_ci * \param type the data type of the fragment depth/stencil values 525bf215546Sopenharmony_ci * \param format_desc description of the depth/stencil surface 526bf215546Sopenharmony_ci * \param is_1d whether this resource has only one dimension 527bf215546Sopenharmony_ci * \param loop_counter the current loop iteration 528bf215546Sopenharmony_ci * \param depth_ptr pointer to the depth/stencil values of this 4x4 block 529bf215546Sopenharmony_ci * \param depth_stride stride of the depth/stencil buffer 530bf215546Sopenharmony_ci * \param z_fb contains z values loaded from fb (may include padding) 531bf215546Sopenharmony_ci * \param s_fb contains s values loaded from fb (may include padding) 532bf215546Sopenharmony_ci */ 533bf215546Sopenharmony_civoid 534bf215546Sopenharmony_cilp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, 535bf215546Sopenharmony_ci struct lp_type z_src_type, 536bf215546Sopenharmony_ci const struct util_format_description *format_desc, 537bf215546Sopenharmony_ci boolean is_1d, 538bf215546Sopenharmony_ci LLVMValueRef depth_ptr, 539bf215546Sopenharmony_ci LLVMValueRef depth_stride, 540bf215546Sopenharmony_ci LLVMValueRef *z_fb, 541bf215546Sopenharmony_ci LLVMValueRef *s_fb, 542bf215546Sopenharmony_ci LLVMValueRef loop_counter) 543bf215546Sopenharmony_ci{ 544bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 545bf215546Sopenharmony_ci LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; 546bf215546Sopenharmony_ci LLVMValueRef depth_offset1, depth_offset2; 547bf215546Sopenharmony_ci const unsigned depth_bytes = format_desc->block.bits / 8; 548bf215546Sopenharmony_ci struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci struct lp_type zs_load_type = zs_type; 551bf215546Sopenharmony_ci zs_load_type.length = zs_load_type.length / 2; 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci LLVMTypeRef zs_dst_type = lp_build_vec_type(gallivm, zs_load_type); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci if (z_src_type.length == 4) { 556bf215546Sopenharmony_ci LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, 557bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 1), ""); 558bf215546Sopenharmony_ci LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, 559bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 2), ""); 560bf215546Sopenharmony_ci LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, 561bf215546Sopenharmony_ci depth_stride, ""); 562bf215546Sopenharmony_ci depth_offset1 = LLVMBuildMul(builder, looplsb, 563bf215546Sopenharmony_ci lp_build_const_int32(gallivm, depth_bytes * 2), ""); 564bf215546Sopenharmony_ci depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci /* just concatenate the loaded 2x2 values into 4-wide vector */ 567bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 568bf215546Sopenharmony_ci shuffles[i] = lp_build_const_int32(gallivm, i); 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci else { 572bf215546Sopenharmony_ci unsigned i; 573bf215546Sopenharmony_ci LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, 574bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 1), ""); 575bf215546Sopenharmony_ci assert(z_src_type.length == 8); 576bf215546Sopenharmony_ci depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); 577bf215546Sopenharmony_ci /* 578bf215546Sopenharmony_ci * We load 2x4 values, and need to swizzle them (order 579bf215546Sopenharmony_ci * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 580bf215546Sopenharmony_ci */ 581bf215546Sopenharmony_ci for (i = 0; i < 8; i++) { 582bf215546Sopenharmony_ci shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci } 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* Load current z/stencil values from z/stencil buffer */ 589bf215546Sopenharmony_ci LLVMTypeRef load_ptr_type = LLVMPointerType(zs_dst_type, 0); 590bf215546Sopenharmony_ci LLVMValueRef zs_dst_ptr = 591bf215546Sopenharmony_ci LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); 592bf215546Sopenharmony_ci zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); 593bf215546Sopenharmony_ci LLVMValueRef zs_dst1 = LLVMBuildLoad2(builder, zs_dst_type, zs_dst_ptr, ""); 594bf215546Sopenharmony_ci LLVMValueRef zs_dst2; 595bf215546Sopenharmony_ci if (is_1d) { 596bf215546Sopenharmony_ci zs_dst2 = lp_build_undef(gallivm, zs_load_type); 597bf215546Sopenharmony_ci } 598bf215546Sopenharmony_ci else { 599bf215546Sopenharmony_ci zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); 600bf215546Sopenharmony_ci zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); 601bf215546Sopenharmony_ci zs_dst2 = LLVMBuildLoad2(builder, zs_dst_type, zs_dst_ptr, ""); 602bf215546Sopenharmony_ci } 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, 605bf215546Sopenharmony_ci LLVMConstVector(shuffles, zs_type.length), ""); 606bf215546Sopenharmony_ci *s_fb = *z_fb; 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci if (format_desc->block.bits == 8) { 609bf215546Sopenharmony_ci /* Extend stencil-only 8 bit values (S8_UINT) */ 610bf215546Sopenharmony_ci *s_fb = LLVMBuildZExt(builder, *s_fb, 611bf215546Sopenharmony_ci lp_build_int_vec_type(gallivm, z_src_type), ""); 612bf215546Sopenharmony_ci } 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci if (format_desc->block.bits < z_src_type.width) { 615bf215546Sopenharmony_ci /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ 616bf215546Sopenharmony_ci *z_fb = LLVMBuildZExt(builder, *z_fb, 617bf215546Sopenharmony_ci lp_build_int_vec_type(gallivm, z_src_type), ""); 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci else if (format_desc->block.bits > 32) { 621bf215546Sopenharmony_ci /* rely on llvm to handle too wide vector we have here nicely */ 622bf215546Sopenharmony_ci struct lp_type typex2 = zs_type; 623bf215546Sopenharmony_ci struct lp_type s_type = zs_type; 624bf215546Sopenharmony_ci LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; 625bf215546Sopenharmony_ci LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; 626bf215546Sopenharmony_ci LLVMValueRef tmp; 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci typex2.width = typex2.width / 2; 629bf215546Sopenharmony_ci typex2.length = typex2.length * 2; 630bf215546Sopenharmony_ci s_type.width = s_type.width / 2; 631bf215546Sopenharmony_ci s_type.floating = 0; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci tmp = LLVMBuildBitCast(builder, *z_fb, 634bf215546Sopenharmony_ci lp_build_vec_type(gallivm, typex2), ""); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci for (unsigned i = 0; i < zs_type.length; i++) { 637bf215546Sopenharmony_ci shuffles1[i] = lp_build_const_int32(gallivm, i * 2); 638bf215546Sopenharmony_ci shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); 639bf215546Sopenharmony_ci } 640bf215546Sopenharmony_ci *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, 641bf215546Sopenharmony_ci LLVMConstVector(shuffles1, zs_type.length), ""); 642bf215546Sopenharmony_ci *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, 643bf215546Sopenharmony_ci LLVMConstVector(shuffles2, zs_type.length), ""); 644bf215546Sopenharmony_ci *s_fb = LLVMBuildBitCast(builder, *s_fb, 645bf215546Sopenharmony_ci lp_build_vec_type(gallivm, s_type), ""); 646bf215546Sopenharmony_ci lp_build_name(*s_fb, "s_dst"); 647bf215546Sopenharmony_ci } 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci lp_build_name(*z_fb, "z_dst"); 650bf215546Sopenharmony_ci lp_build_name(*s_fb, "s_dst"); 651bf215546Sopenharmony_ci lp_build_name(*z_fb, "z_dst"); 652bf215546Sopenharmony_ci} 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci/** 656bf215546Sopenharmony_ci * Store depth/stencil values. 657bf215546Sopenharmony_ci * Incoming values are swizzled (typically n 2x2 quads), stored linear. 658bf215546Sopenharmony_ci * If there's a mask it will do select/store otherwise just store. 659bf215546Sopenharmony_ci * 660bf215546Sopenharmony_ci * \param type the data type of the fragment depth/stencil values 661bf215546Sopenharmony_ci * \param format_desc description of the depth/stencil surface 662bf215546Sopenharmony_ci * \param is_1d whether this resource has only one dimension 663bf215546Sopenharmony_ci * \param mask_value the alive/dead pixel mask for the quad (vector) 664bf215546Sopenharmony_ci * \param z_fb z values read from fb (with padding) 665bf215546Sopenharmony_ci * \param s_fb s values read from fb (with padding) 666bf215546Sopenharmony_ci * \param loop_counter the current loop iteration 667bf215546Sopenharmony_ci * \param depth_ptr pointer to the depth/stencil values of this 4x4 block 668bf215546Sopenharmony_ci * \param depth_stride stride of the depth/stencil buffer 669bf215546Sopenharmony_ci * \param z_value the depth values to store (with padding) 670bf215546Sopenharmony_ci * \param s_value the stencil values to store (with padding) 671bf215546Sopenharmony_ci */ 672bf215546Sopenharmony_civoid 673bf215546Sopenharmony_cilp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, 674bf215546Sopenharmony_ci struct lp_type z_src_type, 675bf215546Sopenharmony_ci const struct util_format_description *format_desc, 676bf215546Sopenharmony_ci boolean is_1d, 677bf215546Sopenharmony_ci LLVMValueRef mask_value, 678bf215546Sopenharmony_ci LLVMValueRef z_fb, 679bf215546Sopenharmony_ci LLVMValueRef s_fb, 680bf215546Sopenharmony_ci LLVMValueRef loop_counter, 681bf215546Sopenharmony_ci LLVMValueRef depth_ptr, 682bf215546Sopenharmony_ci LLVMValueRef depth_stride, 683bf215546Sopenharmony_ci LLVMValueRef z_value, 684bf215546Sopenharmony_ci LLVMValueRef s_value) 685bf215546Sopenharmony_ci{ 686bf215546Sopenharmony_ci struct lp_build_context z_bld; 687bf215546Sopenharmony_ci LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; 688bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 689bf215546Sopenharmony_ci LLVMValueRef zs_dst1, zs_dst2; 690bf215546Sopenharmony_ci LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; 691bf215546Sopenharmony_ci LLVMValueRef depth_offset1, depth_offset2; 692bf215546Sopenharmony_ci LLVMTypeRef load_ptr_type; 693bf215546Sopenharmony_ci unsigned depth_bytes = format_desc->block.bits / 8; 694bf215546Sopenharmony_ci struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); 695bf215546Sopenharmony_ci struct lp_type z_type = zs_type; 696bf215546Sopenharmony_ci struct lp_type zs_load_type = zs_type; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci zs_load_type.length = zs_load_type.length / 2; 699bf215546Sopenharmony_ci load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci z_type.width = z_src_type.width; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci lp_build_context_init(&z_bld, gallivm, z_type); 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci /* 706bf215546Sopenharmony_ci * This is far from ideal, at least for late depth write we should do this 707bf215546Sopenharmony_ci * outside the fs loop to avoid all the swizzle stuff. 708bf215546Sopenharmony_ci */ 709bf215546Sopenharmony_ci if (z_src_type.length == 4) { 710bf215546Sopenharmony_ci LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, 711bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 1), ""); 712bf215546Sopenharmony_ci LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, 713bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 2), ""); 714bf215546Sopenharmony_ci LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, 715bf215546Sopenharmony_ci depth_stride, ""); 716bf215546Sopenharmony_ci depth_offset1 = LLVMBuildMul(builder, looplsb, 717bf215546Sopenharmony_ci lp_build_const_int32(gallivm, depth_bytes * 2), ""); 718bf215546Sopenharmony_ci depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); 719bf215546Sopenharmony_ci } 720bf215546Sopenharmony_ci else { 721bf215546Sopenharmony_ci LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, 722bf215546Sopenharmony_ci lp_build_const_int32(gallivm, 1), ""); 723bf215546Sopenharmony_ci assert(z_src_type.length == 8); 724bf215546Sopenharmony_ci depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); 725bf215546Sopenharmony_ci /* 726bf215546Sopenharmony_ci * We load 2x4 values, and need to swizzle them (order 727bf215546Sopenharmony_ci * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 728bf215546Sopenharmony_ci */ 729bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; i++) { 730bf215546Sopenharmony_ci shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci } 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); 737bf215546Sopenharmony_ci zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); 738bf215546Sopenharmony_ci zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); 739bf215546Sopenharmony_ci zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci if (format_desc->block.bits > 32) { 742bf215546Sopenharmony_ci s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); 743bf215546Sopenharmony_ci } 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci if (mask_value) { 746bf215546Sopenharmony_ci z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); 747bf215546Sopenharmony_ci if (format_desc->block.bits > 32) { 748bf215546Sopenharmony_ci s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); 749bf215546Sopenharmony_ci s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); 750bf215546Sopenharmony_ci } 751bf215546Sopenharmony_ci } 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci if (zs_type.width < z_src_type.width) { 754bf215546Sopenharmony_ci /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ 755bf215546Sopenharmony_ci z_value = LLVMBuildTrunc(builder, z_value, 756bf215546Sopenharmony_ci lp_build_int_vec_type(gallivm, zs_type), ""); 757bf215546Sopenharmony_ci } 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci if (format_desc->block.bits <= 32) { 760bf215546Sopenharmony_ci if (z_src_type.length == 4) { 761bf215546Sopenharmony_ci zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); 762bf215546Sopenharmony_ci zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci else { 765bf215546Sopenharmony_ci assert(z_src_type.length == 8); 766bf215546Sopenharmony_ci zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, 767bf215546Sopenharmony_ci LLVMConstVector(&shuffles[0], 768bf215546Sopenharmony_ci zs_load_type.length), ""); 769bf215546Sopenharmony_ci zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, 770bf215546Sopenharmony_ci LLVMConstVector(&shuffles[4], 771bf215546Sopenharmony_ci zs_load_type.length), ""); 772bf215546Sopenharmony_ci } 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci else { 775bf215546Sopenharmony_ci if (z_src_type.length == 4) { 776bf215546Sopenharmony_ci zs_dst1 = lp_build_interleave2(gallivm, z_type, 777bf215546Sopenharmony_ci z_value, s_value, 0); 778bf215546Sopenharmony_ci zs_dst2 = lp_build_interleave2(gallivm, z_type, 779bf215546Sopenharmony_ci z_value, s_value, 1); 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci else { 782bf215546Sopenharmony_ci LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; 783bf215546Sopenharmony_ci assert(z_src_type.length == 8); 784bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; i++) { 785bf215546Sopenharmony_ci shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); 786bf215546Sopenharmony_ci shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + 787bf215546Sopenharmony_ci z_src_type.length); 788bf215546Sopenharmony_ci } 789bf215546Sopenharmony_ci zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, 790bf215546Sopenharmony_ci LLVMConstVector(&shuffles[0], 791bf215546Sopenharmony_ci z_src_type.length), ""); 792bf215546Sopenharmony_ci zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, 793bf215546Sopenharmony_ci LLVMConstVector(&shuffles[8], 794bf215546Sopenharmony_ci z_src_type.length), ""); 795bf215546Sopenharmony_ci } 796bf215546Sopenharmony_ci zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, 797bf215546Sopenharmony_ci lp_build_vec_type(gallivm, zs_load_type), ""); 798bf215546Sopenharmony_ci zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, 799bf215546Sopenharmony_ci lp_build_vec_type(gallivm, zs_load_type), ""); 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); 803bf215546Sopenharmony_ci if (!is_1d) { 804bf215546Sopenharmony_ci LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); 805bf215546Sopenharmony_ci } 806bf215546Sopenharmony_ci} 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci/** 810bf215546Sopenharmony_ci * Generate code for performing depth and/or stencil tests. 811bf215546Sopenharmony_ci * We operate on a vector of values (typically n 2x2 quads). 812bf215546Sopenharmony_ci * 813bf215546Sopenharmony_ci * \param depth the depth test state 814bf215546Sopenharmony_ci * \param stencil the front/back stencil state 815bf215546Sopenharmony_ci * \param type the data type of the fragment depth/stencil values 816bf215546Sopenharmony_ci * \param format_desc description of the depth/stencil surface 817bf215546Sopenharmony_ci * \param mask the alive/dead pixel mask for the quad (vector) 818bf215546Sopenharmony_ci * \param cov_mask coverage mask 819bf215546Sopenharmony_ci * \param stencil_refs the front/back stencil ref values (scalar) 820bf215546Sopenharmony_ci * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) 821bf215546Sopenharmony_ci * \param zs_dst the depth/stencil values in framebuffer 822bf215546Sopenharmony_ci * \param face contains boolean value indicating front/back facing polygon 823bf215546Sopenharmony_ci */ 824bf215546Sopenharmony_civoid 825bf215546Sopenharmony_cilp_build_depth_stencil_test(struct gallivm_state *gallivm, 826bf215546Sopenharmony_ci const struct lp_depth_state *depth, 827bf215546Sopenharmony_ci const struct pipe_stencil_state stencil[2], 828bf215546Sopenharmony_ci struct lp_type z_src_type, 829bf215546Sopenharmony_ci const struct util_format_description *format_desc, 830bf215546Sopenharmony_ci struct lp_build_mask_context *mask, 831bf215546Sopenharmony_ci LLVMValueRef *cov_mask, 832bf215546Sopenharmony_ci LLVMValueRef stencil_refs[2], 833bf215546Sopenharmony_ci LLVMValueRef z_src, 834bf215546Sopenharmony_ci LLVMValueRef z_fb, 835bf215546Sopenharmony_ci LLVMValueRef s_fb, 836bf215546Sopenharmony_ci LLVMValueRef face, 837bf215546Sopenharmony_ci LLVMValueRef *z_value, 838bf215546Sopenharmony_ci LLVMValueRef *s_value, 839bf215546Sopenharmony_ci boolean do_branch, 840bf215546Sopenharmony_ci bool restrict_depth) 841bf215546Sopenharmony_ci{ 842bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 843bf215546Sopenharmony_ci struct lp_type z_type; 844bf215546Sopenharmony_ci struct lp_build_context z_bld; 845bf215546Sopenharmony_ci struct lp_build_context s_bld; 846bf215546Sopenharmony_ci struct lp_type s_type; 847bf215546Sopenharmony_ci unsigned z_shift = 0, z_width = 0, z_mask = 0; 848bf215546Sopenharmony_ci LLVMValueRef z_dst = NULL; 849bf215546Sopenharmony_ci LLVMValueRef stencil_vals = NULL; 850bf215546Sopenharmony_ci LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; 851bf215546Sopenharmony_ci LLVMValueRef z_pass = NULL, s_pass_mask = NULL; 852bf215546Sopenharmony_ci LLVMValueRef current_mask = mask ? lp_build_mask_value(mask) : *cov_mask; 853bf215546Sopenharmony_ci LLVMValueRef front_facing = NULL; 854bf215546Sopenharmony_ci boolean have_z, have_s; 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci /* 857bf215546Sopenharmony_ci * Depths are expected to be between 0 and 1, even if they are stored in 858bf215546Sopenharmony_ci * floats. Setting these bits here will ensure that the lp_build_conv() call 859bf215546Sopenharmony_ci * below won't try to unnecessarily clamp the incoming values. 860bf215546Sopenharmony_ci * If depths are expected outside 0..1 don't set these bits. 861bf215546Sopenharmony_ci */ 862bf215546Sopenharmony_ci if (z_src_type.floating) { 863bf215546Sopenharmony_ci if (restrict_depth) { 864bf215546Sopenharmony_ci z_src_type.sign = FALSE; 865bf215546Sopenharmony_ci z_src_type.norm = TRUE; 866bf215546Sopenharmony_ci } 867bf215546Sopenharmony_ci } 868bf215546Sopenharmony_ci else { 869bf215546Sopenharmony_ci assert(!z_src_type.sign); 870bf215546Sopenharmony_ci assert(z_src_type.norm); 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci /* Pick the type matching the depth-stencil format. */ 874bf215546Sopenharmony_ci z_type = lp_depth_type(format_desc, z_src_type.length); 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci /* Pick the intermediate type for depth operations. */ 877bf215546Sopenharmony_ci z_type.width = z_src_type.width; 878bf215546Sopenharmony_ci assert(z_type.length == z_src_type.length); 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci /* FIXME: for non-float depth/stencil might generate better code 881bf215546Sopenharmony_ci * if we'd always split it up to use 128bit operations. 882bf215546Sopenharmony_ci * For stencil we'd almost certainly want to pack to 8xi16 values, 883bf215546Sopenharmony_ci * for z just run twice. 884bf215546Sopenharmony_ci */ 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci /* Sanity checking */ 887bf215546Sopenharmony_ci { 888bf215546Sopenharmony_ci ASSERTED const unsigned z_swizzle = format_desc->swizzle[0]; 889bf215546Sopenharmony_ci ASSERTED const unsigned s_swizzle = format_desc->swizzle[1]; 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci assert(z_swizzle != PIPE_SWIZZLE_NONE || 892bf215546Sopenharmony_ci s_swizzle != PIPE_SWIZZLE_NONE); 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci assert(depth->enabled || stencil[0].enabled); 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 897bf215546Sopenharmony_ci assert(format_desc->block.width == 1); 898bf215546Sopenharmony_ci assert(format_desc->block.height == 1); 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci if (stencil[0].enabled) { 901bf215546Sopenharmony_ci assert(s_swizzle < 4); 902bf215546Sopenharmony_ci assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); 903bf215546Sopenharmony_ci assert(format_desc->channel[s_swizzle].pure_integer); 904bf215546Sopenharmony_ci assert(!format_desc->channel[s_swizzle].normalized); 905bf215546Sopenharmony_ci assert(format_desc->channel[s_swizzle].size == 8); 906bf215546Sopenharmony_ci } 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci if (depth->enabled) { 909bf215546Sopenharmony_ci assert(z_swizzle < 4); 910bf215546Sopenharmony_ci if (z_type.floating) { 911bf215546Sopenharmony_ci assert(z_swizzle == 0); 912bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].type == 913bf215546Sopenharmony_ci UTIL_FORMAT_TYPE_FLOAT); 914bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].size == 32); 915bf215546Sopenharmony_ci } 916bf215546Sopenharmony_ci else { 917bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].type == 918bf215546Sopenharmony_ci UTIL_FORMAT_TYPE_UNSIGNED); 919bf215546Sopenharmony_ci assert(format_desc->channel[z_swizzle].normalized); 920bf215546Sopenharmony_ci assert(!z_type.fixed); 921bf215546Sopenharmony_ci } 922bf215546Sopenharmony_ci } 923bf215546Sopenharmony_ci } 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci /* Setup build context for Z vals */ 927bf215546Sopenharmony_ci lp_build_context_init(&z_bld, gallivm, z_type); 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci /* Setup build context for stencil vals */ 930bf215546Sopenharmony_ci s_type = lp_int_type(z_type); 931bf215546Sopenharmony_ci lp_build_context_init(&s_bld, gallivm, s_type); 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci /* Compute and apply the Z/stencil bitmasks and shifts. 934bf215546Sopenharmony_ci */ 935bf215546Sopenharmony_ci { 936bf215546Sopenharmony_ci unsigned s_shift, s_mask; 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci z_dst = z_fb; 939bf215546Sopenharmony_ci stencil_vals = s_fb; 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); 942bf215546Sopenharmony_ci have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci if (have_z) { 945bf215546Sopenharmony_ci if (z_mask != 0xffffffff) { 946bf215546Sopenharmony_ci z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); 947bf215546Sopenharmony_ci } 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci /* 950bf215546Sopenharmony_ci * Align the framebuffer Z 's LSB to the right. 951bf215546Sopenharmony_ci */ 952bf215546Sopenharmony_ci if (z_shift) { 953bf215546Sopenharmony_ci LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); 954bf215546Sopenharmony_ci z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); 955bf215546Sopenharmony_ci } else if (z_bitmask) { 956bf215546Sopenharmony_ci z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); 957bf215546Sopenharmony_ci } else { 958bf215546Sopenharmony_ci lp_build_name(z_dst, "z_dst"); 959bf215546Sopenharmony_ci } 960bf215546Sopenharmony_ci } 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci if (have_s) { 963bf215546Sopenharmony_ci if (s_shift) { 964bf215546Sopenharmony_ci LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); 965bf215546Sopenharmony_ci stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); 966bf215546Sopenharmony_ci stencil_shift = shift; /* used below */ 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci if (s_mask != 0xffffffff) { 970bf215546Sopenharmony_ci LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); 971bf215546Sopenharmony_ci stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci lp_build_name(stencil_vals, "s_dst"); 975bf215546Sopenharmony_ci } 976bf215546Sopenharmony_ci } 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci if (stencil[0].enabled) { 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci if (face) { 981bf215546Sopenharmony_ci if (0) { 982bf215546Sopenharmony_ci /* 983bf215546Sopenharmony_ci * XXX: the scalar expansion below produces atrocious code 984bf215546Sopenharmony_ci * (basically producing a 64bit scalar value, then moving the 2 985bf215546Sopenharmony_ci * 32bit pieces separately to simd, plus 4 shuffles, which is 986bf215546Sopenharmony_ci * seriously lame). But the scalar-simd transitions are always 987bf215546Sopenharmony_ci * tricky, so no big surprise there. 988bf215546Sopenharmony_ci * This here would be way better, however llvm has some serious 989bf215546Sopenharmony_ci * trouble later using it in the select, probably because it will 990bf215546Sopenharmony_ci * recognize the expression as constant and move the simd value 991bf215546Sopenharmony_ci * away (out of the loop) - and then it will suddenly try 992bf215546Sopenharmony_ci * constructing i1 high-bit masks out of it later... 993bf215546Sopenharmony_ci * (Try piglit stencil-twoside.) 994bf215546Sopenharmony_ci * Note this is NOT due to using SExt/Trunc, it fails exactly the 995bf215546Sopenharmony_ci * same even when using native compare/select. 996bf215546Sopenharmony_ci * I cannot reproduce this problem when using stand-alone compiler 997bf215546Sopenharmony_ci * though, suggesting some problem with optimization passes... 998bf215546Sopenharmony_ci * (With stand-alone compilation, the construction of this mask 999bf215546Sopenharmony_ci * value, no matter if the easy 3 instruction here or the complex 1000bf215546Sopenharmony_ci * 16+ one below, never gets separated from where it's used.) 1001bf215546Sopenharmony_ci * The scalar code still has the same problem, but the generated 1002bf215546Sopenharmony_ci * code looks a bit better at least for some reason, even if 1003bf215546Sopenharmony_ci * mostly by luck (the fundamental issue clearly is the same). 1004bf215546Sopenharmony_ci */ 1005bf215546Sopenharmony_ci front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face); 1006bf215546Sopenharmony_ci /* front_facing = face != 0 ? ~0 : 0 */ 1007bf215546Sopenharmony_ci front_facing = lp_build_compare(gallivm, s_bld.type, 1008bf215546Sopenharmony_ci PIPE_FUNC_NOTEQUAL, 1009bf215546Sopenharmony_ci front_facing, s_bld.zero); 1010bf215546Sopenharmony_ci } else { 1011bf215546Sopenharmony_ci LLVMValueRef zero = lp_build_const_int32(gallivm, 0); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci /* front_facing = face != 0 ? ~0 : 0 */ 1014bf215546Sopenharmony_ci front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); 1015bf215546Sopenharmony_ci front_facing = LLVMBuildSExt(builder, front_facing, 1016bf215546Sopenharmony_ci LLVMIntTypeInContext(gallivm->context, 1017bf215546Sopenharmony_ci s_bld.type.length*s_bld.type.width), 1018bf215546Sopenharmony_ci ""); 1019bf215546Sopenharmony_ci front_facing = LLVMBuildBitCast(builder, front_facing, 1020bf215546Sopenharmony_ci s_bld.int_vec_type, ""); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci } 1023bf215546Sopenharmony_ci } 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci s_pass_mask = lp_build_stencil_test(&s_bld, stencil, 1026bf215546Sopenharmony_ci stencil_refs, stencil_vals, 1027bf215546Sopenharmony_ci front_facing); 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci /* apply stencil-fail operator */ 1030bf215546Sopenharmony_ci { 1031bf215546Sopenharmony_ci LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask); 1032bf215546Sopenharmony_ci stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, 1033bf215546Sopenharmony_ci stencil_refs, stencil_vals, 1034bf215546Sopenharmony_ci s_fail_mask, front_facing); 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci } 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci if (depth->enabled) { 1039bf215546Sopenharmony_ci /* 1040bf215546Sopenharmony_ci * Convert fragment Z to the desired type, aligning the LSB to the right. 1041bf215546Sopenharmony_ci */ 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci assert(z_type.width == z_src_type.width); 1044bf215546Sopenharmony_ci assert(z_type.length == z_src_type.length); 1045bf215546Sopenharmony_ci assert(lp_check_value(z_src_type, z_src)); 1046bf215546Sopenharmony_ci if (z_src_type.floating) { 1047bf215546Sopenharmony_ci /* 1048bf215546Sopenharmony_ci * Convert from floating point values 1049bf215546Sopenharmony_ci */ 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci if (!z_type.floating) { 1052bf215546Sopenharmony_ci z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, 1053bf215546Sopenharmony_ci z_src_type, 1054bf215546Sopenharmony_ci z_width, 1055bf215546Sopenharmony_ci z_src); 1056bf215546Sopenharmony_ci } 1057bf215546Sopenharmony_ci } else { 1058bf215546Sopenharmony_ci /* 1059bf215546Sopenharmony_ci * Convert from unsigned normalized values. 1060bf215546Sopenharmony_ci */ 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci assert(!z_src_type.sign); 1063bf215546Sopenharmony_ci assert(!z_src_type.fixed); 1064bf215546Sopenharmony_ci assert(z_src_type.norm); 1065bf215546Sopenharmony_ci assert(!z_type.floating); 1066bf215546Sopenharmony_ci if (z_src_type.width > z_width) { 1067bf215546Sopenharmony_ci LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, 1068bf215546Sopenharmony_ci z_src_type.width - z_width); 1069bf215546Sopenharmony_ci z_src = LLVMBuildLShr(builder, z_src, shift, ""); 1070bf215546Sopenharmony_ci } 1071bf215546Sopenharmony_ci } 1072bf215546Sopenharmony_ci assert(lp_check_value(z_type, z_src)); 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci lp_build_name(z_src, "z_src"); 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci /* compare src Z to dst Z, returning 'pass' mask */ 1077bf215546Sopenharmony_ci z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci /* mask off bits that failed stencil test */ 1080bf215546Sopenharmony_ci if (s_pass_mask) { 1081bf215546Sopenharmony_ci current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); 1082bf215546Sopenharmony_ci } 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci if (!stencil[0].enabled && mask) { 1085bf215546Sopenharmony_ci /* We can potentially skip all remaining operations here, but only 1086bf215546Sopenharmony_ci * if stencil is disabled because we still need to update the stencil 1087bf215546Sopenharmony_ci * buffer values. Don't need to update Z buffer values. 1088bf215546Sopenharmony_ci */ 1089bf215546Sopenharmony_ci lp_build_mask_update(mask, z_pass); 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci if (do_branch) { 1092bf215546Sopenharmony_ci lp_build_mask_check(mask); 1093bf215546Sopenharmony_ci } 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci if (depth->writemask) { 1097bf215546Sopenharmony_ci LLVMValueRef z_pass_mask; 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci /* mask off bits that failed Z test */ 1100bf215546Sopenharmony_ci z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci /* Mix the old and new Z buffer values. 1103bf215546Sopenharmony_ci * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] 1104bf215546Sopenharmony_ci */ 1105bf215546Sopenharmony_ci z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst); 1106bf215546Sopenharmony_ci } 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci if (stencil[0].enabled) { 1109bf215546Sopenharmony_ci /* update stencil buffer values according to z pass/fail result */ 1110bf215546Sopenharmony_ci LLVMValueRef z_fail_mask, z_pass_mask; 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci /* apply Z-fail operator */ 1113bf215546Sopenharmony_ci z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass); 1114bf215546Sopenharmony_ci stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, 1115bf215546Sopenharmony_ci stencil_refs, stencil_vals, 1116bf215546Sopenharmony_ci z_fail_mask, front_facing); 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci /* apply Z-pass operator */ 1119bf215546Sopenharmony_ci z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); 1120bf215546Sopenharmony_ci stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, 1121bf215546Sopenharmony_ci stencil_refs, stencil_vals, 1122bf215546Sopenharmony_ci z_pass_mask, front_facing); 1123bf215546Sopenharmony_ci } 1124bf215546Sopenharmony_ci } 1125bf215546Sopenharmony_ci else { 1126bf215546Sopenharmony_ci /* No depth test: apply Z-pass operator to stencil buffer values which 1127bf215546Sopenharmony_ci * passed the stencil test. 1128bf215546Sopenharmony_ci */ 1129bf215546Sopenharmony_ci s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); 1130bf215546Sopenharmony_ci stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, 1131bf215546Sopenharmony_ci stencil_refs, stencil_vals, 1132bf215546Sopenharmony_ci s_pass_mask, front_facing); 1133bf215546Sopenharmony_ci } 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci /* Put Z and stencil bits in the right place */ 1136bf215546Sopenharmony_ci if (have_z && z_shift) { 1137bf215546Sopenharmony_ci LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); 1138bf215546Sopenharmony_ci z_dst = LLVMBuildShl(builder, z_dst, shift, ""); 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci if (stencil_vals && stencil_shift) 1141bf215546Sopenharmony_ci stencil_vals = LLVMBuildShl(builder, stencil_vals, 1142bf215546Sopenharmony_ci stencil_shift, ""); 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci /* Finally, merge the z/stencil values */ 1145bf215546Sopenharmony_ci if (format_desc->block.bits <= 32) { 1146bf215546Sopenharmony_ci if (have_z && have_s) 1147bf215546Sopenharmony_ci *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); 1148bf215546Sopenharmony_ci else if (have_z) 1149bf215546Sopenharmony_ci *z_value = z_dst; 1150bf215546Sopenharmony_ci else 1151bf215546Sopenharmony_ci *z_value = stencil_vals; 1152bf215546Sopenharmony_ci *s_value = *z_value; 1153bf215546Sopenharmony_ci } 1154bf215546Sopenharmony_ci else { 1155bf215546Sopenharmony_ci *z_value = z_dst; 1156bf215546Sopenharmony_ci *s_value = stencil_vals; 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci if (mask) { 1160bf215546Sopenharmony_ci if (s_pass_mask) 1161bf215546Sopenharmony_ci lp_build_mask_update(mask, s_pass_mask); 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci if (depth->enabled && stencil[0].enabled) 1164bf215546Sopenharmony_ci lp_build_mask_update(mask, z_pass); 1165bf215546Sopenharmony_ci } else { 1166bf215546Sopenharmony_ci LLVMValueRef tmp_mask = *cov_mask; 1167bf215546Sopenharmony_ci if (s_pass_mask) 1168bf215546Sopenharmony_ci tmp_mask = LLVMBuildAnd(builder, tmp_mask, s_pass_mask, ""); 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci /* for multisample we don't do the stencil optimisation so update always */ 1171bf215546Sopenharmony_ci if (depth->enabled) 1172bf215546Sopenharmony_ci tmp_mask = LLVMBuildAnd(builder, tmp_mask, z_pass, ""); 1173bf215546Sopenharmony_ci *cov_mask = tmp_mask; 1174bf215546Sopenharmony_ci } 1175bf215546Sopenharmony_ci} 1176bf215546Sopenharmony_ci 1177