1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2009-2010 VMware, Inc.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/**
29bf215546Sopenharmony_ci * @file
30bf215546Sopenharmony_ci * Depth/stencil testing to LLVM IR translation.
31bf215546Sopenharmony_ci *
32bf215546Sopenharmony_ci * To be done accurately/efficiently the depth/stencil test must be done with
33bf215546Sopenharmony_ci * the same type/format of the depth/stencil buffer, which implies massaging
34bf215546Sopenharmony_ci * the incoming depths to fit into place. Using a more straightforward
35bf215546Sopenharmony_ci * type/format for depth/stencil values internally and only convert when
36bf215546Sopenharmony_ci * flushing would avoid this, but it would most likely result in depth fighting
37bf215546Sopenharmony_ci * artifacts.
38bf215546Sopenharmony_ci *
39bf215546Sopenharmony_ci * Since we're using linear layout for everything, but we need to deal with
40bf215546Sopenharmony_ci * 2x2 quads, we need to load/store multiple values and swizzle them into
41bf215546Sopenharmony_ci * place (we could avoid this by doing depth/stencil testing in linear format,
42bf215546Sopenharmony_ci * which would be easy for late depth/stencil test as we could do that after
43bf215546Sopenharmony_ci * the fragment shader loop just as we do for color buffers, but more tricky
44bf215546Sopenharmony_ci * for early depth test as we'd need both masks and interpolated depth in
45bf215546Sopenharmony_ci * linear format).
46bf215546Sopenharmony_ci *
47bf215546Sopenharmony_ci *
48bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
50bf215546Sopenharmony_ci */
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci#include "pipe/p_state.h"
53bf215546Sopenharmony_ci#include "util/format/u_format.h"
54bf215546Sopenharmony_ci#include "util/u_cpu_detect.h"
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci#include "gallivm/lp_bld_type.h"
57bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h"
58bf215546Sopenharmony_ci#include "gallivm/lp_bld_bitarit.h"
59bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h"
60bf215546Sopenharmony_ci#include "gallivm/lp_bld_conv.h"
61bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h"
62bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h"
63bf215546Sopenharmony_ci#include "gallivm/lp_bld_intr.h"
64bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h"
65bf215546Sopenharmony_ci#include "gallivm/lp_bld_swizzle.h"
66bf215546Sopenharmony_ci#include "gallivm/lp_bld_pack.h"
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci#include "lp_bld_depth.h"
69bf215546Sopenharmony_ci#include "lp_state_fs.h"
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci
/**
 * Selector for one of the three operation fields of pipe_stencil_state.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /* selects pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /* selects pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /* selects pipe_stencil_state::zpass_op */
};
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci/**
82bf215546Sopenharmony_ci * Do the stencil test comparison (compare FB stencil values against ref value).
83bf215546Sopenharmony_ci * This will be used twice when generating two-sided stencil code.
84bf215546Sopenharmony_ci * \param stencil  the front/back stencil state
85bf215546Sopenharmony_ci * \param stencilRef  the stencil reference value, replicated as a vector
86bf215546Sopenharmony_ci * \param stencilVals  vector of stencil values from framebuffer
87bf215546Sopenharmony_ci * \return vector mask of pass/fail values (~0 or 0)
88bf215546Sopenharmony_ci */
89bf215546Sopenharmony_cistatic LLVMValueRef
90bf215546Sopenharmony_cilp_build_stencil_test_single(struct lp_build_context *bld,
91bf215546Sopenharmony_ci                             const struct pipe_stencil_state *stencil,
92bf215546Sopenharmony_ci                             LLVMValueRef stencilRef,
93bf215546Sopenharmony_ci                             LLVMValueRef stencilVals)
94bf215546Sopenharmony_ci{
95bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
96bf215546Sopenharmony_ci   const unsigned stencilMax = 255; /* XXX fix */
97bf215546Sopenharmony_ci   struct lp_type type = bld->type;
98bf215546Sopenharmony_ci   LLVMValueRef res;
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   /*
101bf215546Sopenharmony_ci    * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
102bf215546Sopenharmony_ci    * are between 0..255 so ensure we generate the fastest comparisons for
103bf215546Sopenharmony_ci    * wider elements.
104bf215546Sopenharmony_ci    */
105bf215546Sopenharmony_ci   if (type.width <= 8) {
106bf215546Sopenharmony_ci      assert(!type.sign);
107bf215546Sopenharmony_ci   } else {
108bf215546Sopenharmony_ci      assert(type.sign);
109bf215546Sopenharmony_ci   }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   assert(stencil->enabled);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   if (stencil->valuemask != stencilMax) {
114bf215546Sopenharmony_ci      /* compute stencilRef = stencilRef & valuemask */
115bf215546Sopenharmony_ci      LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
116bf215546Sopenharmony_ci      stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
117bf215546Sopenharmony_ci      /* compute stencilVals = stencilVals & valuemask */
118bf215546Sopenharmony_ci      stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
119bf215546Sopenharmony_ci   }
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   return res;
124bf215546Sopenharmony_ci}
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci/**
128bf215546Sopenharmony_ci * Do the one or two-sided stencil test comparison.
129bf215546Sopenharmony_ci * \sa lp_build_stencil_test_single
130bf215546Sopenharmony_ci * \param front_facing  an integer vector mask, indicating front (~0) or back
131bf215546Sopenharmony_ci *                      (0) facing polygon. If NULL, assume front-facing.
132bf215546Sopenharmony_ci */
133bf215546Sopenharmony_cistatic LLVMValueRef
134bf215546Sopenharmony_cilp_build_stencil_test(struct lp_build_context *bld,
135bf215546Sopenharmony_ci                      const struct pipe_stencil_state stencil[2],
136bf215546Sopenharmony_ci                      LLVMValueRef stencilRefs[2],
137bf215546Sopenharmony_ci                      LLVMValueRef stencilVals,
138bf215546Sopenharmony_ci                      LLVMValueRef front_facing)
139bf215546Sopenharmony_ci{
140bf215546Sopenharmony_ci   LLVMValueRef res;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   assert(stencil[0].enabled);
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   /* do front face test */
145bf215546Sopenharmony_ci   res = lp_build_stencil_test_single(bld, &stencil[0],
146bf215546Sopenharmony_ci                                      stencilRefs[0], stencilVals);
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   if (stencil[1].enabled && front_facing != NULL) {
149bf215546Sopenharmony_ci      /* do back face test */
150bf215546Sopenharmony_ci      LLVMValueRef back_res;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci      back_res = lp_build_stencil_test_single(bld, &stencil[1],
153bf215546Sopenharmony_ci                                              stencilRefs[1], stencilVals);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci      res = lp_build_select(bld, front_facing, res, back_res);
156bf215546Sopenharmony_ci   }
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   return res;
159bf215546Sopenharmony_ci}
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci/**
163bf215546Sopenharmony_ci * Apply the stencil operator (add/sub/keep/etc) to the given vector
164bf215546Sopenharmony_ci * of stencil values.
165bf215546Sopenharmony_ci * \return  new stencil values vector
166bf215546Sopenharmony_ci */
167bf215546Sopenharmony_cistatic LLVMValueRef
168bf215546Sopenharmony_cilp_build_stencil_op_single(struct lp_build_context *bld,
169bf215546Sopenharmony_ci                           const struct pipe_stencil_state *stencil,
170bf215546Sopenharmony_ci                           enum stencil_op op,
171bf215546Sopenharmony_ci                           LLVMValueRef stencilRef,
172bf215546Sopenharmony_ci                           LLVMValueRef stencilVals)
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci{
175bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
176bf215546Sopenharmony_ci   struct lp_type type = bld->type;
177bf215546Sopenharmony_ci   LLVMValueRef res;
178bf215546Sopenharmony_ci   LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
179bf215546Sopenharmony_ci   unsigned stencil_op;
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   assert(type.sign);
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   switch (op) {
184bf215546Sopenharmony_ci   case S_FAIL_OP:
185bf215546Sopenharmony_ci      stencil_op = stencil->fail_op;
186bf215546Sopenharmony_ci      break;
187bf215546Sopenharmony_ci   case Z_FAIL_OP:
188bf215546Sopenharmony_ci      stencil_op = stencil->zfail_op;
189bf215546Sopenharmony_ci      break;
190bf215546Sopenharmony_ci   case Z_PASS_OP:
191bf215546Sopenharmony_ci      stencil_op = stencil->zpass_op;
192bf215546Sopenharmony_ci      break;
193bf215546Sopenharmony_ci   default:
194bf215546Sopenharmony_ci      assert(0 && "Invalid stencil_op mode");
195bf215546Sopenharmony_ci      stencil_op = PIPE_STENCIL_OP_KEEP;
196bf215546Sopenharmony_ci   }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   switch (stencil_op) {
199bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_KEEP:
200bf215546Sopenharmony_ci      res = stencilVals;
201bf215546Sopenharmony_ci      /* we can return early for this case */
202bf215546Sopenharmony_ci      return res;
203bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_ZERO:
204bf215546Sopenharmony_ci      res = bld->zero;
205bf215546Sopenharmony_ci      break;
206bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_REPLACE:
207bf215546Sopenharmony_ci      res = stencilRef;
208bf215546Sopenharmony_ci      break;
209bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_INCR:
210bf215546Sopenharmony_ci      res = lp_build_add(bld, stencilVals, bld->one);
211bf215546Sopenharmony_ci      res = lp_build_min(bld, res, max);
212bf215546Sopenharmony_ci      break;
213bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_DECR:
214bf215546Sopenharmony_ci      res = lp_build_sub(bld, stencilVals, bld->one);
215bf215546Sopenharmony_ci      res = lp_build_max(bld, res, bld->zero);
216bf215546Sopenharmony_ci      break;
217bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_INCR_WRAP:
218bf215546Sopenharmony_ci      res = lp_build_add(bld, stencilVals, bld->one);
219bf215546Sopenharmony_ci      res = LLVMBuildAnd(builder, res, max, "");
220bf215546Sopenharmony_ci      break;
221bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_DECR_WRAP:
222bf215546Sopenharmony_ci      res = lp_build_sub(bld, stencilVals, bld->one);
223bf215546Sopenharmony_ci      res = LLVMBuildAnd(builder, res, max, "");
224bf215546Sopenharmony_ci      break;
225bf215546Sopenharmony_ci   case PIPE_STENCIL_OP_INVERT:
226bf215546Sopenharmony_ci      res = LLVMBuildNot(builder, stencilVals, "");
227bf215546Sopenharmony_ci      res = LLVMBuildAnd(builder, res, max, "");
228bf215546Sopenharmony_ci      break;
229bf215546Sopenharmony_ci   default:
230bf215546Sopenharmony_ci      assert(0 && "bad stencil op mode");
231bf215546Sopenharmony_ci      res = bld->undef;
232bf215546Sopenharmony_ci   }
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci   return res;
235bf215546Sopenharmony_ci}
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci/**
239bf215546Sopenharmony_ci * Do the one or two-sided stencil test op/update.
240bf215546Sopenharmony_ci */
241bf215546Sopenharmony_cistatic LLVMValueRef
242bf215546Sopenharmony_cilp_build_stencil_op(struct lp_build_context *bld,
243bf215546Sopenharmony_ci                    const struct pipe_stencil_state stencil[2],
244bf215546Sopenharmony_ci                    enum stencil_op op,
245bf215546Sopenharmony_ci                    LLVMValueRef stencilRefs[2],
246bf215546Sopenharmony_ci                    LLVMValueRef stencilVals,
247bf215546Sopenharmony_ci                    LLVMValueRef mask,
248bf215546Sopenharmony_ci                    LLVMValueRef front_facing)
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
252bf215546Sopenharmony_ci   LLVMValueRef res;
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   assert(stencil[0].enabled);
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   /* do front face op */
257bf215546Sopenharmony_ci   res = lp_build_stencil_op_single(bld, &stencil[0], op,
258bf215546Sopenharmony_ci                                     stencilRefs[0], stencilVals);
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci   if (stencil[1].enabled && front_facing != NULL) {
261bf215546Sopenharmony_ci      /* do back face op */
262bf215546Sopenharmony_ci      LLVMValueRef back_res;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
265bf215546Sopenharmony_ci                                            stencilRefs[1], stencilVals);
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci      res = lp_build_select(bld, front_facing, res, back_res);
268bf215546Sopenharmony_ci   }
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci   if (stencil[0].writemask != 0xff ||
271bf215546Sopenharmony_ci       (stencil[1].enabled && front_facing != NULL &&
272bf215546Sopenharmony_ci        stencil[1].writemask != 0xff)) {
273bf215546Sopenharmony_ci      /* mask &= stencil[0].writemask */
274bf215546Sopenharmony_ci      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
275bf215546Sopenharmony_ci                                                      stencil[0].writemask);
276bf215546Sopenharmony_ci      if (stencil[1].enabled &&
277bf215546Sopenharmony_ci          stencil[1].writemask != stencil[0].writemask &&
278bf215546Sopenharmony_ci          front_facing != NULL) {
279bf215546Sopenharmony_ci         LLVMValueRef back_writemask =
280bf215546Sopenharmony_ci            lp_build_const_int_vec(bld->gallivm, bld->type,
281bf215546Sopenharmony_ci                                   stencil[1].writemask);
282bf215546Sopenharmony_ci         writemask = lp_build_select(bld, front_facing,
283bf215546Sopenharmony_ci                                     writemask, back_writemask);
284bf215546Sopenharmony_ci      }
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci      mask = LLVMBuildAnd(builder, mask, writemask, "");
287bf215546Sopenharmony_ci      /* res = (res & mask) | (stencilVals & ~mask) */
288bf215546Sopenharmony_ci      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
289bf215546Sopenharmony_ci   }
290bf215546Sopenharmony_ci   else {
291bf215546Sopenharmony_ci      /* res = mask ? res : stencilVals */
292bf215546Sopenharmony_ci      res = lp_build_select(bld, mask, res, stencilVals);
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   return res;
296bf215546Sopenharmony_ci}
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci/**
301bf215546Sopenharmony_ci * Return a type that matches the depth/stencil format.
302bf215546Sopenharmony_ci */
303bf215546Sopenharmony_cistruct lp_type
304bf215546Sopenharmony_cilp_depth_type(const struct util_format_description *format_desc,
305bf215546Sopenharmony_ci              unsigned length)
306bf215546Sopenharmony_ci{
307bf215546Sopenharmony_ci   struct lp_type type;
308bf215546Sopenharmony_ci   unsigned z_swizzle;
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
311bf215546Sopenharmony_ci   assert(format_desc->block.width == 1);
312bf215546Sopenharmony_ci   assert(format_desc->block.height == 1);
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   memset(&type, 0, sizeof type);
315bf215546Sopenharmony_ci   type.width = format_desc->block.bits;
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   z_swizzle = format_desc->swizzle[0];
318bf215546Sopenharmony_ci   if (z_swizzle < 4) {
319bf215546Sopenharmony_ci      if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
320bf215546Sopenharmony_ci         type.floating = TRUE;
321bf215546Sopenharmony_ci         assert(z_swizzle == 0);
322bf215546Sopenharmony_ci         assert(format_desc->channel[z_swizzle].size == 32);
323bf215546Sopenharmony_ci      }
324bf215546Sopenharmony_ci      else if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
325bf215546Sopenharmony_ci         assert(format_desc->block.bits <= 32);
326bf215546Sopenharmony_ci         assert(format_desc->channel[z_swizzle].normalized);
327bf215546Sopenharmony_ci         if (format_desc->channel[z_swizzle].size < format_desc->block.bits) {
328bf215546Sopenharmony_ci            /* Prefer signed integers when possible, as SSE has less support
329bf215546Sopenharmony_ci             * for unsigned comparison;
330bf215546Sopenharmony_ci             */
331bf215546Sopenharmony_ci            type.sign = TRUE;
332bf215546Sopenharmony_ci         }
333bf215546Sopenharmony_ci      }
334bf215546Sopenharmony_ci      else
335bf215546Sopenharmony_ci         assert(0);
336bf215546Sopenharmony_ci   }
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   type.length = length;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   return type;
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci/**
345bf215546Sopenharmony_ci * Compute bitmask and bit shift to apply to the incoming fragment Z values
346bf215546Sopenharmony_ci * and the Z buffer values needed before doing the Z comparison.
347bf215546Sopenharmony_ci *
348bf215546Sopenharmony_ci * Note that we leave the Z bits in the position that we find them
349bf215546Sopenharmony_ci * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
350bf215546Sopenharmony_ci * get by with fewer bit twiddling steps.
351bf215546Sopenharmony_ci */
352bf215546Sopenharmony_cistatic boolean
353bf215546Sopenharmony_ciget_z_shift_and_mask(const struct util_format_description *format_desc,
354bf215546Sopenharmony_ci                     unsigned *shift, unsigned *width, unsigned *mask)
355bf215546Sopenharmony_ci{
356bf215546Sopenharmony_ci   unsigned total_bits;
357bf215546Sopenharmony_ci   unsigned z_swizzle;
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
360bf215546Sopenharmony_ci   assert(format_desc->block.width == 1);
361bf215546Sopenharmony_ci   assert(format_desc->block.height == 1);
362bf215546Sopenharmony_ci
363bf215546Sopenharmony_ci   /* 64bit d/s format is special already extracted 32 bits */
364bf215546Sopenharmony_ci   total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   z_swizzle = format_desc->swizzle[0];
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   if (z_swizzle == PIPE_SWIZZLE_NONE)
369bf215546Sopenharmony_ci      return FALSE;
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   *width = format_desc->channel[z_swizzle].size;
372bf215546Sopenharmony_ci   /* & 31 is for the same reason as the 32-bit limit above */
373bf215546Sopenharmony_ci   *shift = format_desc->channel[z_swizzle].shift & 31;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   if (*width == total_bits) {
376bf215546Sopenharmony_ci      *mask = 0xffffffff;
377bf215546Sopenharmony_ci   } else {
378bf215546Sopenharmony_ci      *mask = ((1 << *width) - 1) << *shift;
379bf215546Sopenharmony_ci   }
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   return TRUE;
382bf215546Sopenharmony_ci}
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci/**
386bf215546Sopenharmony_ci * Compute bitmask and bit shift to apply to the framebuffer pixel values
387bf215546Sopenharmony_ci * to put the stencil bits in the least significant position.
388bf215546Sopenharmony_ci * (i.e. 0x000000ff)
389bf215546Sopenharmony_ci */
390bf215546Sopenharmony_cistatic boolean
391bf215546Sopenharmony_ciget_s_shift_and_mask(const struct util_format_description *format_desc,
392bf215546Sopenharmony_ci                     unsigned *shift, unsigned *mask)
393bf215546Sopenharmony_ci{
394bf215546Sopenharmony_ci   const unsigned s_swizzle = format_desc->swizzle[1];
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   if (s_swizzle == PIPE_SWIZZLE_NONE)
397bf215546Sopenharmony_ci      return FALSE;
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   /* just special case 64bit d/s format */
400bf215546Sopenharmony_ci   if (format_desc->block.bits > 32) {
401bf215546Sopenharmony_ci      /* XXX big-endian? */
402bf215546Sopenharmony_ci      assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
403bf215546Sopenharmony_ci      *shift = 0;
404bf215546Sopenharmony_ci      *mask = 0xff;
405bf215546Sopenharmony_ci      return TRUE;
406bf215546Sopenharmony_ci   }
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   *shift = format_desc->channel[s_swizzle].shift;
409bf215546Sopenharmony_ci   const unsigned sz = format_desc->channel[s_swizzle].size;
410bf215546Sopenharmony_ci   *mask = (1U << sz) - 1U;
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   return TRUE;
413bf215546Sopenharmony_ci}
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci/**
417bf215546Sopenharmony_ci * Perform the occlusion test and increase the counter.
418bf215546Sopenharmony_ci * Test the depth mask. Add the number of channel which has none zero mask
419bf215546Sopenharmony_ci * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
420bf215546Sopenharmony_ci * The counter will add 4.
421bf215546Sopenharmony_ci * TODO: could get that out of the fs loop.
422bf215546Sopenharmony_ci *
423bf215546Sopenharmony_ci * \param type holds element type of the mask vector.
424bf215546Sopenharmony_ci * \param maskvalue is the depth test mask.
425bf215546Sopenharmony_ci * \param counter is a pointer of the uint32 counter.
426bf215546Sopenharmony_ci */
427bf215546Sopenharmony_civoid
428bf215546Sopenharmony_cilp_build_occlusion_count(struct gallivm_state *gallivm,
429bf215546Sopenharmony_ci                         struct lp_type type,
430bf215546Sopenharmony_ci                         LLVMValueRef maskvalue,
431bf215546Sopenharmony_ci                         LLVMValueRef counter)
432bf215546Sopenharmony_ci{
433bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
434bf215546Sopenharmony_ci   LLVMContextRef context = gallivm->context;
435bf215546Sopenharmony_ci   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
436bf215546Sopenharmony_ci   LLVMValueRef count, newcount;
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   assert(type.length <= 16);
439bf215546Sopenharmony_ci   assert(type.floating);
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   if (util_get_cpu_caps()->has_sse && type.length == 4) {
442bf215546Sopenharmony_ci      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
443bf215546Sopenharmony_ci      const char *popcntintr = "llvm.ctpop.i32";
444bf215546Sopenharmony_ci      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
445bf215546Sopenharmony_ci                                           lp_build_vec_type(gallivm, type), "");
446bf215546Sopenharmony_ci      bits = lp_build_intrinsic_unary(builder, movmskintr,
447bf215546Sopenharmony_ci                                      LLVMInt32TypeInContext(context), bits);
448bf215546Sopenharmony_ci      count = lp_build_intrinsic_unary(builder, popcntintr,
449bf215546Sopenharmony_ci                                       LLVMInt32TypeInContext(context), bits);
450bf215546Sopenharmony_ci      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
451bf215546Sopenharmony_ci   }
452bf215546Sopenharmony_ci   else if (util_get_cpu_caps()->has_avx && type.length == 8) {
453bf215546Sopenharmony_ci      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
454bf215546Sopenharmony_ci      const char *popcntintr = "llvm.ctpop.i32";
455bf215546Sopenharmony_ci      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
456bf215546Sopenharmony_ci                                           lp_build_vec_type(gallivm, type), "");
457bf215546Sopenharmony_ci      bits = lp_build_intrinsic_unary(builder, movmskintr,
458bf215546Sopenharmony_ci                                      LLVMInt32TypeInContext(context), bits);
459bf215546Sopenharmony_ci      count = lp_build_intrinsic_unary(builder, popcntintr,
460bf215546Sopenharmony_ci                                       LLVMInt32TypeInContext(context), bits);
461bf215546Sopenharmony_ci      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
462bf215546Sopenharmony_ci   }
463bf215546Sopenharmony_ci   else {
464bf215546Sopenharmony_ci      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
465bf215546Sopenharmony_ci      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
466bf215546Sopenharmony_ci      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
467bf215546Sopenharmony_ci      LLVMValueRef shufflev, countd;
468bf215546Sopenharmony_ci      LLVMValueRef shuffles[16];
469bf215546Sopenharmony_ci      const char *popcntintr = NULL;
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci       for (unsigned i = 0; i < type.length; i++) {
474bf215546Sopenharmony_ci#if UTIL_ARCH_LITTLE_ENDIAN
475bf215546Sopenharmony_ci          shuffles[i] = lp_build_const_int32(gallivm, 4*i);
476bf215546Sopenharmony_ci#else
477bf215546Sopenharmony_ci          shuffles[i] = lp_build_const_int32(gallivm, (4*i) + 3);
478bf215546Sopenharmony_ci#endif
479bf215546Sopenharmony_ci       }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci       shufflev = LLVMConstVector(shuffles, type.length);
482bf215546Sopenharmony_ci       countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
483bf215546Sopenharmony_ci       countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci       /*
486bf215546Sopenharmony_ci        * XXX FIXME
487bf215546Sopenharmony_ci        * this is bad on cpus without popcount (on x86 supported by intel
488bf215546Sopenharmony_ci        * nehalem, amd barcelona, and up - not tied to sse42).
489bf215546Sopenharmony_ci        * Would be much faster to just sum the 4 elements of the vector with
490bf215546Sopenharmony_ci        * some horizontal add (shuffle/add/shuffle/add after the initial and).
491bf215546Sopenharmony_ci        */
492bf215546Sopenharmony_ci       switch (type.length) {
493bf215546Sopenharmony_ci       case 4:
494bf215546Sopenharmony_ci          popcntintr = "llvm.ctpop.i32";
495bf215546Sopenharmony_ci          break;
496bf215546Sopenharmony_ci       case 8:
497bf215546Sopenharmony_ci          popcntintr = "llvm.ctpop.i64";
498bf215546Sopenharmony_ci          break;
499bf215546Sopenharmony_ci       case 16:
500bf215546Sopenharmony_ci          popcntintr = "llvm.ctpop.i128";
501bf215546Sopenharmony_ci          break;
502bf215546Sopenharmony_ci       default:
503bf215546Sopenharmony_ci          assert(0);
504bf215546Sopenharmony_ci       }
505bf215546Sopenharmony_ci       count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci       if (type.length > 8) {
508bf215546Sopenharmony_ci          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
509bf215546Sopenharmony_ci       }
510bf215546Sopenharmony_ci       else if (type.length < 8) {
511bf215546Sopenharmony_ci          count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
512bf215546Sopenharmony_ci       }
513bf215546Sopenharmony_ci   }
514bf215546Sopenharmony_ci   newcount = LLVMBuildLoad2(builder, LLVMTypeOf(count), counter, "origcount");
515bf215546Sopenharmony_ci   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
516bf215546Sopenharmony_ci   LLVMBuildStore(builder, newcount, counter);
517bf215546Sopenharmony_ci}
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci/**
521bf215546Sopenharmony_ci * Load depth/stencil values.
522bf215546Sopenharmony_ci * The stored values are linear, swizzle them.
523bf215546Sopenharmony_ci *
524bf215546Sopenharmony_ci * \param type  the data type of the fragment depth/stencil values
525bf215546Sopenharmony_ci * \param format_desc  description of the depth/stencil surface
526bf215546Sopenharmony_ci * \param is_1d  whether this resource has only one dimension
527bf215546Sopenharmony_ci * \param loop_counter  the current loop iteration
528bf215546Sopenharmony_ci * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
529bf215546Sopenharmony_ci * \param depth_stride  stride of the depth/stencil buffer
530bf215546Sopenharmony_ci * \param z_fb  contains z values loaded from fb (may include padding)
531bf215546Sopenharmony_ci * \param s_fb  contains s values loaded from fb (may include padding)
532bf215546Sopenharmony_ci */
533bf215546Sopenharmony_civoid
534bf215546Sopenharmony_cilp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
535bf215546Sopenharmony_ci                                     struct lp_type z_src_type,
536bf215546Sopenharmony_ci                                     const struct util_format_description *format_desc,
537bf215546Sopenharmony_ci                                     boolean is_1d,
538bf215546Sopenharmony_ci                                     LLVMValueRef depth_ptr,
539bf215546Sopenharmony_ci                                     LLVMValueRef depth_stride,
540bf215546Sopenharmony_ci                                     LLVMValueRef *z_fb,
541bf215546Sopenharmony_ci                                     LLVMValueRef *s_fb,
542bf215546Sopenharmony_ci                                     LLVMValueRef loop_counter)
543bf215546Sopenharmony_ci{
544bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
545bf215546Sopenharmony_ci   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
546bf215546Sopenharmony_ci   LLVMValueRef depth_offset1, depth_offset2;
547bf215546Sopenharmony_ci   const unsigned depth_bytes = format_desc->block.bits / 8;
548bf215546Sopenharmony_ci   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci   struct lp_type zs_load_type = zs_type;
551bf215546Sopenharmony_ci   zs_load_type.length = zs_load_type.length / 2;
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   LLVMTypeRef zs_dst_type = lp_build_vec_type(gallivm, zs_load_type);
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci   if (z_src_type.length == 4) {
556bf215546Sopenharmony_ci      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
557bf215546Sopenharmony_ci                                          lp_build_const_int32(gallivm, 1), "");
558bf215546Sopenharmony_ci      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
559bf215546Sopenharmony_ci                                          lp_build_const_int32(gallivm, 2), "");
560bf215546Sopenharmony_ci      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
561bf215546Sopenharmony_ci                                          depth_stride, "");
562bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildMul(builder, looplsb,
563bf215546Sopenharmony_ci                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
564bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci      /* just concatenate the loaded 2x2 values into 4-wide vector */
567bf215546Sopenharmony_ci      for (unsigned i = 0; i < 4; i++) {
568bf215546Sopenharmony_ci         shuffles[i] = lp_build_const_int32(gallivm, i);
569bf215546Sopenharmony_ci      }
570bf215546Sopenharmony_ci   }
571bf215546Sopenharmony_ci   else {
572bf215546Sopenharmony_ci      unsigned i;
573bf215546Sopenharmony_ci      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
574bf215546Sopenharmony_ci                                         lp_build_const_int32(gallivm, 1), "");
575bf215546Sopenharmony_ci      assert(z_src_type.length == 8);
576bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
577bf215546Sopenharmony_ci      /*
578bf215546Sopenharmony_ci       * We load 2x4 values, and need to swizzle them (order
579bf215546Sopenharmony_ci       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
580bf215546Sopenharmony_ci       */
581bf215546Sopenharmony_ci      for (i = 0; i < 8; i++) {
582bf215546Sopenharmony_ci         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
583bf215546Sopenharmony_ci      }
584bf215546Sopenharmony_ci   }
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   /* Load current z/stencil values from z/stencil buffer */
589bf215546Sopenharmony_ci   LLVMTypeRef load_ptr_type = LLVMPointerType(zs_dst_type, 0);
590bf215546Sopenharmony_ci   LLVMValueRef zs_dst_ptr =
591bf215546Sopenharmony_ci      LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
592bf215546Sopenharmony_ci   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
593bf215546Sopenharmony_ci   LLVMValueRef zs_dst1 = LLVMBuildLoad2(builder, zs_dst_type, zs_dst_ptr, "");
594bf215546Sopenharmony_ci   LLVMValueRef zs_dst2;
595bf215546Sopenharmony_ci   if (is_1d) {
596bf215546Sopenharmony_ci      zs_dst2 = lp_build_undef(gallivm, zs_load_type);
597bf215546Sopenharmony_ci   }
598bf215546Sopenharmony_ci   else {
599bf215546Sopenharmony_ci      zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
600bf215546Sopenharmony_ci      zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
601bf215546Sopenharmony_ci      zs_dst2 = LLVMBuildLoad2(builder, zs_dst_type, zs_dst_ptr, "");
602bf215546Sopenharmony_ci   }
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
605bf215546Sopenharmony_ci                                  LLVMConstVector(shuffles, zs_type.length), "");
606bf215546Sopenharmony_ci   *s_fb = *z_fb;
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   if (format_desc->block.bits == 8) {
609bf215546Sopenharmony_ci      /* Extend stencil-only 8 bit values (S8_UINT) */
610bf215546Sopenharmony_ci      *s_fb = LLVMBuildZExt(builder, *s_fb,
611bf215546Sopenharmony_ci                            lp_build_int_vec_type(gallivm, z_src_type), "");
612bf215546Sopenharmony_ci   }
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   if (format_desc->block.bits < z_src_type.width) {
615bf215546Sopenharmony_ci      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
616bf215546Sopenharmony_ci      *z_fb = LLVMBuildZExt(builder, *z_fb,
617bf215546Sopenharmony_ci                            lp_build_int_vec_type(gallivm, z_src_type), "");
618bf215546Sopenharmony_ci   }
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci   else if (format_desc->block.bits > 32) {
621bf215546Sopenharmony_ci      /* rely on llvm to handle too wide vector we have here nicely */
622bf215546Sopenharmony_ci      struct lp_type typex2 = zs_type;
623bf215546Sopenharmony_ci      struct lp_type s_type = zs_type;
624bf215546Sopenharmony_ci      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
625bf215546Sopenharmony_ci      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
626bf215546Sopenharmony_ci      LLVMValueRef tmp;
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci      typex2.width = typex2.width / 2;
629bf215546Sopenharmony_ci      typex2.length = typex2.length * 2;
630bf215546Sopenharmony_ci      s_type.width = s_type.width / 2;
631bf215546Sopenharmony_ci      s_type.floating = 0;
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci      tmp = LLVMBuildBitCast(builder, *z_fb,
634bf215546Sopenharmony_ci                             lp_build_vec_type(gallivm, typex2), "");
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci      for (unsigned i = 0; i < zs_type.length; i++) {
637bf215546Sopenharmony_ci         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
638bf215546Sopenharmony_ci         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
639bf215546Sopenharmony_ci      }
640bf215546Sopenharmony_ci      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
641bf215546Sopenharmony_ci                                     LLVMConstVector(shuffles1, zs_type.length), "");
642bf215546Sopenharmony_ci      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
643bf215546Sopenharmony_ci                                     LLVMConstVector(shuffles2, zs_type.length), "");
644bf215546Sopenharmony_ci      *s_fb = LLVMBuildBitCast(builder, *s_fb,
645bf215546Sopenharmony_ci                               lp_build_vec_type(gallivm, s_type), "");
646bf215546Sopenharmony_ci      lp_build_name(*s_fb, "s_dst");
647bf215546Sopenharmony_ci   }
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   lp_build_name(*z_fb, "z_dst");
650bf215546Sopenharmony_ci   lp_build_name(*s_fb, "s_dst");
651bf215546Sopenharmony_ci   lp_build_name(*z_fb, "z_dst");
652bf215546Sopenharmony_ci}
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci/**
656bf215546Sopenharmony_ci * Store depth/stencil values.
657bf215546Sopenharmony_ci * Incoming values are swizzled (typically n 2x2 quads), stored linear.
658bf215546Sopenharmony_ci * If there's a mask it will do select/store otherwise just store.
659bf215546Sopenharmony_ci *
660bf215546Sopenharmony_ci * \param type  the data type of the fragment depth/stencil values
661bf215546Sopenharmony_ci * \param format_desc  description of the depth/stencil surface
662bf215546Sopenharmony_ci * \param is_1d  whether this resource has only one dimension
663bf215546Sopenharmony_ci * \param mask_value the alive/dead pixel mask for the quad (vector)
664bf215546Sopenharmony_ci * \param z_fb  z values read from fb (with padding)
665bf215546Sopenharmony_ci * \param s_fb  s values read from fb (with padding)
666bf215546Sopenharmony_ci * \param loop_counter  the current loop iteration
667bf215546Sopenharmony_ci * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
668bf215546Sopenharmony_ci * \param depth_stride  stride of the depth/stencil buffer
669bf215546Sopenharmony_ci * \param z_value the depth values to store (with padding)
670bf215546Sopenharmony_ci * \param s_value the stencil values to store (with padding)
671bf215546Sopenharmony_ci */
672bf215546Sopenharmony_civoid
673bf215546Sopenharmony_cilp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
674bf215546Sopenharmony_ci                                      struct lp_type z_src_type,
675bf215546Sopenharmony_ci                                      const struct util_format_description *format_desc,
676bf215546Sopenharmony_ci                                      boolean is_1d,
677bf215546Sopenharmony_ci                                      LLVMValueRef mask_value,
678bf215546Sopenharmony_ci                                      LLVMValueRef z_fb,
679bf215546Sopenharmony_ci                                      LLVMValueRef s_fb,
680bf215546Sopenharmony_ci                                      LLVMValueRef loop_counter,
681bf215546Sopenharmony_ci                                      LLVMValueRef depth_ptr,
682bf215546Sopenharmony_ci                                      LLVMValueRef depth_stride,
683bf215546Sopenharmony_ci                                      LLVMValueRef z_value,
684bf215546Sopenharmony_ci                                      LLVMValueRef s_value)
685bf215546Sopenharmony_ci{
686bf215546Sopenharmony_ci   struct lp_build_context z_bld;
687bf215546Sopenharmony_ci   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
688bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
689bf215546Sopenharmony_ci   LLVMValueRef zs_dst1, zs_dst2;
690bf215546Sopenharmony_ci   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
691bf215546Sopenharmony_ci   LLVMValueRef depth_offset1, depth_offset2;
692bf215546Sopenharmony_ci   LLVMTypeRef load_ptr_type;
693bf215546Sopenharmony_ci   unsigned depth_bytes = format_desc->block.bits / 8;
694bf215546Sopenharmony_ci   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
695bf215546Sopenharmony_ci   struct lp_type z_type = zs_type;
696bf215546Sopenharmony_ci   struct lp_type zs_load_type = zs_type;
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci   zs_load_type.length = zs_load_type.length / 2;
699bf215546Sopenharmony_ci   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   z_type.width = z_src_type.width;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   lp_build_context_init(&z_bld, gallivm, z_type);
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   /*
706bf215546Sopenharmony_ci    * This is far from ideal, at least for late depth write we should do this
707bf215546Sopenharmony_ci    * outside the fs loop to avoid all the swizzle stuff.
708bf215546Sopenharmony_ci    */
709bf215546Sopenharmony_ci   if (z_src_type.length == 4) {
710bf215546Sopenharmony_ci      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
711bf215546Sopenharmony_ci                                          lp_build_const_int32(gallivm, 1), "");
712bf215546Sopenharmony_ci      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
713bf215546Sopenharmony_ci                                          lp_build_const_int32(gallivm, 2), "");
714bf215546Sopenharmony_ci      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
715bf215546Sopenharmony_ci                                          depth_stride, "");
716bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildMul(builder, looplsb,
717bf215546Sopenharmony_ci                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
718bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
719bf215546Sopenharmony_ci   }
720bf215546Sopenharmony_ci   else {
721bf215546Sopenharmony_ci      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
722bf215546Sopenharmony_ci                                         lp_build_const_int32(gallivm, 1), "");
723bf215546Sopenharmony_ci      assert(z_src_type.length == 8);
724bf215546Sopenharmony_ci      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
725bf215546Sopenharmony_ci      /*
726bf215546Sopenharmony_ci       * We load 2x4 values, and need to swizzle them (order
727bf215546Sopenharmony_ci       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
728bf215546Sopenharmony_ci       */
729bf215546Sopenharmony_ci      for (unsigned i = 0; i < 8; i++) {
730bf215546Sopenharmony_ci         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
731bf215546Sopenharmony_ci      }
732bf215546Sopenharmony_ci   }
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
737bf215546Sopenharmony_ci   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
738bf215546Sopenharmony_ci   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
739bf215546Sopenharmony_ci   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   if (format_desc->block.bits > 32) {
742bf215546Sopenharmony_ci      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
743bf215546Sopenharmony_ci   }
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci   if (mask_value) {
746bf215546Sopenharmony_ci      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
747bf215546Sopenharmony_ci      if (format_desc->block.bits > 32) {
748bf215546Sopenharmony_ci         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
749bf215546Sopenharmony_ci         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
750bf215546Sopenharmony_ci      }
751bf215546Sopenharmony_ci   }
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   if (zs_type.width < z_src_type.width) {
754bf215546Sopenharmony_ci      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
755bf215546Sopenharmony_ci      z_value = LLVMBuildTrunc(builder, z_value,
756bf215546Sopenharmony_ci                               lp_build_int_vec_type(gallivm, zs_type), "");
757bf215546Sopenharmony_ci   }
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci   if (format_desc->block.bits <= 32) {
760bf215546Sopenharmony_ci      if (z_src_type.length == 4) {
761bf215546Sopenharmony_ci         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
762bf215546Sopenharmony_ci         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
763bf215546Sopenharmony_ci      }
764bf215546Sopenharmony_ci      else {
765bf215546Sopenharmony_ci         assert(z_src_type.length == 8);
766bf215546Sopenharmony_ci         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
767bf215546Sopenharmony_ci                                          LLVMConstVector(&shuffles[0],
768bf215546Sopenharmony_ci                                                          zs_load_type.length), "");
769bf215546Sopenharmony_ci         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
770bf215546Sopenharmony_ci                                          LLVMConstVector(&shuffles[4],
771bf215546Sopenharmony_ci                                                          zs_load_type.length), "");
772bf215546Sopenharmony_ci      }
773bf215546Sopenharmony_ci   }
774bf215546Sopenharmony_ci   else {
775bf215546Sopenharmony_ci      if (z_src_type.length == 4) {
776bf215546Sopenharmony_ci         zs_dst1 = lp_build_interleave2(gallivm, z_type,
777bf215546Sopenharmony_ci                                        z_value, s_value, 0);
778bf215546Sopenharmony_ci         zs_dst2 = lp_build_interleave2(gallivm, z_type,
779bf215546Sopenharmony_ci                                        z_value, s_value, 1);
780bf215546Sopenharmony_ci      }
781bf215546Sopenharmony_ci      else {
782bf215546Sopenharmony_ci         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
783bf215546Sopenharmony_ci         assert(z_src_type.length == 8);
784bf215546Sopenharmony_ci         for (unsigned i = 0; i < 8; i++) {
785bf215546Sopenharmony_ci            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
786bf215546Sopenharmony_ci            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
787bf215546Sopenharmony_ci                                                   z_src_type.length);
788bf215546Sopenharmony_ci         }
789bf215546Sopenharmony_ci         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
790bf215546Sopenharmony_ci                                          LLVMConstVector(&shuffles[0],
791bf215546Sopenharmony_ci                                                          z_src_type.length), "");
792bf215546Sopenharmony_ci         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
793bf215546Sopenharmony_ci                                          LLVMConstVector(&shuffles[8],
794bf215546Sopenharmony_ci                                                          z_src_type.length), "");
795bf215546Sopenharmony_ci      }
796bf215546Sopenharmony_ci      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
797bf215546Sopenharmony_ci                                 lp_build_vec_type(gallivm, zs_load_type), "");
798bf215546Sopenharmony_ci      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
799bf215546Sopenharmony_ci                                 lp_build_vec_type(gallivm, zs_load_type), "");
800bf215546Sopenharmony_ci   }
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_ci   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
803bf215546Sopenharmony_ci   if (!is_1d) {
804bf215546Sopenharmony_ci      LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
805bf215546Sopenharmony_ci   }
806bf215546Sopenharmony_ci}
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci/**
810bf215546Sopenharmony_ci * Generate code for performing depth and/or stencil tests.
811bf215546Sopenharmony_ci * We operate on a vector of values (typically n 2x2 quads).
812bf215546Sopenharmony_ci *
813bf215546Sopenharmony_ci * \param depth  the depth test state
814bf215546Sopenharmony_ci * \param stencil  the front/back stencil state
815bf215546Sopenharmony_ci * \param type  the data type of the fragment depth/stencil values
816bf215546Sopenharmony_ci * \param format_desc  description of the depth/stencil surface
817bf215546Sopenharmony_ci * \param mask  the alive/dead pixel mask for the quad (vector)
818bf215546Sopenharmony_ci * \param cov_mask coverage mask
819bf215546Sopenharmony_ci * \param stencil_refs  the front/back stencil ref values (scalar)
820bf215546Sopenharmony_ci * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
821bf215546Sopenharmony_ci * \param zs_dst  the depth/stencil values in framebuffer
822bf215546Sopenharmony_ci * \param face  contains boolean value indicating front/back facing polygon
823bf215546Sopenharmony_ci */
824bf215546Sopenharmony_civoid
825bf215546Sopenharmony_cilp_build_depth_stencil_test(struct gallivm_state *gallivm,
826bf215546Sopenharmony_ci                            const struct lp_depth_state *depth,
827bf215546Sopenharmony_ci                            const struct pipe_stencil_state stencil[2],
828bf215546Sopenharmony_ci                            struct lp_type z_src_type,
829bf215546Sopenharmony_ci                            const struct util_format_description *format_desc,
830bf215546Sopenharmony_ci                            struct lp_build_mask_context *mask,
831bf215546Sopenharmony_ci                            LLVMValueRef *cov_mask,
832bf215546Sopenharmony_ci                            LLVMValueRef stencil_refs[2],
833bf215546Sopenharmony_ci                            LLVMValueRef z_src,
834bf215546Sopenharmony_ci                            LLVMValueRef z_fb,
835bf215546Sopenharmony_ci                            LLVMValueRef s_fb,
836bf215546Sopenharmony_ci                            LLVMValueRef face,
837bf215546Sopenharmony_ci                            LLVMValueRef *z_value,
838bf215546Sopenharmony_ci                            LLVMValueRef *s_value,
839bf215546Sopenharmony_ci                            boolean do_branch,
840bf215546Sopenharmony_ci                            bool restrict_depth)
841bf215546Sopenharmony_ci{
842bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
843bf215546Sopenharmony_ci   struct lp_type z_type;
844bf215546Sopenharmony_ci   struct lp_build_context z_bld;
845bf215546Sopenharmony_ci   struct lp_build_context s_bld;
846bf215546Sopenharmony_ci   struct lp_type s_type;
847bf215546Sopenharmony_ci   unsigned z_shift = 0, z_width = 0, z_mask = 0;
848bf215546Sopenharmony_ci   LLVMValueRef z_dst = NULL;
849bf215546Sopenharmony_ci   LLVMValueRef stencil_vals = NULL;
850bf215546Sopenharmony_ci   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
851bf215546Sopenharmony_ci   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
852bf215546Sopenharmony_ci   LLVMValueRef current_mask = mask ? lp_build_mask_value(mask) : *cov_mask;
853bf215546Sopenharmony_ci   LLVMValueRef front_facing = NULL;
854bf215546Sopenharmony_ci   boolean have_z, have_s;
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   /*
857bf215546Sopenharmony_ci    * Depths are expected to be between 0 and 1, even if they are stored in
858bf215546Sopenharmony_ci    * floats. Setting these bits here will ensure that the lp_build_conv() call
859bf215546Sopenharmony_ci    * below won't try to unnecessarily clamp the incoming values.
860bf215546Sopenharmony_ci    * If depths are expected outside 0..1 don't set these bits.
861bf215546Sopenharmony_ci    */
862bf215546Sopenharmony_ci   if (z_src_type.floating) {
863bf215546Sopenharmony_ci      if (restrict_depth) {
864bf215546Sopenharmony_ci         z_src_type.sign = FALSE;
865bf215546Sopenharmony_ci         z_src_type.norm = TRUE;
866bf215546Sopenharmony_ci      }
867bf215546Sopenharmony_ci   }
868bf215546Sopenharmony_ci   else {
869bf215546Sopenharmony_ci      assert(!z_src_type.sign);
870bf215546Sopenharmony_ci      assert(z_src_type.norm);
871bf215546Sopenharmony_ci   }
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_ci   /* Pick the type matching the depth-stencil format. */
874bf215546Sopenharmony_ci   z_type = lp_depth_type(format_desc, z_src_type.length);
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_ci   /* Pick the intermediate type for depth operations. */
877bf215546Sopenharmony_ci   z_type.width = z_src_type.width;
878bf215546Sopenharmony_ci   assert(z_type.length == z_src_type.length);
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   /* FIXME: for non-float depth/stencil might generate better code
881bf215546Sopenharmony_ci    * if we'd always split it up to use 128bit operations.
882bf215546Sopenharmony_ci    * For stencil we'd almost certainly want to pack to 8xi16 values,
883bf215546Sopenharmony_ci    * for z just run twice.
884bf215546Sopenharmony_ci    */
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci   /* Sanity checking */
887bf215546Sopenharmony_ci   {
888bf215546Sopenharmony_ci      ASSERTED const unsigned z_swizzle = format_desc->swizzle[0];
889bf215546Sopenharmony_ci      ASSERTED const unsigned s_swizzle = format_desc->swizzle[1];
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci      assert(z_swizzle != PIPE_SWIZZLE_NONE ||
892bf215546Sopenharmony_ci             s_swizzle != PIPE_SWIZZLE_NONE);
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci      assert(depth->enabled || stencil[0].enabled);
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
897bf215546Sopenharmony_ci      assert(format_desc->block.width == 1);
898bf215546Sopenharmony_ci      assert(format_desc->block.height == 1);
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci      if (stencil[0].enabled) {
901bf215546Sopenharmony_ci         assert(s_swizzle < 4);
902bf215546Sopenharmony_ci         assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
903bf215546Sopenharmony_ci         assert(format_desc->channel[s_swizzle].pure_integer);
904bf215546Sopenharmony_ci         assert(!format_desc->channel[s_swizzle].normalized);
905bf215546Sopenharmony_ci         assert(format_desc->channel[s_swizzle].size == 8);
906bf215546Sopenharmony_ci      }
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci      if (depth->enabled) {
909bf215546Sopenharmony_ci         assert(z_swizzle < 4);
910bf215546Sopenharmony_ci         if (z_type.floating) {
911bf215546Sopenharmony_ci            assert(z_swizzle == 0);
912bf215546Sopenharmony_ci            assert(format_desc->channel[z_swizzle].type ==
913bf215546Sopenharmony_ci                   UTIL_FORMAT_TYPE_FLOAT);
914bf215546Sopenharmony_ci            assert(format_desc->channel[z_swizzle].size == 32);
915bf215546Sopenharmony_ci         }
916bf215546Sopenharmony_ci         else {
917bf215546Sopenharmony_ci            assert(format_desc->channel[z_swizzle].type ==
918bf215546Sopenharmony_ci                   UTIL_FORMAT_TYPE_UNSIGNED);
919bf215546Sopenharmony_ci            assert(format_desc->channel[z_swizzle].normalized);
920bf215546Sopenharmony_ci            assert(!z_type.fixed);
921bf215546Sopenharmony_ci         }
922bf215546Sopenharmony_ci      }
923bf215546Sopenharmony_ci   }
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci   /* Setup build context for Z vals */
927bf215546Sopenharmony_ci   lp_build_context_init(&z_bld, gallivm, z_type);
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci   /* Setup build context for stencil vals */
930bf215546Sopenharmony_ci   s_type = lp_int_type(z_type);
931bf215546Sopenharmony_ci   lp_build_context_init(&s_bld, gallivm, s_type);
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   /* Compute and apply the Z/stencil bitmasks and shifts.
934bf215546Sopenharmony_ci    */
935bf215546Sopenharmony_ci   {
936bf215546Sopenharmony_ci      unsigned s_shift, s_mask;
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_ci      z_dst = z_fb;
939bf215546Sopenharmony_ci      stencil_vals = s_fb;
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
942bf215546Sopenharmony_ci      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci      if (have_z) {
945bf215546Sopenharmony_ci         if (z_mask != 0xffffffff) {
946bf215546Sopenharmony_ci            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
947bf215546Sopenharmony_ci         }
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci         /*
950bf215546Sopenharmony_ci          * Align the framebuffer Z 's LSB to the right.
951bf215546Sopenharmony_ci          */
952bf215546Sopenharmony_ci         if (z_shift) {
953bf215546Sopenharmony_ci            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
954bf215546Sopenharmony_ci            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
955bf215546Sopenharmony_ci         } else if (z_bitmask) {
956bf215546Sopenharmony_ci            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
957bf215546Sopenharmony_ci         } else {
958bf215546Sopenharmony_ci            lp_build_name(z_dst, "z_dst");
959bf215546Sopenharmony_ci         }
960bf215546Sopenharmony_ci      }
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci      if (have_s) {
963bf215546Sopenharmony_ci         if (s_shift) {
964bf215546Sopenharmony_ci            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
965bf215546Sopenharmony_ci            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
966bf215546Sopenharmony_ci            stencil_shift = shift;  /* used below */
967bf215546Sopenharmony_ci         }
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci         if (s_mask != 0xffffffff) {
970bf215546Sopenharmony_ci            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
971bf215546Sopenharmony_ci            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
972bf215546Sopenharmony_ci         }
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci         lp_build_name(stencil_vals, "s_dst");
975bf215546Sopenharmony_ci      }
976bf215546Sopenharmony_ci   }
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_ci   if (stencil[0].enabled) {
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci      if (face) {
981bf215546Sopenharmony_ci         if (0) {
982bf215546Sopenharmony_ci            /*
983bf215546Sopenharmony_ci             * XXX: the scalar expansion below produces atrocious code
984bf215546Sopenharmony_ci             * (basically producing a 64bit scalar value, then moving the 2
985bf215546Sopenharmony_ci             * 32bit pieces separately to simd, plus 4 shuffles, which is
986bf215546Sopenharmony_ci             * seriously lame). But the scalar-simd transitions are always
987bf215546Sopenharmony_ci             * tricky, so no big surprise there.
988bf215546Sopenharmony_ci             * This here would be way better, however llvm has some serious
989bf215546Sopenharmony_ci             * trouble later using it in the select, probably because it will
990bf215546Sopenharmony_ci             * recognize the expression as constant and move the simd value
991bf215546Sopenharmony_ci             * away (out of the loop) - and then it will suddenly try
992bf215546Sopenharmony_ci             * constructing i1 high-bit masks out of it later...
993bf215546Sopenharmony_ci             * (Try piglit stencil-twoside.)
994bf215546Sopenharmony_ci             * Note this is NOT due to using SExt/Trunc, it fails exactly the
995bf215546Sopenharmony_ci             * same even when using native compare/select.
996bf215546Sopenharmony_ci             * I cannot reproduce this problem when using stand-alone compiler
997bf215546Sopenharmony_ci             * though, suggesting some problem with optimization passes...
998bf215546Sopenharmony_ci             * (With stand-alone compilation, the construction of this mask
999bf215546Sopenharmony_ci             * value, no matter if the easy 3 instruction here or the complex
1000bf215546Sopenharmony_ci             * 16+ one below, never gets separated from where it's used.)
1001bf215546Sopenharmony_ci             * The scalar code still has the same problem, but the generated
1002bf215546Sopenharmony_ci             * code looks a bit better at least for some reason, even if
1003bf215546Sopenharmony_ci             * mostly by luck (the fundamental issue clearly is the same).
1004bf215546Sopenharmony_ci             */
1005bf215546Sopenharmony_ci            front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
1006bf215546Sopenharmony_ci            /* front_facing = face != 0 ? ~0 : 0 */
1007bf215546Sopenharmony_ci            front_facing = lp_build_compare(gallivm, s_bld.type,
1008bf215546Sopenharmony_ci                                            PIPE_FUNC_NOTEQUAL,
1009bf215546Sopenharmony_ci                                            front_facing, s_bld.zero);
1010bf215546Sopenharmony_ci         } else {
1011bf215546Sopenharmony_ci            LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci            /* front_facing = face != 0 ? ~0 : 0 */
1014bf215546Sopenharmony_ci            front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
1015bf215546Sopenharmony_ci            front_facing = LLVMBuildSExt(builder, front_facing,
1016bf215546Sopenharmony_ci                                         LLVMIntTypeInContext(gallivm->context,
1017bf215546Sopenharmony_ci                                                s_bld.type.length*s_bld.type.width),
1018bf215546Sopenharmony_ci                                         "");
1019bf215546Sopenharmony_ci            front_facing = LLVMBuildBitCast(builder, front_facing,
1020bf215546Sopenharmony_ci                                            s_bld.int_vec_type, "");
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci         }
1023bf215546Sopenharmony_ci      }
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
1026bf215546Sopenharmony_ci                                          stencil_refs, stencil_vals,
1027bf215546Sopenharmony_ci                                          front_facing);
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci      /* apply stencil-fail operator */
1030bf215546Sopenharmony_ci      {
1031bf215546Sopenharmony_ci         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
1032bf215546Sopenharmony_ci         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
1033bf215546Sopenharmony_ci                                            stencil_refs, stencil_vals,
1034bf215546Sopenharmony_ci                                            s_fail_mask, front_facing);
1035bf215546Sopenharmony_ci      }
1036bf215546Sopenharmony_ci   }
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   if (depth->enabled) {
1039bf215546Sopenharmony_ci      /*
1040bf215546Sopenharmony_ci       * Convert fragment Z to the desired type, aligning the LSB to the right.
1041bf215546Sopenharmony_ci       */
1042bf215546Sopenharmony_ci
1043bf215546Sopenharmony_ci      assert(z_type.width == z_src_type.width);
1044bf215546Sopenharmony_ci      assert(z_type.length == z_src_type.length);
1045bf215546Sopenharmony_ci      assert(lp_check_value(z_src_type, z_src));
1046bf215546Sopenharmony_ci      if (z_src_type.floating) {
1047bf215546Sopenharmony_ci         /*
1048bf215546Sopenharmony_ci          * Convert from floating point values
1049bf215546Sopenharmony_ci          */
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci         if (!z_type.floating) {
1052bf215546Sopenharmony_ci            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
1053bf215546Sopenharmony_ci                                                            z_src_type,
1054bf215546Sopenharmony_ci                                                            z_width,
1055bf215546Sopenharmony_ci                                                            z_src);
1056bf215546Sopenharmony_ci         }
1057bf215546Sopenharmony_ci      } else {
1058bf215546Sopenharmony_ci         /*
1059bf215546Sopenharmony_ci          * Convert from unsigned normalized values.
1060bf215546Sopenharmony_ci          */
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci         assert(!z_src_type.sign);
1063bf215546Sopenharmony_ci         assert(!z_src_type.fixed);
1064bf215546Sopenharmony_ci         assert(z_src_type.norm);
1065bf215546Sopenharmony_ci         assert(!z_type.floating);
1066bf215546Sopenharmony_ci         if (z_src_type.width > z_width) {
1067bf215546Sopenharmony_ci            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
1068bf215546Sopenharmony_ci                                                        z_src_type.width - z_width);
1069bf215546Sopenharmony_ci            z_src = LLVMBuildLShr(builder, z_src, shift, "");
1070bf215546Sopenharmony_ci         }
1071bf215546Sopenharmony_ci      }
1072bf215546Sopenharmony_ci      assert(lp_check_value(z_type, z_src));
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci      lp_build_name(z_src, "z_src");
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci      /* compare src Z to dst Z, returning 'pass' mask */
1077bf215546Sopenharmony_ci      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci      /* mask off bits that failed stencil test */
1080bf215546Sopenharmony_ci      if (s_pass_mask) {
1081bf215546Sopenharmony_ci         current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
1082bf215546Sopenharmony_ci      }
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci      if (!stencil[0].enabled && mask) {
1085bf215546Sopenharmony_ci         /* We can potentially skip all remaining operations here, but only
1086bf215546Sopenharmony_ci          * if stencil is disabled because we still need to update the stencil
1087bf215546Sopenharmony_ci          * buffer values.  Don't need to update Z buffer values.
1088bf215546Sopenharmony_ci          */
1089bf215546Sopenharmony_ci         lp_build_mask_update(mask, z_pass);
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci         if (do_branch) {
1092bf215546Sopenharmony_ci            lp_build_mask_check(mask);
1093bf215546Sopenharmony_ci         }
1094bf215546Sopenharmony_ci      }
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci      if (depth->writemask) {
1097bf215546Sopenharmony_ci         LLVMValueRef z_pass_mask;
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci         /* mask off bits that failed Z test */
1100bf215546Sopenharmony_ci         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
1101bf215546Sopenharmony_ci
1102bf215546Sopenharmony_ci         /* Mix the old and new Z buffer values.
1103bf215546Sopenharmony_ci          * z_dst[i] = z_pass_mask[i] ? z_src[i] : z_dst[i]
1104bf215546Sopenharmony_ci          */
1105bf215546Sopenharmony_ci         z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
1106bf215546Sopenharmony_ci      }
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_ci      if (stencil[0].enabled) {
1109bf215546Sopenharmony_ci         /* update stencil buffer values according to z pass/fail result */
1110bf215546Sopenharmony_ci         LLVMValueRef z_fail_mask, z_pass_mask;
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci         /* apply Z-fail operator */
1113bf215546Sopenharmony_ci         z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
1114bf215546Sopenharmony_ci         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
1115bf215546Sopenharmony_ci                                            stencil_refs, stencil_vals,
1116bf215546Sopenharmony_ci                                            z_fail_mask, front_facing);
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ci         /* apply Z-pass operator */
1119bf215546Sopenharmony_ci         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
1120bf215546Sopenharmony_ci         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
1121bf215546Sopenharmony_ci                                            stencil_refs, stencil_vals,
1122bf215546Sopenharmony_ci                                            z_pass_mask, front_facing);
1123bf215546Sopenharmony_ci      }
1124bf215546Sopenharmony_ci   }
1125bf215546Sopenharmony_ci   else {
1126bf215546Sopenharmony_ci      /* No depth test: apply Z-pass operator to stencil buffer values which
1127bf215546Sopenharmony_ci       * passed the stencil test.
1128bf215546Sopenharmony_ci       */
1129bf215546Sopenharmony_ci      s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
1130bf215546Sopenharmony_ci      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
1131bf215546Sopenharmony_ci                                         stencil_refs, stencil_vals,
1132bf215546Sopenharmony_ci                                         s_pass_mask, front_facing);
1133bf215546Sopenharmony_ci   }
1134bf215546Sopenharmony_ci
1135bf215546Sopenharmony_ci   /* Put Z and stencil bits in the right place */
1136bf215546Sopenharmony_ci   if (have_z && z_shift) {
1137bf215546Sopenharmony_ci      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
1138bf215546Sopenharmony_ci      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
1139bf215546Sopenharmony_ci   }
1140bf215546Sopenharmony_ci   if (stencil_vals && stencil_shift)
1141bf215546Sopenharmony_ci      stencil_vals = LLVMBuildShl(builder, stencil_vals,
1142bf215546Sopenharmony_ci                                  stencil_shift, "");
1143bf215546Sopenharmony_ci
1144bf215546Sopenharmony_ci   /* Finally, merge the z/stencil values */
1145bf215546Sopenharmony_ci   if (format_desc->block.bits <= 32) {
1146bf215546Sopenharmony_ci      if (have_z && have_s)
1147bf215546Sopenharmony_ci         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
1148bf215546Sopenharmony_ci      else if (have_z)
1149bf215546Sopenharmony_ci         *z_value = z_dst;
1150bf215546Sopenharmony_ci      else
1151bf215546Sopenharmony_ci         *z_value = stencil_vals;
1152bf215546Sopenharmony_ci      *s_value = *z_value;
1153bf215546Sopenharmony_ci   }
1154bf215546Sopenharmony_ci   else {
1155bf215546Sopenharmony_ci      *z_value = z_dst;
1156bf215546Sopenharmony_ci      *s_value = stencil_vals;
1157bf215546Sopenharmony_ci   }
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci   if (mask) {
1160bf215546Sopenharmony_ci      if (s_pass_mask)
1161bf215546Sopenharmony_ci         lp_build_mask_update(mask, s_pass_mask);
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci      if (depth->enabled && stencil[0].enabled)
1164bf215546Sopenharmony_ci         lp_build_mask_update(mask, z_pass);
1165bf215546Sopenharmony_ci   } else {
1166bf215546Sopenharmony_ci      LLVMValueRef tmp_mask = *cov_mask;
1167bf215546Sopenharmony_ci      if (s_pass_mask)
1168bf215546Sopenharmony_ci         tmp_mask = LLVMBuildAnd(builder, tmp_mask, s_pass_mask, "");
1169bf215546Sopenharmony_ci
1170bf215546Sopenharmony_ci      /* for multisample we don't do the stencil optimisation so update always */
1171bf215546Sopenharmony_ci      if (depth->enabled)
1172bf215546Sopenharmony_ci         tmp_mask = LLVMBuildAnd(builder, tmp_mask, z_pass, "");
1173bf215546Sopenharmony_ci      *cov_mask = tmp_mask;
1174bf215546Sopenharmony_ci   }
1175bf215546Sopenharmony_ci}
1176bf215546Sopenharmony_ci
1177