1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- common code.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35#include "pipe/p_defines.h"
36#include "pipe/p_state.h"
37#include "util/format/u_format.h"
38#include "util/u_math.h"
39#include "util/u_cpu_detect.h"
40#include "lp_bld_arit.h"
41#include "lp_bld_const.h"
42#include "lp_bld_debug.h"
43#include "lp_bld_printf.h"
44#include "lp_bld_flow.h"
45#include "lp_bld_sample.h"
46#include "lp_bld_swizzle.h"
47#include "lp_bld_type.h"
48#include "lp_bld_logic.h"
49#include "lp_bld_pack.h"
50#include "lp_bld_quad.h"
51#include "lp_bld_bitarit.h"
52
53
54/*
55 * Bri-linear factor. Should be greater than one.
56 */
57#define BRILINEAR_FACTOR 2
58
59/**
60 * Does the given texture wrap mode allow sampling the texture border color?
61 * XXX maybe move this into gallium util code.
62 */
63boolean
64lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
65                                       enum pipe_tex_filter min_img_filter,
66                                       enum pipe_tex_filter mag_img_filter)
67{
68   switch (mode) {
69   case PIPE_TEX_WRAP_REPEAT:
70   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
71   case PIPE_TEX_WRAP_MIRROR_REPEAT:
72   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
73      return FALSE;
74   case PIPE_TEX_WRAP_CLAMP:
75   case PIPE_TEX_WRAP_MIRROR_CLAMP:
76      if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
77          mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
78         return FALSE;
79      } else {
80         return TRUE;
81      }
82   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
83   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
84      return TRUE;
85   default:
86      assert(0 && "unexpected wrap mode");
87      return FALSE;
88   }
89}
90
91
92/**
93 * Initialize lp_sampler_static_texture_state object with the gallium
94 * texture/sampler_view state (this contains the parts which are
95 * considered static).
96 */
97void
98lp_sampler_static_texture_state(struct lp_static_texture_state *state,
99                                const struct pipe_sampler_view *view)
100{
101   memset(state, 0, sizeof *state);
102
103   if (!view || !view->texture)
104      return;
105
106   const struct pipe_resource *texture = view->texture;
107
108   state->format = view->format;
109   state->swizzle_r = view->swizzle_r;
110   state->swizzle_g = view->swizzle_g;
111   state->swizzle_b = view->swizzle_b;
112   state->swizzle_a = view->swizzle_a;
113   assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
114   assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
115   assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
116   assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
117
118   state->target = view->target;
119   state->pot_width = util_is_power_of_two_or_zero(texture->width0);
120   state->pot_height = util_is_power_of_two_or_zero(texture->height0);
121   state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
122   state->level_zero_only = !view->u.tex.last_level;
123
124   /*
125    * the layer / element / level parameters are all either dynamic
126    * state or handled transparently wrt execution.
127    */
128}
129
130/**
131 * Initialize lp_sampler_static_texture_state object with the gallium
132 * texture/sampler_view state (this contains the parts which are
133 * considered static).
134 */
135void
136lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
137                                      const struct pipe_image_view *view)
138{
139   memset(state, 0, sizeof *state);
140
141   if (!view || !view->resource)
142      return;
143
144   const struct pipe_resource *resource = view->resource;
145
146   state->format = view->format;
147   state->swizzle_r = PIPE_SWIZZLE_X;
148   state->swizzle_g = PIPE_SWIZZLE_Y;
149   state->swizzle_b = PIPE_SWIZZLE_Z;
150   state->swizzle_a = PIPE_SWIZZLE_W;
151   assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
152   assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
153   assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
154   assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
155
156   state->target = view->resource->target;
157   state->pot_width = util_is_power_of_two_or_zero(resource->width0);
158   state->pot_height = util_is_power_of_two_or_zero(resource->height0);
159   state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
160   state->level_zero_only = 0;
161
162   /*
163    * the layer / element / level parameters are all either dynamic
164    * state or handled transparently wrt execution.
165    */
166}
167
168/**
169 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
170 * state (this contains the parts which are considered static).
171 */
172void
173lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
174                                const struct pipe_sampler_state *sampler)
175{
176   memset(state, 0, sizeof *state);
177
178   if (!sampler)
179      return;
180
181   /*
182    * We don't copy sampler state over unless it is actually enabled, to avoid
183    * spurious recompiles, as the sampler static state is part of the shader
184    * key.
185    *
186    * Ideally gallium frontends or cso_cache module would make all state
187    * canonical, but until that happens it's better to be safe than sorry here.
188    *
189    * XXX: Actually there's much more than can be done here, especially
190    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
191    */
192
193   state->wrap_s            = sampler->wrap_s;
194   state->wrap_t            = sampler->wrap_t;
195   state->wrap_r            = sampler->wrap_r;
196   state->min_img_filter    = sampler->min_img_filter;
197   state->mag_img_filter    = sampler->mag_img_filter;
198   state->min_mip_filter    = sampler->min_mip_filter;
199   state->seamless_cube_map = sampler->seamless_cube_map;
200   state->reduction_mode    = sampler->reduction_mode;
201   state->aniso = sampler->max_anisotropy > 1.0f;
202
203   if (sampler->max_lod > 0.0f) {
204      state->max_lod_pos = 1;
205   }
206
207   if (sampler->lod_bias != 0.0f) {
208      state->lod_bias_non_zero = 1;
209   }
210
211   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
212       state->min_img_filter != state->mag_img_filter) {
213
214      /* If min_lod == max_lod we can greatly simplify mipmap selection.
215       * This is a case that occurs during automatic mipmap generation.
216       */
217      if (sampler->min_lod == sampler->max_lod) {
218         state->min_max_lod_equal = 1;
219      } else {
220         if (sampler->min_lod > 0.0f) {
221            state->apply_min_lod = 1;
222         }
223
224         /*
225          * XXX this won't do anything with the mesa state tracker which always
226          * sets max_lod to not more than actually present mip maps...
227          */
228         if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
229            state->apply_max_lod = 1;
230         }
231      }
232   }
233
234   state->compare_mode      = sampler->compare_mode;
235   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
236      state->compare_func   = sampler->compare_func;
237   }
238
239   state->normalized_coords = sampler->normalized_coords;
240}
241
/**
 * Build the squared minimum-axis length ("pmin") used for anisotropic
 * filtering, from the packed s/t derivatives of a 2D texture lookup.
 *
 * The result is scaled by the level-0-clamped texture size, squared
 * (no sqrt is taken), and clamped so that pmax2/pmin2 does not exceed
 * the squared max anisotropy. Returned per quad or per element
 * depending on whether lodf_bld's vector length differs from the
 * coord vector length.
 */
static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context *bld,
              unsigned texture_unit,
              LLVMValueRef s,
              LLVMValueRef t,
              LLVMValueRef max_aniso)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *pmin_bld = &bld->lodf_bld;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   /* packed ddx(s), ddx(t), ddy(s), ddy(t) per quad */
   LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
   LLVMValueRef int_size, float_size;
   LLVMValueRef first_level, first_level_vec;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* if the pmin vector is shorter than coords, reduce to one value per quad */
   boolean pmin_per_quad = pmin_bld->type.length != length;
   unsigned i;

   /* Minify the base size down to the first accessible mip level. */
   first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                 bld->context_ptr, texture_unit, NULL);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
   float_size = lp_build_int_to_float(float_size_bld, int_size);
   max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
   /* work with squared quantities throughout (avoids sqrt) */
   max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);

   static const unsigned char swizzle01[] = { /* no-op swizzle */
      0, 1,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle23[] = {
      2, 3,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

   /* Build a shuffle that pairs width,width,height,height per quad so the
    * packed derivatives get scaled by the matching dimension. */
   for (i = 0; i < num_quads; i++) {
      shuffles[i*4+0] = shuffles[i*4+1] = index0;
      shuffles[i*4+2] = shuffles[i*4+3] = index1;
   }
   floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                     LLVMConstVector(shuffles, length), "");
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);

   /* square each scaled derivative component */
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);

   ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
   ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);

   /* px2 = ddx(s)^2 + ddx(t)^2,  py2 = ddy(s)^2 + ddy(t)^2 (interleaved) */
   LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

   static const unsigned char swizzle0[] = { /* no-op swizzle */
     0, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle1[] = {
     1, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
   LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);

   LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
   LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);

   /* If pmin2 * max_aniso^2 < pmin2 ... i.e. the anisotropy ratio would
    * exceed the limit, clamp by using pmax2 / max_aniso^2 instead. */
   LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);

   LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
                                        pmin2, temp);

   LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);

   pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);

   if (pmin_per_quad)
      pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                        pmin_bld->type, pmin2, 0);
   else
      pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
   return pmin2;
}
330
331/**
332 * Generate code to compute coordinate gradient (rho).
333 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
334 *
335 * The resulting rho has bld->levelf format (per quad or per element).
336 */
337static LLVMValueRef
338lp_build_rho(struct lp_build_sample_context *bld,
339             unsigned texture_unit,
340             LLVMValueRef s,
341             LLVMValueRef t,
342             LLVMValueRef r,
343             LLVMValueRef cube_rho,
344             const struct lp_derivatives *derivs)
345{
346   struct gallivm_state *gallivm = bld->gallivm;
347   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
348   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
349   struct lp_build_context *float_bld = &bld->float_bld;
350   struct lp_build_context *coord_bld = &bld->coord_bld;
351   struct lp_build_context *rho_bld = &bld->lodf_bld;
352   const unsigned dims = bld->dims;
353   LLVMValueRef ddx_ddy[2] = {NULL};
354   LLVMBuilderRef builder = bld->gallivm->builder;
355   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
356   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
357   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
358   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
359   LLVMValueRef rho_vec;
360   LLVMValueRef int_size, float_size;
361   LLVMValueRef rho;
362   LLVMValueRef first_level, first_level_vec;
363   unsigned length = coord_bld->type.length;
364   unsigned num_quads = length / 4;
365   boolean rho_per_quad = rho_bld->type.length != length;
366   boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
367   unsigned i;
368   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
369   LLVMValueRef rho_xvec, rho_yvec;
370
371   /* Note that all simplified calculations will only work for isotropic filtering */
372
373   /*
374    * rho calcs are always per quad except for explicit derivs (excluding
375    * the messy cube maps for now) when requested.
376    */
377
378   first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
379                                                 bld->context_ptr, texture_unit, NULL);
380   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
381   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
382   float_size = lp_build_int_to_float(float_size_bld, int_size);
383
384   if (cube_rho) {
385      LLVMValueRef cubesize;
386      LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
387
388      /*
389       * Cube map code did already everything except size mul and per-quad extraction.
390       * Luckily cube maps are always quadratic!
391       */
392      if (rho_per_quad) {
393         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
394                                         rho_bld->type, cube_rho, 0);
395      }
396      else {
397         rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
398      }
399      /* Could optimize this for single quad just skip the broadcast */
400      cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
401                                            rho_bld->type, float_size, index0);
402      /* skipping sqrt hence returning rho squared */
403      cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
404      rho = lp_build_mul(rho_bld, cubesize, rho);
405   }
406   else if (derivs) {
407      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
408      for (i = 0; i < dims; i++) {
409         LLVMValueRef floatdim;
410         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
411
412         floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
413                                               coord_bld->type, float_size, indexi);
414
415         /*
416          * note that for rho_per_quad case could reduce math (at some shuffle
417          * cost), but for now use same code to per-pixel lod case.
418          */
419         if (no_rho_opt) {
420            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
421            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
422            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
423            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
424         }
425         else {
426            LLVMValueRef tmpx, tmpy;
427            tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
428            tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
429            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
430            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
431         }
432      }
433      if (no_rho_opt) {
434         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
435         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
436         if (dims > 2) {
437            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
438            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
439         }
440         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
441         /* skipping sqrt hence returning rho squared */
442     }
443      else {
444         rho = ddmax[0];
445         if (dims > 1) {
446            rho = lp_build_max(coord_bld, rho, ddmax[1]);
447            if (dims > 2) {
448               rho = lp_build_max(coord_bld, rho, ddmax[2]);
449            }
450         }
451      }
452
453      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho);
454      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);
455
456      if (rho_per_quad) {
457         /*
458          * rho_vec contains per-pixel rho, convert to scalar per quad.
459          */
460         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
461                                         rho_bld->type, rho, 0);
462      }
463   }
464   else {
465      /*
466       * This looks all a bit complex, but it's not that bad
467       * (the shuffle code makes it look worse than it is).
468       * Still, might not be ideal for all cases.
469       */
470      static const unsigned char swizzle0[] = { /* no-op swizzle */
471         0, LP_BLD_SWIZZLE_DONTCARE,
472         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
473      };
474      static const unsigned char swizzle1[] = {
475         1, LP_BLD_SWIZZLE_DONTCARE,
476         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
477      };
478      static const unsigned char swizzle2[] = {
479         2, LP_BLD_SWIZZLE_DONTCARE,
480         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
481      };
482
483      if (dims < 2) {
484         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
485      }
486      else if (dims >= 2) {
487         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
488         if (dims > 2) {
489            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
490         }
491      }
492
493      if (no_rho_opt) {
494         static const unsigned char swizzle01[] = { /* no-op swizzle */
495            0, 1,
496            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
497         };
498         static const unsigned char swizzle23[] = {
499            2, 3,
500            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
501         };
502         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
503
504         for (i = 0; i < num_quads; i++) {
505            shuffles[i*4+0] = shuffles[i*4+1] = index0;
506            shuffles[i*4+2] = shuffles[i*4+3] = index1;
507         }
508         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
509                                           LLVMConstVector(shuffles, length), "");
510         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
511         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
512         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
513         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
514         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
515
516         if (dims > 2) {
517            static const unsigned char swizzle02[] = {
518               0, 2,
519               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
520            };
521            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
522                                                  coord_bld->type, float_size, index2);
523            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
524            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
525            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
526            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
527         }
528
529         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
530         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
531         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
532
533         if (rho_per_quad) {
534            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
535                                            rho_bld->type, rho, 0);
536         }
537         else {
538            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
539         }
540         /* skipping sqrt hence returning rho squared */
541      }
542      else {
543         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
544         if (dims > 2) {
545            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
546         }
547         else {
548            ddx_ddy[1] = NULL; /* silence compiler warning */
549         }
550
551         if (dims < 2) {
552            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
553            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
554         }
555         else if (dims == 2) {
556            static const unsigned char swizzle02[] = {
557               0, 2,
558               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
559            };
560            static const unsigned char swizzle13[] = {
561               1, 3,
562               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
563            };
564            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
565            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
566         }
567         else {
568            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
569            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
570            assert(dims == 3);
571            for (i = 0; i < num_quads; i++) {
572               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
573               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
574               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
575               shuffles1[4*i + 3] = i32undef;
576               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
577               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
578               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
579               shuffles2[4*i + 3] = i32undef;
580            }
581            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
582                                              LLVMConstVector(shuffles1, length), "");
583            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
584                                              LLVMConstVector(shuffles2, length), "");
585         }
586
587         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
588
589         if (bld->coord_type.length > 4) {
590            /* expand size to each quad */
591            if (dims > 1) {
592               /* could use some broadcast_vector helper for this? */
593               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
594               for (i = 0; i < num_quads; i++) {
595                  src[i] = float_size;
596               }
597               float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
598            }
599            else {
600               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
601            }
602            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
603
604            if (dims <= 1) {
605               rho = rho_vec;
606            }
607            else {
608               if (dims >= 2) {
609                  LLVMValueRef rho_s, rho_t, rho_r;
610
611                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
612                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
613
614                  rho = lp_build_max(coord_bld, rho_s, rho_t);
615
616                  if (dims >= 3) {
617                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
618                     rho = lp_build_max(coord_bld, rho, rho_r);
619                  }
620               }
621            }
622            if (rho_per_quad) {
623               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
624                                               rho_bld->type, rho, 0);
625            }
626            else {
627               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
628            }
629         }
630         else {
631            if (dims <= 1) {
632               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
633            }
634            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
635
636            if (dims <= 1) {
637               rho = rho_vec;
638            }
639            else {
640               if (dims >= 2) {
641                  LLVMValueRef rho_s, rho_t, rho_r;
642
643                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
644                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
645
646                  rho = lp_build_max(float_bld, rho_s, rho_t);
647
648                  if (dims >= 3) {
649                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
650                     rho = lp_build_max(float_bld, rho, rho_r);
651                  }
652               }
653            }
654            if (!rho_per_quad) {
655               rho = lp_build_broadcast_scalar(rho_bld, rho);
656            }
657         }
658      }
659   }
660
661   return rho;
662}
663
664
665/*
666 * Bri-linear lod computation
667 *
668 * Use a piece-wise linear approximation of log2 such that:
669 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
670 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
671 *   with the steepness specified in 'factor'
672 * - exact result for 0.5, 1.5, etc.
673 *
674 *
675 *   1.0 -              /----*
676 *                     /
677 *                    /
678 *                   /
679 *   0.5 -          *
680 *                 /
681 *                /
682 *               /
683 *   0.0 - *----/
684 *
685 *         |                 |
686 *        2^0               2^1
687 *
688 * This is a technique also commonly used in hardware:
689 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
690 *
691 * TODO: For correctness, this should only be applied when texture is known to
692 * have regular mipmaps, i.e., mipmaps derived from the base level.
693 *
694 * TODO: This could be done in fixed point, where applicable.
695 */
/**
 * Split a (log2-space) lod into integer and brilinear-adjusted fractional
 * parts, per the piece-wise linear log2 approximation described in the
 * comment block above.
 *
 * \param lod            lod in bld->type (floating) format
 * \param factor         brilinear steepness factor (> 1, see BRILINEAR_FACTOR)
 * \param out_lod_ipart  receives the integer mip level part
 * \param out_lod_fpart  receives the adjusted fractional part (never > 1,
 *                       may be negative)
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   /* pre/post offsets line up the linear segments with exact half points */
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   if (0) {
      /* debug aid: dump the incoming lod (disabled) */
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   /* fpart' = fpart * factor + post_offset */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   if (0) {
      /* debug aid: dump the results (disabled) */
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
733
734
735/*
736 * Combined log2 and brilinear lod computation.
737 *
738 * It's in all identical to calling lp_build_fast_log2() and
739 * lp_build_brilinear_lod() above, but by combining we can compute the integer
740 * and fractional part independently.
741 */
/**
 * Combined fast-log2 + brilinear lod computation operating directly on rho
 * (see the comment above): the integer and fractional lod parts are derived
 * independently from the float's exponent and mantissa.
 *
 * \param rho            coordinate gradient, in bld->type (floating) format
 * \param factor         brilinear steepness factor (> 1)
 * \param out_lod_ipart  receives the integer mip level part
 * \param out_lod_fpart  receives the adjusted fractional part
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_ipart;
   LLVMValueRef lod_fpart;

   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   lod_fpart = lp_build_extract_mantissa(bld, rho);

   /* fpart' = fpart * factor + post_offset */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}
786
787
788/**
789 * Fast implementation of iround(log2(sqrt(x))), based on
790 * log2(x^n) == n*log2(x).
791 *
792 * Gives accurate results all the time.
793 * (Could be trivially extended to handle other power-of-two roots.)
794 */
static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context *bld,
                    LLVMValueRef x)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef ipart;
   /* integer type matching the float vector, for the shift below */
   struct lp_type i_type = lp_int_type(bld->type);
   LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
   /* extract exponent with +1 bias, then halve via arithmetic shift */
   ipart = lp_build_extract_exponent(bld, x, 1);
   ipart = LLVMBuildAShr(builder, ipart, one, "");

   return ipart;
}
814
815
816/**
817 * Generate code to compute texture level of detail (lambda).
818 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
819 * \param lod_bias  optional float vector with the shader lod bias
820 * \param explicit_lod  optional float vector with the explicit lod
821 * \param cube_rho  rho calculated by cube coord mapping (optional)
822 * \param out_lod_ipart  integer part of lod
823 * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
824 * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
825 *
826 * The resulting lod can be scalar per quad or be per element.
827 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      boolean is_lodq,
                      unsigned texture_unit,
                      unsigned sampler_unit,
                      LLVMValueRef s,
                      LLVMValueRef t,
                      LLVMValueRef r,
                      LLVMValueRef cube_rho,
                      const struct lp_derivatives *derivs,
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      enum pipe_tex_mipfilter mip_filter,
                      LLVMValueRef max_aniso,
                      LLVMValueRef *out_lod,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart,
                      LLVMValueRef *out_lod_positive)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   struct lp_build_context *lodf_bld = &bld->lodf_bld;
   LLVMValueRef lod;

   /* Default outputs: lod == 0 and "not minified". */
   *out_lod_ipart = bld->lodi_bld.zero;
   *out_lod_positive = bld->lodi_bld.zero;
   *out_lod_fpart = lodf_bld->zero;

   /*
    * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
    * "Implementations may either unconditionally assume c = 0 for the minification
    * vs. magnification switch-over point, or may choose to make c depend on the
    * combination of minification and magnification modes as follows: if the
    * magnification filter is given by LINEAR and the minification filter is given
    * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
    * done to ensure that a minified texture does not appear "sharper" than a
    * magnified texture. Otherwise c = 0."
    * And 3.9.11 Texture Minification:
    * "If lod is less than or equal to the constant c (see section 3.9.12) the
    * texture is said to be magnified; if it is greater, the texture is minified."
    * So, using 0 as switchover point always, and using magnification for lod == 0.
    * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
    * old GL versions required 0.5 for the modes listed above.
    * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
    */

   if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         dynamic_state->min_lod(dynamic_state, bld->gallivm,
                                bld->context_ptr, sampler_unit);

      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
   }
   else {
      if (explicit_lod) {
         /* Shader-supplied lod; repack if lods are per-quad rather than
          * per-element. */
         if (bld->num_lods != bld->coord_type.length)
            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                            lodf_bld->type, explicit_lod, 0);
         else
            lod = explicit_lod;
      }
      else {
         LLVMValueRef rho;
         boolean rho_squared = (bld->no_rho_approx &&
                                (bld->dims > 1)) || cube_rho;

         if (bld->static_sampler_state->aniso &&
             !explicit_lod) {
            /* For aniso, use the minimum axis of the footprint (pmin). */
            rho = lp_build_pmin(bld, texture_unit, s, t, max_aniso);
            rho_squared = true;
         } else
            rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);

         /*
          * Compute lod = log2(rho)
          */

         if (!lod_bias && !is_lodq &&
             !bld->static_sampler_state->aniso &&
             !bld->static_sampler_state->lod_bias_non_zero &&
             !bld->static_sampler_state->apply_max_lod &&
             !bld->static_sampler_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions by keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               /*
                * Don't actually need both values all the time, lod_ipart is
                * needed for nearest mipfilter, lod_positive if min != mag.
                */
               if (rho_squared) {
                  *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
               }
               else {
                  *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
               }
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !bld->no_brilinear && !rho_squared &&
                !bld->static_sampler_state->aniso) {
               /*
                * This can't work if rho is squared. Not sure if it could be
                * fixed while keeping it worthwhile, could also do sqrt here
                * but brilinear and no_rho_opt seems like a combination not
                * making much sense anyway so just use ordinary path below.
                */
               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
         }

         /* Debug toggle: flip to 1 for the exact (slow) log2. */
         if (0) {
            lod = lp_build_log2(lodf_bld, rho);
         }
         else {
            /* get more accurate results if we just square rho always */
            if (!rho_squared)
               rho = lp_build_mul(lodf_bld, rho, rho);
            lod = lp_build_fast_log2(lodf_bld, rho);
         }

         /* log2(x) == 0.5*log2(x^2): undo the squaring of rho above */
         lod = lp_build_mul(lodf_bld, lod,
                            lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F));

         /* add shader lod bias */
         if (lod_bias) {
            if (bld->num_lods != bld->coord_type.length)
               lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                                    lodf_bld->type, lod_bias, 0);
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_sampler_state->lod_bias_non_zero) {
         LLVMValueRef sampler_lod_bias =
            dynamic_state->lod_bias(dynamic_state, bld->gallivm,
                                    bld->context_ptr, sampler_unit);
         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
                                                      sampler_lod_bias);
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
      }

      /* lodq wants the unclamped lod as well */
      if (is_lodq) {
         *out_lod = lod;
      }

      /* clamp lod */
      if (bld->static_sampler_state->apply_max_lod) {
         LLVMValueRef max_lod =
            dynamic_state->max_lod(dynamic_state, bld->gallivm,
                                   bld->context_ptr, sampler_unit);
         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);

         lod = lp_build_min(lodf_bld, lod, max_lod);
      }
      if (bld->static_sampler_state->apply_min_lod) {
         LLVMValueRef min_lod =
            dynamic_state->min_lod(dynamic_state, bld->gallivm,
                                   bld->context_ptr, sampler_unit);
         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);

         lod = lp_build_max(lodf_bld, lod, min_lod);
      }

      /* for lodq, return the clamped lod in the fpart output */
      if (is_lodq) {
         *out_lod_fpart = lod;
         return;
      }
   }

   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                    lod, lodf_bld->zero);

   if (bld->static_sampler_state->aniso) {
      *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
   } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!bld->no_brilinear) {
         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
}
1038
1039
1040/**
1041 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1042 * to actual mip level.
1043 * Note: this is all scalar per quad code.
1044 * \param lod_ipart  int texture level of detail
1045 * \param level_out  returns integer
1046 * \param out_of_bounds returns per coord out_of_bounds mask if provided
1047 */
1048void
1049lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1050                           unsigned texture_unit,
1051                           LLVMValueRef lod_ipart,
1052                           LLVMValueRef *level_out,
1053                           LLVMValueRef *out_of_bounds)
1054{
1055   struct lp_build_context *leveli_bld = &bld->leveli_bld;
1056   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1057   LLVMValueRef first_level, last_level, level;
1058
1059   first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1060                                            bld->context_ptr, texture_unit, NULL);
1061   last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1062                                          bld->context_ptr, texture_unit, NULL);
1063   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1064   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1065
1066   level = lp_build_add(leveli_bld, lod_ipart, first_level);
1067
1068   if (out_of_bounds) {
1069      LLVMValueRef out, out1;
1070      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1071      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1072      out = lp_build_or(leveli_bld, out, out1);
1073      if (bld->num_mips == bld->coord_bld.type.length) {
1074         *out_of_bounds = out;
1075      }
1076      else if (bld->num_mips == 1) {
1077         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1078      }
1079      else {
1080         assert(bld->num_mips == bld->coord_bld.type.length / 4);
1081         *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1082                                                                leveli_bld->type,
1083                                                                bld->int_coord_bld.type,
1084                                                                out);
1085      }
1086      level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1087      *level_out = level;
1088   }
1089   else {
1090      /* clamp level to legal range of levels */
1091      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1092
1093   }
1094}
1095
1096
1097/**
1098 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1099 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1100 * part accordingly.
1101 * Later, we'll sample from those two mipmap levels and interpolate between them.
1102 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef first_level, last_level;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   assert(bld->num_lods == bld->num_mips);

   /* Fetch the texture's valid mip range and splat to level vectors. */
   first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
                                            bld->context_ptr, texture_unit, NULL);
   last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
                                          bld->context_ptr, texture_unit, NULL);
   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);

   /* The two adjacent levels to interpolate between; lod_ipart is relative
    * to the base level. */
   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level], with
    * the minimum number of comparisons, and zeroing lod_fpart in the extreme
    * ends in the process.
    */

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   /* When clamped low, both levels coincide, so no interpolation weight. */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   /* Likewise when clamped high: both levels are last_level. */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}
1169
1170/**
1171 * A helper function that factorizes this common pattern.
1172 */
1173static LLVMValueRef
1174load_mip(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef index1) {
1175   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1176   LLVMValueRef indexes[2] = {zero, index1};
1177   LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, offsets, indexes, ARRAY_SIZE(indexes), "");
1178   return LLVMBuildLoad(gallivm->builder, ptr, "");
1179}
1180
1181/**
1182 * Return pointer to a single mipmap level.
1183 * \param level  integer mipmap level
1184 */
1185LLVMValueRef
1186lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1187                          LLVMValueRef level)
1188{
1189   LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets, level);
1190   LLVMBuilderRef builder = bld->gallivm->builder;
1191   LLVMValueRef data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
1192   return data_ptr;
1193}
1194
1195/**
1196 * Return (per-pixel) offsets to mip levels.
1197 * \param level  integer mipmap level
1198 */
LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
                         LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offsets, offset1;

   if (bld->num_mips == 1) {
      /* Single (scalar) level: one load, broadcast to all elements. */
      offset1 = load_mip(bld->gallivm, bld->mip_offsets, level);
      offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
   }
   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
      /* Per-quad levels: load one offset per quad into element 4*i,
       * then replicate it across each quad's four lanes. */
      unsigned i;

      offsets = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
      }
      offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
   }
   else {
      /* Fully per-element levels: one load per lane. */
      unsigned i;

      assert (bld->num_mips == bld->coord_bld.type.length);

      offsets = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
      }
   }
   return offsets;
}
1236
1237
1238/**
1239 * Codegen equivalent for u_minify().
1240 * @param lod_scalar  if lod is a (broadcasted) scalar
1241 * Return max(1, base_size >> level);
1242 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                boolean lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size;
      assert(bld->type.sign);
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         /* Straightforward path: max(1, base_size >> level). */
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      }
      else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         /* 127 is the IEEE-754 single precision exponent bias, 23 the
          * mantissa width (exponent field starts at bit 23). */
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float by building its bit pattern:
          * exponent = (127 - level), mantissa = 0 */
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}
1304
1305
1306/**
1307 * Dereference stride_array[mipmap_level] array to get a stride.
1308 * Return stride as a vector.
1309 */
1310static LLVMValueRef
1311lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1312                              LLVMValueRef stride_array, LLVMValueRef level)
1313{
1314   LLVMBuilderRef builder = bld->gallivm->builder;
1315   LLVMValueRef stride, stride1;
1316   if (bld->num_mips == 1) {
1317      stride1 = load_mip(bld->gallivm, stride_array, level);
1318      stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1319   }
1320   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1321      LLVMValueRef stride1;
1322      unsigned i;
1323
1324      stride = bld->int_coord_bld.undef;
1325      for (i = 0; i < bld->num_mips; i++) {
1326         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1327         stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1328         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1329         stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1330      }
1331      stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1332   }
1333   else {
1334      LLVMValueRef stride1;
1335      unsigned i;
1336
1337      assert (bld->num_mips == bld->coord_bld.type.length);
1338
1339      stride = bld->int_coord_bld.undef;
1340      for (i = 0; i < bld->coord_bld.type.length; i++) {
1341         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1342         stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1343         stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1344      }
1345   }
1346   return stride;
1347}
1348
1349
1350/**
1351 * When sampling a mipmap, we need to compute the width, height, depth
1352 * of the source levels from the level indexes.  This helper function
1353 * does that.
1354 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_mips == 1) {
      /* Single level for all pixels: minify the whole size vector at once. */
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
   }
   else {
      LLVMValueRef int_size_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      if (bld->num_mips == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
         }
         else {
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
         }

         /* Minify 4-wide once per quad with that quad's (scalar) level. */
         for (i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
          * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      }
      else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
         assert(bld->num_mips == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            /* 1D: the size is scalar, so minify fully per-element. */
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
         }
         else {
            /* Multi-dim: minify the whole size vector once per element's
             * level, then concatenate the results. */
            LLVMValueRef ilevel1;
            for (i = 0; i < bld->num_mips; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                                                    bld->int_size_in_bld.type, ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_mips);
         }
      }
   }

   /* Strides only exist for the dimensions that have them. */
   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
1470
1471
1472/**
1473 * Extract and broadcast texture size.
1474 *
1475 * @param size_type   type of the texture size vector (either
1476 *                    bld->int_size_type or bld->float_size_type)
1477 * @param coord_type  type of the texture size vector (either
1478 *                    bld->int_coord_type or bld->coord_type)
1479 * @param size        vector with the texture size (width, height, depth)
1480 */
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_build_context *size_bld,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth)
{
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   struct lp_type size_type = size_bld->type;

   if (bld->num_mips == 1) {
      /* Single mip level: size is [w, h, d]; extract each component and
       * broadcast it to a full coord-wide vector. */
      *out_width = lp_build_extract_broadcast(bld->gallivm,
                                              size_type,
                                              coord_type,
                                              size,
                                              LLVMConstInt(i32t, 0, 0));
      if (dims >= 2) {
         *out_height = lp_build_extract_broadcast(bld->gallivm,
                                                  size_type,
                                                  coord_type,
                                                  size,
                                                  LLVMConstInt(i32t, 1, 0));
         if (dims == 3) {
            *out_depth = lp_build_extract_broadcast(bld->gallivm,
                                                    size_type,
                                                    coord_type,
                                                    size,
                                                    LLVMConstInt(i32t, 2, 0));
         }
      }
   }
   else {
      unsigned num_quads = bld->coord_bld.type.length / 4;

      if (dims == 1) {
         /* 1D with per-element mips: size already is the width vector. */
         *out_width = size;
      }
      else if (bld->num_mips == num_quads) {
         /* Per-quad sizes, layout [w0, h0, d0, _, w1, h1, d1, _, ...]:
          * replicate the selected component across each quad. */
         *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
         if (dims >= 2) {
            *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
            if (dims == 3) {
               *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
            }
         }
      }
      else {
         /* Per-element sizes: gather each component from the AoS layout. */
         assert(bld->num_mips == bld->coord_type.length);
         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                coord_type, size, 0);
         if (dims >= 2) {
            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                    coord_type, size, 1);
            if (dims == 3) {
               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                      coord_type, size, 2);
            }
         }
      }
   }
}
1545
1546
1547/**
1548 * Unnormalize coords.
1549 *
1550 * @param flt_size  vector with the integer texture size (width, height, depth)
1551 */
1552void
1553lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1554                             LLVMValueRef flt_size,
1555                             LLVMValueRef *s,
1556                             LLVMValueRef *t,
1557                             LLVMValueRef *r)
1558{
1559   const unsigned dims = bld->dims;
1560   LLVMValueRef width;
1561   LLVMValueRef height = NULL;
1562   LLVMValueRef depth = NULL;
1563
1564   lp_build_extract_image_sizes(bld,
1565                                &bld->float_size_bld,
1566                                bld->coord_type,
1567                                flt_size,
1568                                &width,
1569                                &height,
1570                                &depth);
1571
1572   /* s = s * width, t = t * height */
1573   *s = lp_build_mul(&bld->coord_bld, *s, width);
1574   if (dims >= 2) {
1575      *t = lp_build_mul(&bld->coord_bld, *t, height);
1576      if (dims >= 3) {
1577         *r = lp_build_mul(&bld->coord_bld, *r, depth);
1578      }
1579   }
1580}
1581
1582/**
1583 * Generate new coords and faces for cubemap texels falling off the face.
1584 *
1585 * @param face   face (center) of the pixel
1586 * @param x0     lower x coord
1587 * @param x1     higher x coord (must be x0 + 1)
1588 * @param y0     lower y coord
1589 * @param y1     higher y coord (must be x0 + 1)
1590 * @param max_coord     texture cube (level) size - 1
1591 * @param next_faces    new face values when falling off
1592 * @param next_xcoords  new x coord values when falling off
1593 * @param next_ycoords  new y coord values when falling off
1594 *
1595 * The arrays hold the new values when under/overflow of
1596 * lower x, higher x, lower y, higher y coord would occur (in this order).
1597 * next_xcoords/next_ycoords have two entries each (for both new lower and
1598 * higher coord).
1599 */
1600void
1601lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1602                        LLVMValueRef face,
1603                        LLVMValueRef x0,
1604                        LLVMValueRef x1,
1605                        LLVMValueRef y0,
1606                        LLVMValueRef y1,
1607                        LLVMValueRef max_coord,
1608                        LLVMValueRef next_faces[4],
1609                        LLVMValueRef next_xcoords[4][2],
1610                        LLVMValueRef next_ycoords[4][2])
1611{
1612   /*
1613    * Lookup tables aren't nice for simd code hence try some logic here.
1614    * (Note that while it would not be necessary to do per-sample (4) lookups
1615    * when using a LUT as it's impossible that texels fall off of positive
1616    * and negative edges simultaneously, it would however be necessary to
1617    * do 2 lookups for corner handling as in this case texels both fall off
1618    * of x and y axes.)
1619    */
1620   /*
1621    * Next faces (for face 012345):
1622    * x < 0.0  : 451110
1623    * x >= 1.0 : 540001
1624    * y < 0.0  : 225422
1625    * y >= 1.0 : 334533
1626    * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1627    * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1628    * nfy+: face & ~4 > 1 ? face + 2 : 3;
1629    * This could also use pshufb instead, but would need (manually coded)
1630    * ssse3 intrinsic (llvm won't do non-constant shuffles).
1631    */
1632   struct gallivm_state *gallivm = ivec_bld->gallivm;
1633   LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1634   LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1635   LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1636   LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1637   LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1638   LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1639
1640   sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1641   tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1642   sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1643   faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1644   tmp = lp_build_add(ivec_bld, faceand1, c4);
1645   next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1646   next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1647
1648   tmp = lp_build_andnot(ivec_bld, face, c4);
1649   sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1650   tmp = lp_build_add(ivec_bld, face, c2);
1651   next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1652   next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1653
1654   /*
1655    * new xcoords (for face 012345):
1656    * x < 0.0  : max   max   t     max-t max  max
1657    * x >= 1.0 : 0     0     max-t t     0    0
1658    * y < 0.0  : max   0     max-s s     s    max-s
1659    * y >= 1.0 : max   0     s     max-s s    max-s
1660    *
1661    * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1662    * ncx[0] = max - ncx[1]
1663    * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1664    * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1665    */
1666   sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1667   maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1668   tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1669   next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1670   next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1671   maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1672   tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1673   next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1674   next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1675
1676   sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1677
1678   tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1679   maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1680   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1681   next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1682   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1683   next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1684   maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1685   tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1686   next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1687   tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1688   next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1689
1690   /*
1691    * new ycoords (for face 012345):
1692    * x < 0.0  : t     t     0     max   t    t
1693    * x >= 1.0 : t     t     0     max   t    t
1694    * y < 0.0  : max-s s     0     max   max  0
1695    * y >= 1.0 : s     max-s 0     max   0    max
1696    *
1697    * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1698    * ncy[1] = ncy[0]
1699    * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
1700    * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1701    */
1702   tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1703   next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1704   next_ycoords[1][0] = next_ycoords[0][0];
1705   next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1706   next_ycoords[1][1] = next_ycoords[0][1];
1707
1708   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1709   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1710   next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1711   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1712   next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1713   tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1714   tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1715   next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1716   tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1717   next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1718}
1719
1720
1721/** Helper used by lp_build_cube_lookup() */
1722static LLVMValueRef
1723lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1724{
1725   /* ima = +0.5 / abs(coord); */
1726   LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1727   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1728   /* avoid div by zero */
1729   LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1730   LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1731   LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1732   return ima;
1733}
1734
1735
1736/** Helper for doing 3-wise selection.
1737 * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1738 */
1739static LLVMValueRef
1740lp_build_select3(struct lp_build_context *sel_bld,
1741                 LLVMValueRef sel0,
1742                 LLVMValueRef sel1,
1743                 LLVMValueRef val0,
1744                 LLVMValueRef val1,
1745                 LLVMValueRef val2)
1746{
1747   LLVMValueRef tmp;
1748   tmp = lp_build_select(sel_bld, sel0, val0, val1);
1749   return lp_build_select(sel_bld, sel1, val2, tmp);
1750}
1751
1752
1753/**
1754 * Generate code to do cube face selection and compute per-face texcoords.
1755 */
1756void
1757lp_build_cube_lookup(struct lp_build_sample_context *bld,
1758                     LLVMValueRef *coords,
1759                     const struct lp_derivatives *derivs_in, /* optional */
1760                     LLVMValueRef *rho,
1761                     struct lp_derivatives *derivs_out, /* optional */
1762                     boolean need_derivs)
1763{
1764   struct lp_build_context *coord_bld = &bld->coord_bld;
1765   LLVMBuilderRef builder = bld->gallivm->builder;
1766   struct gallivm_state *gallivm = bld->gallivm;
1767   LLVMValueRef si, ti, ri;
1768
1769   /*
1770    * Do per-pixel face selection. We cannot however (as we used to do)
1771    * simply calculate the derivs afterwards (which is very bogus for
1772    * explicit derivs btw) because the values would be "random" when
1773    * not all pixels lie on the same face. So what we do here is just
1774    * calculate the derivatives after scaling the coords by the absolute
1775    * value of the inverse major axis, and essentially do rho calculation
1776    * steps as if it were a 3d texture. This is perfect if all pixels hit
1777    * the same face, but not so great at edges, I believe the max error
1778    * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
1779    * the 3d distance between 2 points on the cube instead of measuring up/down
1780    * the edge). Still this is possibly a win over just selecting the same face
1781    * for all pixels. Unfortunately, something like that doesn't work for
1782    * explicit derivatives.
1783    */
1784   struct lp_build_context *cint_bld = &bld->int_coord_bld;
1785   struct lp_type intctype = cint_bld->type;
1786   LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1787   LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1788   LLVMValueRef as, at, ar, face, face_s, face_t;
1789   LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1790   LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1791   LLVMValueRef tnegi, rnegi;
1792   LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1793   LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1794   LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1795                                                  1LL << (intctype.width - 1));
1796   LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1797                                                   intctype.width -1);
1798   LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1799   LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1800   LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1801   LLVMValueRef s = coords[0];
1802   LLVMValueRef t = coords[1];
1803   LLVMValueRef r = coords[2];
1804
1805   assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1806   assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1807   assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1808
1809   /*
1810    * get absolute value (for x/y/z face selection) and sign bit
1811    * (for mirroring minor coords and pos/neg face selection)
1812    * of the original coords.
1813    */
1814   as = lp_build_abs(&bld->coord_bld, s);
1815   at = lp_build_abs(&bld->coord_bld, t);
1816   ar = lp_build_abs(&bld->coord_bld, r);
1817
1818   /*
1819    * major face determination: select x if x > y else select y
1820    * select z if z >= max(x,y) else select previous result
1821    * if some axis are the same we chose z over y, y over x - the
1822    * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1823    * wouldn't care could save a select or two if using different
1824    * compares and doing at_g_as_ar last since tnewx and tnewz are the
1825    * same).
1826    */
1827   as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1828   maxasat = lp_build_max(coord_bld, as, at);
1829   ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1830
1831   if (need_derivs) {
1832      /*
1833       * XXX: This is really really complex.
1834       * It is a bit overkill to use this for implicit derivatives as well,
1835       * no way this is worth the cost in practice, but seems to be the
1836       * only way for getting accurate and per-pixel lod values.
1837       */
1838      LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1839      LLVMValueRef madx, mady, madxdivma, madydivma;
1840      LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1841      LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1842      LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1843      LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1844      LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1845      /*
1846       * s = 1/2 * ( sc / ma + 1)
1847       * t = 1/2 * ( tc / ma + 1)
1848       *
1849       * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1850       * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1851       *
1852       * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1853       * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1854       * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1855       * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1856       */
1857
1858      /* select ma, calculate ima */
1859      ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1860      mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1861      signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1862      ima = lp_build_div(coord_bld, coord_bld->one, ma);
1863      imahalf = lp_build_mul(coord_bld, posHalf, ima);
1864      imahalfpos = lp_build_abs(coord_bld, imahalf);
1865
1866      if (!derivs_in) {
1867         ddx[0] = lp_build_ddx(coord_bld, s);
1868         ddx[1] = lp_build_ddx(coord_bld, t);
1869         ddx[2] = lp_build_ddx(coord_bld, r);
1870         ddy[0] = lp_build_ddy(coord_bld, s);
1871         ddy[1] = lp_build_ddy(coord_bld, t);
1872         ddy[2] = lp_build_ddy(coord_bld, r);
1873      }
1874      else {
1875         ddx[0] = derivs_in->ddx[0];
1876         ddx[1] = derivs_in->ddx[1];
1877         ddx[2] = derivs_in->ddx[2];
1878         ddy[0] = derivs_in->ddy[0];
1879         ddy[1] = derivs_in->ddy[1];
1880         ddy[2] = derivs_in->ddy[2];
1881      }
1882
1883      /* select major derivatives */
1884      madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1885      mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1886
1887      si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1888      ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1889      ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1890
1891      sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1892      tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1893      rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1894
1895      sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1896      tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1897      rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1898
1899      /*
1900       * compute all possible new s/t coords, which does the mirroring,
1901       * and do the same for derivs minor axes.
1902       * snewx = signma * -r;
1903       * tnewx = -t;
1904       * snewy = s;
1905       * tnewy = signma * r;
1906       * snewz = signma * s;
1907       * tnewz = -t;
1908       */
1909      tnegi = LLVMBuildXor(builder, ti, signmask, "");
1910      rnegi = LLVMBuildXor(builder, ri, signmask, "");
1911      tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1912      rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1913      tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1914      rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1915
1916      snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1917      tnewx = tnegi;
1918      sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1919      tdxnewx = tdxnegi;
1920      sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1921      tdynewx = tdynegi;
1922
1923      snewy = si;
1924      tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1925      sdxnewy = sdxi;
1926      tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1927      sdynewy = sdyi;
1928      tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1929
1930      snewz = LLVMBuildXor(builder, signmabit, si, "");
1931      tnewz = tnegi;
1932      sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1933      tdxnewz = tdxnegi;
1934      sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1935      tdynewz = tdynegi;
1936
1937      /* select the mirrored values */
1938      face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1939      face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1940      face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1941      face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1942      face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1943      face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1944      face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1945
1946      face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1947      face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1948      face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1949      face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1950      face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1951      face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1952
1953      /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1954      madxdivma = lp_build_mul(coord_bld, madx, ima);
1955      tmp = lp_build_mul(coord_bld, madxdivma, face_s);
1956      tmp = lp_build_sub(coord_bld, face_sdx, tmp);
1957      derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
1958
1959      /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
1960      tmp = lp_build_mul(coord_bld, madxdivma, face_t);
1961      tmp = lp_build_sub(coord_bld, face_tdx, tmp);
1962      derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
1963
1964      /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
1965      madydivma = lp_build_mul(coord_bld, mady, ima);
1966      tmp = lp_build_mul(coord_bld, madydivma, face_s);
1967      tmp = lp_build_sub(coord_bld, face_sdy, tmp);
1968      derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
1969
1970      /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
1971      tmp = lp_build_mul(coord_bld, madydivma, face_t);
1972      tmp = lp_build_sub(coord_bld, face_tdy, tmp);
1973      derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
1974
1975      signma = LLVMBuildLShr(builder, mai, signshift, "");
1976      coords[2] = LLVMBuildOr(builder, face, signma, "face");
1977
1978      /* project coords */
1979      face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
1980      face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
1981
1982      coords[0] = lp_build_add(coord_bld, face_s, posHalf);
1983      coords[1] = lp_build_add(coord_bld, face_t, posHalf);
1984
1985      return;
1986   }
1987
1988   ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1989   mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1990   signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1991
1992   si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1993   ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1994   ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1995
1996   /*
1997    * compute all possible new s/t coords, which does the mirroring
1998    * snewx = signma * -r;
1999    * tnewx = -t;
2000    * snewy = s;
2001    * tnewy = signma * r;
2002    * snewz = signma * s;
2003    * tnewz = -t;
2004    */
2005   tnegi = LLVMBuildXor(builder, ti, signmask, "");
2006   rnegi = LLVMBuildXor(builder, ri, signmask, "");
2007
2008   snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2009   tnewx = tnegi;
2010
2011   snewy = si;
2012   tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2013
2014   snewz = LLVMBuildXor(builder, signmabit, si, "");
2015   tnewz = tnegi;
2016
2017   /* select the mirrored values */
2018   face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2019   face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2020   face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2021
2022   face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2023   face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2024
2025   /* add +1 for neg face */
2026   /* XXX with AVX probably want to use another select here -
2027    * as long as we ensure vblendvps gets used we can actually
2028    * skip the comparison and just use sign as a "mask" directly.
2029    */
2030   signma = LLVMBuildLShr(builder, mai, signshift, "");
2031   coords[2] = LLVMBuildOr(builder, face, signma, "face");
2032
2033   /* project coords */
2034   if (!need_derivs) {
2035      imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2036      face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2037      face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2038   }
2039
2040   coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2041   coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2042}
2043
2044
2045/**
2046 * Compute the partial offset of a pixel block along an arbitrary axis.
2047 *
2048 * @param coord   coordinate in pixels
2049 * @param stride  number of bytes between rows of successive pixel blocks
2050 * @param block_length  number of pixels in a pixels block along the coordinate
2051 *                      axis
2052 * @param out_offset    resulting relative offset of the pixel block in bytes
2053 * @param out_subcoord  resulting sub-block pixel coordinate
2054 */
2055void
2056lp_build_sample_partial_offset(struct lp_build_context *bld,
2057                               unsigned block_length,
2058                               LLVMValueRef coord,
2059                               LLVMValueRef stride,
2060                               LLVMValueRef *out_offset,
2061                               LLVMValueRef *out_subcoord)
2062{
2063   LLVMBuilderRef builder = bld->gallivm->builder;
2064   LLVMValueRef offset;
2065   LLVMValueRef subcoord;
2066
2067   if (block_length == 1) {
2068      subcoord = bld->zero;
2069   }
2070   else {
2071      /*
2072       * Pixel blocks have power of two dimensions. LLVM should convert the
2073       * rem/div to bit arithmetic.
2074       * TODO: Verify this.
2075       * It does indeed BUT it does transform it to scalar (and back) when doing so
2076       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2077       * The generated code looks seriously unfunny and is quite expensive.
2078       */
2079#if 0
2080      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2081      subcoord = LLVMBuildURem(builder, coord, block_width, "");
2082      coord    = LLVMBuildUDiv(builder, coord, block_width, "");
2083#else
2084      unsigned logbase2 = util_logbase2(block_length);
2085      LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2086      LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2087      subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2088      coord = LLVMBuildLShr(builder, coord, block_shift, "");
2089#endif
2090   }
2091
2092   offset = lp_build_mul(bld, coord, stride);
2093
2094   assert(out_offset);
2095   assert(out_subcoord);
2096
2097   *out_offset = offset;
2098   *out_subcoord = subcoord;
2099}
2100
2101
2102/**
2103 * Compute the offset of a pixel block.
2104 *
2105 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2106 *
2107 * Returns the relative offset and i,j sub-block coordinates
2108 */
2109void
2110lp_build_sample_offset(struct lp_build_context *bld,
2111                       const struct util_format_description *format_desc,
2112                       LLVMValueRef x,
2113                       LLVMValueRef y,
2114                       LLVMValueRef z,
2115                       LLVMValueRef y_stride,
2116                       LLVMValueRef z_stride,
2117                       LLVMValueRef *out_offset,
2118                       LLVMValueRef *out_i,
2119                       LLVMValueRef *out_j)
2120{
2121   LLVMValueRef x_stride;
2122   LLVMValueRef offset;
2123
2124   x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2125                                 format_desc->block.bits/8);
2126
2127   lp_build_sample_partial_offset(bld,
2128                                  format_desc->block.width,
2129                                  x, x_stride,
2130                                  &offset, out_i);
2131
2132   if (y && y_stride) {
2133      LLVMValueRef y_offset;
2134      lp_build_sample_partial_offset(bld,
2135                                     format_desc->block.height,
2136                                     y, y_stride,
2137                                     &y_offset, out_j);
2138      offset = lp_build_add(bld, offset, y_offset);
2139   }
2140   else {
2141      *out_j = bld->zero;
2142   }
2143
2144   if (z && z_stride) {
2145      LLVMValueRef z_offset;
2146      LLVMValueRef k;
2147      lp_build_sample_partial_offset(bld,
2148                                     1, /* pixel blocks are always 2D */
2149                                     z, z_stride,
2150                                     &z_offset, &k);
2151      offset = lp_build_add(bld, offset, z_offset);
2152   }
2153
2154   *out_offset = offset;
2155}
2156
2157static LLVMValueRef
2158lp_build_sample_min(struct lp_build_context *bld,
2159                    LLVMValueRef x,
2160                    LLVMValueRef v0,
2161                    LLVMValueRef v1)
2162{
2163   /* if the incoming LERP weight is 0 then the min/max
2164    * should ignore that value. */
2165   LLVMValueRef mask = lp_build_compare(bld->gallivm,
2166                                        bld->type,
2167                                        PIPE_FUNC_NOTEQUAL,
2168                                        x, bld->zero);
2169   LLVMValueRef min = lp_build_min(bld, v0, v1);
2170
2171   return lp_build_select(bld, mask, min, v0);
2172}
2173
2174static LLVMValueRef
2175lp_build_sample_max(struct lp_build_context *bld,
2176                    LLVMValueRef x,
2177                    LLVMValueRef v0,
2178                    LLVMValueRef v1)
2179{
2180   /* if the incoming LERP weight is 0 then the min/max
2181    * should ignore that value. */
2182   LLVMValueRef mask = lp_build_compare(bld->gallivm,
2183                                        bld->type,
2184                                        PIPE_FUNC_NOTEQUAL,
2185                                        x, bld->zero);
2186   LLVMValueRef max = lp_build_max(bld, v0, v1);
2187
2188   return lp_build_select(bld, mask, max, v0);
2189}
2190
2191static LLVMValueRef
2192lp_build_sample_min_2d(struct lp_build_context *bld,
2193                       LLVMValueRef x,
2194                       LLVMValueRef y,
2195                       LLVMValueRef a,
2196                       LLVMValueRef b,
2197                       LLVMValueRef c,
2198                       LLVMValueRef d)
2199{
2200   LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2201   LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2202   return lp_build_sample_min(bld, y, v0, v1);
2203}
2204
2205static LLVMValueRef
2206lp_build_sample_max_2d(struct lp_build_context *bld,
2207                       LLVMValueRef x,
2208                       LLVMValueRef y,
2209                       LLVMValueRef a,
2210                       LLVMValueRef b,
2211                       LLVMValueRef c,
2212                       LLVMValueRef d)
2213{
2214   LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2215   LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2216   return lp_build_sample_max(bld, y, v0, v1);
2217}
2218
2219static LLVMValueRef
2220lp_build_sample_min_3d(struct lp_build_context *bld,
2221                LLVMValueRef x,
2222                LLVMValueRef y,
2223                LLVMValueRef z,
2224                LLVMValueRef a, LLVMValueRef b,
2225                LLVMValueRef c, LLVMValueRef d,
2226                LLVMValueRef e, LLVMValueRef f,
2227                LLVMValueRef g, LLVMValueRef h)
2228{
2229   LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2230   LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2231   return lp_build_sample_min(bld, z, v0, v1);
2232}
2233
2234static LLVMValueRef
2235lp_build_sample_max_3d(struct lp_build_context *bld,
2236                       LLVMValueRef x,
2237                       LLVMValueRef y,
2238                       LLVMValueRef z,
2239                       LLVMValueRef a, LLVMValueRef b,
2240                       LLVMValueRef c, LLVMValueRef d,
2241                       LLVMValueRef e, LLVMValueRef f,
2242                       LLVMValueRef g, LLVMValueRef h)
2243{
2244   LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2245   LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2246   return lp_build_sample_max(bld, z, v0, v1);
2247}
2248
2249void
2250lp_build_reduce_filter(struct lp_build_context *bld,
2251                       enum pipe_tex_reduction_mode mode,
2252                       unsigned flags,
2253                       unsigned num_chan,
2254                       LLVMValueRef x,
2255                       LLVMValueRef *v00,
2256                       LLVMValueRef *v01,
2257                       LLVMValueRef *out)
2258{
2259   unsigned chan;
2260   switch (mode) {
2261   case PIPE_TEX_REDUCTION_MIN:
2262      for (chan = 0; chan < num_chan; chan++)
2263         out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2264      break;
2265   case PIPE_TEX_REDUCTION_MAX:
2266      for (chan = 0; chan < num_chan; chan++)
2267         out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2268      break;
2269   case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2270   default:
2271      for (chan = 0; chan < num_chan; chan++)
2272         out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2273      break;
2274   }
2275}
2276
2277void
2278lp_build_reduce_filter_2d(struct lp_build_context *bld,
2279                          enum pipe_tex_reduction_mode mode,
2280                          unsigned flags,
2281                          unsigned num_chan,
2282                          LLVMValueRef x,
2283                          LLVMValueRef y,
2284                          LLVMValueRef *v00,
2285                          LLVMValueRef *v01,
2286                          LLVMValueRef *v10,
2287                          LLVMValueRef *v11,
2288                          LLVMValueRef *out)
2289{
2290   unsigned chan;
2291   switch (mode) {
2292   case PIPE_TEX_REDUCTION_MIN:
2293      for (chan = 0; chan < num_chan; chan++)
2294         out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2295      break;
2296   case PIPE_TEX_REDUCTION_MAX:
2297      for (chan = 0; chan < num_chan; chan++)
2298         out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2299      break;
2300   case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2301   default:
2302      for (chan = 0; chan < num_chan; chan++)
2303         out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags);
2304      break;
2305   }
2306}
2307
2308void
2309lp_build_reduce_filter_3d(struct lp_build_context *bld,
2310                          enum pipe_tex_reduction_mode mode,
2311                          unsigned flags,
2312                          unsigned num_chan,
2313                          LLVMValueRef x,
2314                          LLVMValueRef y,
2315                          LLVMValueRef z,
2316                          LLVMValueRef *v000,
2317                          LLVMValueRef *v001,
2318                          LLVMValueRef *v010,
2319                          LLVMValueRef *v011,
2320                          LLVMValueRef *v100,
2321                          LLVMValueRef *v101,
2322                          LLVMValueRef *v110,
2323                          LLVMValueRef *v111,
2324                          LLVMValueRef *out)
2325{
2326   unsigned chan;
2327   switch (mode) {
2328   case PIPE_TEX_REDUCTION_MIN:
2329      for (chan = 0; chan < num_chan; chan++)
2330         out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2331                                     v000[chan], v001[chan], v010[chan], v011[chan],
2332                                     v100[chan], v101[chan], v110[chan], v111[chan]);
2333      break;
2334   case PIPE_TEX_REDUCTION_MAX:
2335      for (chan = 0; chan < num_chan; chan++)
2336         out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2337                                     v000[chan], v001[chan], v010[chan], v011[chan],
2338                                     v100[chan], v101[chan], v110[chan], v111[chan]);
2339      break;
2340   case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2341   default:
2342      for (chan = 0; chan < num_chan; chan++)
2343         out[chan] = lp_build_lerp_3d(bld, x, y, z,
2344                                      v000[chan], v001[chan], v010[chan], v011[chan],
2345                                      v100[chan], v101[chan], v110[chan], v111[chan],
2346                                      flags);
2347      break;
2348   }
2349}
2350
2351/*
2352 * generated from
2353 * const float alpha = 2;
2354 * for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
2355 *    const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
2356 *    const float weight = (float)expf(-alpha * r2);
2357 */
/* 1024-entry precomputed weight LUT: expf(-2 * r2) for r2 evenly spaced
 * in [0, 1] — see the generation snippet in the comment above.  Exposed
 * read-only via lp_build_sample_aniso_filter_table(). */
static const float aniso_filter_table[1024] = {
   1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
   0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
   0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
   0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
   0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
   0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
   0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
   0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
   0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
   0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
   0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
   0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
   0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
   0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
   0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
   0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
   0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
   0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
   0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
   0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
   0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
   0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
   0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
   0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
   0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
   0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
   0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
   0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
   0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
   0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
   0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
   0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
   0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
   0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
   0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
   0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
   0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
   0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
   0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
   0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
   0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
   0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
   0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
   0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
   0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
   0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
   0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
   0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
   0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
   0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
   0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
   0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
   0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
   0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
   0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
   0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
   0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
   0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
   0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
   0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
   0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
   0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
   0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
   0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
   0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
   0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
   0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
   0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
   0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
   0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
   0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
   0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
   0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
   0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
   0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
   0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
   0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
   0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
   0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
   0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
   0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
   0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
   0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
   0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
   0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
   0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
   0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
   0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
   0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
   0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
   0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
   0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
   0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
   0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
   0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
   0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
   0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
   0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
   0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
   0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
   0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
   0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
   0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
   0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
   0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
   0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
   0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
   0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
   0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
   0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
   0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
   0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
   0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
   0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
   0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
   0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
   0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
   0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
   0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
   0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
   0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
   0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
   0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
   0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
   0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
   0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
   0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
   0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
};
2488
2489const float *
2490lp_build_sample_aniso_filter_table(void)
2491{
2492   return aniso_filter_table;
2493}
2494