/**************************************************************************
 *
 * Copyright 2010-2021 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/

#include "pipe/p_config.h"

#include "util/u_math.h"
#include "util/u_cpu_detect.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
#include "util/u_sse.h"

#include "lp_jit.h"
#include "lp_rast.h"
#include "lp_debug.h"
#include "lp_state_fs.h"
#include "lp_linear_priv.h"
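
/*
 * Fast "linear" rasterization paths: fetch, shade and blend a
 * screen-aligned rectangle one row of pixels at a time, hand-rolling a
 * few common blit-like fragment shaders (nearest texture fetch,
 * optionally forcing alpha to one, optionally blending with
 * premultiplied alpha) using SSE2.
 * llvmpipe_fs_variant_linear_fastpath() below decides when one of these
 * routines can stand in for the JIT-compiled fragment shader.
 */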

#if defined(PIPE_ARCH_SSE)

#include <emmintrin.h>

struct nearest_sampler {
   alignas(16) uint32_t out[64];

   const struct lp_jit_texture *texture;
   float fsrc_x;                /* src_x0 */
   float fsrc_y;                /* src_y0 */
   float fdsdx;                 /* ds/dx */
   float fdsdy;                 /* ds/dy */
   float fdtdx;                 /* dt/dx */
   float fdtdy;                 /* dt/dy */
   int width;
   int y;

   const uint32_t *(*fetch)(struct nearest_sampler *samp);
};


struct linear_interp {
   alignas(16) uint32_t out[64];
   __m128i a0;
   __m128i dadx;
   __m128i dady;
   int width;                   /* rounded up to multiple of 4 */
   boolean is_constant;
};

/* Organize all the information needed for blending in one place.
 * Could have a blend function pointer here, but we currently always
 * know which one we want to call.
 */
struct color_blend {
   const uint32_t *src;
   uint8_t *color;
   int stride;
   int width;                   /* the exact width */
};


/* Organize all the information needed for running each of the shaders
 * in one place.
 */
struct shader {
   alignas(16) uint32_t out0[64];
   const uint32_t *src0;
   const uint32_t *src1;
   __m128i const0;
   int width;                   /* rounded up to multiple of 4 */
};


/* For a row of pixels, perform add/one/inv_src_alpha (i.e.
 * premultiplied alpha) blending between the incoming pixels and the
 * destination buffer.
 *
 * Used to implement the BLIT_RGBA + blend shader; there are no
 * operations from the pixel shader left to implement at this level -
 * effectively the pixel shader was just a texture fetch which has
 * already been performed. This routine then purely implements
 * blending.
 */
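
/* A scalar sketch of the per-channel math, for illustration only
 * (util_sse2_blend_premul_4() does this for four packed B8G8R8A8 pixels
 * at a time, with its own rounding):
 *
 *    dst.c = src.c + dst.c * (255 - src.a) / 255     for c in {b,g,r,a}
 */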
static void
blend_premul(struct color_blend *blend)
{
   const uint32_t *src = blend->src;            /* aligned */
   uint32_t *dst = (uint32_t *)blend->color;    /* unaligned */
   const int width = blend->width;
   int i;
   union { __m128i m128; uint32_t ui[4]; } dstreg;

   blend->color += blend->stride;

   for (i = 0; i + 3 < width; i += 4) {
      __m128i tmp;
      tmp = _mm_loadu_si128((const __m128i *)&dst[i]); /* UNALIGNED READ */
      dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
                                             tmp);
      _mm_storeu_si128((__m128i *)&dst[i], dstreg.m128); /* UNALIGNED WRITE */
   }

   /* Bounce the sub-4-pixel tail through dstreg so we never read or
    * write past the end of the destination row:
    */
   if (i < width) {
      int j;
      for (j = 0; j < width - i; j++) {
         dstreg.ui[j] = dst[i+j];
      }
      dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
                                             dstreg.m128);
      for (; i < width; i++)
         dst[i] = dstreg.ui[i&3];
   }
}


static void
blend_noop(struct color_blend *blend)
{
   memcpy(blend->color, blend->src, blend->width * sizeof(unsigned));
   blend->color += blend->stride;
}


static void
init_blend(struct color_blend *blend,
           int x, int y, int width, int height,
           uint8_t *color,
           int stride)
{
   blend->color = color + x * 4 + y * stride;
   blend->stride = stride;
   blend->width = width;
}


/*
 * Perform a nearest-filtered lookup of a row of texels. The texture
 * lookup is assumed to be axis-aligned but with arbitrary scaling.
 *
 * Texture coordinate interpolation is performed in 24.8 fixed point.
 * Note that the longest span we will encounter is 64 pixels long,
 * meaning that 8 fractional bits is more than sufficient to represent
 * the shallowest gradient possible within this span.
 *
 * After 64 pixels (i.e. in the next tile), the starting point will be
 * recalculated with floating point arithmetic.
 *
 * XXX: migrate this to use Jose's quad blitter texture fetch routines.
 */
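
/* Worked example: a 2x minification gives fdsdx == 2.0, so iscale_x is
 * 512 and acc steps by two texels (512/256) per pixel. Truncating
 * fdsdx * 256 to an int loses under 1/256 of a texel per step, i.e.
 * under a quarter of a texel across the worst-case 64-pixel span.
 */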
static const uint32_t *
fetch_row(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
   const uint32_t *src_row =
      (const uint32_t *)((const uint8_t *)texture->base +
                         yy * texture->row_stride[0]);
   const int iscale_x = samp->fdsdx * 256;
   const int width = samp->width;
   int acc = samp->fsrc_x * 256 + 128;   /* 24.8 fixed point; +128 rounds to nearest */

   for (int i = 0; i < width; i++) {
      row[i] = src_row[acc >> 8];
      acc += iscale_x;
   }

   return row;
}


/* Version of fetch_row which can cope with texture edges. In
 * practice, Aero never triggers this.
 */
static const uint32_t *
fetch_row_clamped(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
   const uint32_t *src_row =
      (const uint32_t *)((const uint8_t *)texture->base +
                         CLAMP(yy, 0, texture->height - 1) *
                         texture->row_stride[0]);
   const float src_x0 = samp->fsrc_x;
   const float scale_x = samp->fdsdx;
   const int width = samp->width;

   for (int i = 0; i < width; i++) {
      row[i] = src_row[CLAMP(util_iround(src_x0 + i * scale_x),
                             0, texture->width - 1)];
   }

   return row;
}

/* It very rarely happens that some non-axis-aligned texturing creeps
 * into the linear path. Handle it here. The alternative would be
 * more pre-checking, or an option to fall back by returning false from
 * jit_linear.
 */
static const uint32_t *
fetch_row_xy_clamped(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const float yrow = samp->fsrc_y + samp->fdtdy * y;
   const float xrow = samp->fsrc_x + samp->fdsdy * y;
   const int width = samp->width;

   for (int i = 0; i < width; i++) {
      int yy = util_iround(yrow + samp->fdtdx * i);
      int xx = util_iround(xrow + samp->fdsdx * i);

      const uint32_t *src_row =
         (const uint32_t *)((const uint8_t *)texture->base +
                            CLAMP(yy, 0, texture->height - 1) *
                            texture->row_stride[0]);

      row[i] = src_row[CLAMP(xx, 0, texture->width - 1)];
   }

   return row;
}


static boolean
init_nearest_sampler(struct nearest_sampler *samp,
                     const struct lp_jit_texture *texture,
                     int x0, int y0,
                     int width, int height,
                     float s0, float dsdx, float dsdy,
                     float t0, float dtdx, float dtdy,
                     float w0, float dwdx, float dwdy)
{
   const float oow = 1.0f / w0;

   if (dwdx != 0.0 || dwdy != 0.0)
      return FALSE;

   samp->texture = texture;
   samp->width = width;
   samp->fdsdx = dsdx * texture->width * oow;
   samp->fdsdy = dsdy * texture->width * oow;
   samp->fdtdx = dtdx * texture->height * oow;
   samp->fdtdy = dtdy * texture->height * oow;
   samp->fsrc_x = (samp->fdsdx * x0 +
                   samp->fdsdy * y0 +
                   s0 * texture->width * oow - 0.5f);

   samp->fsrc_y = (samp->fdtdx * x0 +
                   samp->fdtdy * y0 +
                   t0 * texture->height * oow - 0.5f);
   samp->y = 0;

   /* Because we want to permit consumers of this data to round up to
    * the next multiple of 4, and because we don't want valgrind to
    * complain about uninitialized reads, zero out the tail of the
    * buffer up to the next multiple of four:
    */
   for (int i = width; i & 3; i++)
      samp->out[i] = 0;

   if (dsdy != 0 || dtdx != 0) {
      /* Arbitrary texture lookup:
       */
      samp->fetch = fetch_row_xy_clamped;
   } else {
      /* Axis-aligned stretch blit, arbitrary scaling factors including
       * flipped, minifying and magnifying:
       */
      int isrc_x = util_iround(samp->fsrc_x);
      int isrc_y = util_iround(samp->fsrc_y);
      int isrc_x1 = util_iround(samp->fsrc_x + width * samp->fdsdx);
      int isrc_y1 = util_iround(samp->fsrc_y + height * samp->fdtdy);

      /* Look at the maximum and minimum texture coordinates we will be
       * fetching and figure out if we need to use clamping. There is
       * similar code in u_blit_sw.c which takes a better approach to
       * this which could be substituted later.
       */
      if (isrc_x <= texture->width && isrc_x >= 0 &&
          isrc_y <= texture->height && isrc_y >= 0 &&
          isrc_x1 <= texture->width && isrc_x1 >= 0 &&
          isrc_y1 <= texture->height && isrc_y1 >= 0) {
         samp->fetch = fetch_row;
      } else {
         samp->fetch = fetch_row_clamped;
      }
   }

   return TRUE;
}
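
/* Typical driving loop (this mirrors the blit_* routines below): once
 * init_nearest_sampler() succeeds, fetch one row of texels per
 * destination scanline and feed it to the blend stage:
 *
 *    for (y = 0; y < height; y++) {
 *       blend.src = samp.fetch(&samp);
 *       blend_noop(&blend);              // or blend_premul()
 *    }
 */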


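/* Shade a row for the BLIT_RGB1 path: copy the fetched texels while
 * forcing alpha to 1.0 (the top byte, 0xff, of the packed
 * B8G8R8A8_UNORM word), four pixels per SSE2 op. This relies on
 * shader->width being rounded up to a multiple of four and on both
 * buffers being 16-byte aligned.
 */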
static const uint32_t *
shade_rgb1(struct shader *shader)
{
   const __m128i rgb1 = _mm_set1_epi32(0xff000000);
   const uint32_t *src0 = shader->src0;
   uint32_t *dst = shader->out0;
   int width = shader->width;
   int i;

   for (i = 0; i + 3 < width; i += 4) {
      __m128i s = *(const __m128i *)&src0[i];
      *(__m128i *)&dst[i] = _mm_or_si128(s, rgb1);
   }

   return shader->out0;
}


static void
init_shader(struct shader *shader,
            int x, int y, int width, int height)
{
   shader->width = align(width, 4);
}


/* Linear shader which implements the BLIT_RGBA shader with the
 * additional constraints imposed by lp_setup_is_blit().
 */
static boolean
blit_rgba_blit(const struct lp_rast_state *state,
               unsigned x, unsigned y,
               unsigned width, unsigned height,
               const float (*a0)[4],
               const float (*dadx)[4],
               const float (*dady)[4],
               uint8_t *color,
               unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   const struct lp_jit_texture *texture = &context->textures[0];
   const uint8_t *src;
   unsigned src_stride;
   int src_x, src_y;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   /* Require w == 1.0:
    */
   if (a0[0][3] != 1.0 ||
       dadx[0][3] != 0.0 ||
       dady[0][3] != 0.0)
      return FALSE;

   src_x = x + util_iround(a0[1][0] * texture->width - 0.5f);
   src_y = y + util_iround(a0[1][1] * texture->height - 0.5f);

   src = texture->base;
   src_stride = texture->row_stride[0];

   /* Fall back to blit_rgba() if clamping required:
    */
   if (src_x < 0 ||
       src_y < 0 ||
       src_x + width > texture->width ||
       src_y + height > texture->height)
      return FALSE;

   util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
                  x, y,
                  width, height,
                  src, src_stride,
                  src_x, src_y);

   return TRUE;
}


/* Linear shader which implements the BLIT_RGB1 shader, with the
 * additional constraints imposed by lp_setup_is_blit().
 */
static boolean
blit_rgb1_blit(const struct lp_rast_state *state,
               unsigned x, unsigned y,
               unsigned width, unsigned height,
               const float (*a0)[4],
               const float (*dadx)[4],
               const float (*dady)[4],
               uint8_t *color,
               unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   const struct lp_jit_texture *texture = &context->textures[0];
   const uint8_t *src;
   unsigned src_stride;
   int src_x, src_y;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   /* Require w == 1.0:
    */
   if (a0[0][3] != 1.0 ||
       dadx[0][3] != 0.0 ||
       dady[0][3] != 0.0)
      return FALSE;

   color += x * 4 + y * stride;

   src_x = x + util_iround(a0[1][0] * texture->width - 0.5f);
   src_y = y + util_iround(a0[1][1] * texture->height - 0.5f);

   src = texture->base;
   src_stride = texture->row_stride[0];
   src += src_x * 4;
   src += src_y * src_stride;

   if (src_x < 0 ||
       src_y < 0 ||
       src_x + width > texture->width ||
       src_y + height > texture->height)
      return FALSE;

   for (y = 0; y < height; y++) {
      const uint32_t *src_row = (const uint32_t *)src;
      uint32_t *dst_row = (uint32_t *)color;

      for (x = 0; x < width; x++) {
         *dst_row++ = *src_row++ | 0xff000000;   /* force alpha to 1.0 */
      }

      color += stride;
      src += src_stride;
   }

   return TRUE;
}


/* Linear shader variant implementing the BLIT_RGBA shader without
 * blending.
 */
static boolean
blit_rgba(const struct lp_rast_state *state,
          unsigned x, unsigned y,
          unsigned width, unsigned height,
          const float (*a0)[4],
          const float (*dadx)[4],
          const float (*dady)[4],
          uint8_t *color,
          unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   struct nearest_sampler samp;
   struct color_blend blend;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   if (!init_nearest_sampler(&samp,
                             &context->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return FALSE;

   init_blend(&blend,
              x, y, width, height,
              color, stride);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      blend.src = samp.fetch(&samp);
      blend_noop(&blend);
   }

   return TRUE;
}


static boolean
blit_rgb1(const struct lp_rast_state *state,
          unsigned x, unsigned y,
          unsigned width, unsigned height,
          const float (*a0)[4],
          const float (*dadx)[4],
          const float (*dady)[4],
          uint8_t *color,
          unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   struct nearest_sampler samp;
   struct color_blend blend;
   struct shader shader;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   if (!init_nearest_sampler(&samp,
                             &context->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return FALSE;

   init_blend(&blend, x, y, width, height, color, stride);

   init_shader(&shader, x, y, width, height);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      shader.src0 = samp.fetch(&samp);
      blend.src = shade_rgb1(&shader);
      blend_noop(&blend);
   }

   return TRUE;
}


/* Linear shader variant implementing the BLIT_RGBA shader with
 * one/inv_src_alpha blending.
 */
static boolean
blit_rgba_blend_premul(const struct lp_rast_state *state,
                       unsigned x, unsigned y,
                       unsigned width, unsigned height,
                       const float (*a0)[4],
                       const float (*dadx)[4],
                       const float (*dady)[4],
                       uint8_t *color,
                       unsigned stride)
{
   const struct lp_jit_context *context = &state->jit_context;
   struct nearest_sampler samp;
   struct color_blend blend;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   if (!init_nearest_sampler(&samp,
                             &context->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return FALSE;

   init_blend(&blend, x, y, width, height, color, stride);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      blend.src = samp.fetch(&samp);
      blend_premul(&blend);
   }

   return TRUE;
}


/* Linear shader which always emits red. Used for debugging.
 */
static boolean
linear_red(const struct lp_rast_state *state,
           unsigned x, unsigned y,
           unsigned width, unsigned height,
           const float (*a0)[4],
           const float (*dadx)[4],
           const float (*dady)[4],
           uint8_t *color,
           unsigned stride)
{
   union util_color uc;

   util_pack_color_ub(0xff, 0, 0, 0xff,
                      PIPE_FORMAT_B8G8R8A8_UNORM, &uc);

   util_fill_rect(color,
                  PIPE_FORMAT_B8G8R8A8_UNORM,
                  stride,
                  x,
                  y,
                  width,
                  height,
                  &uc);

   return TRUE;
}


/* No-op linear shader variant, for debugging.
 */
static boolean
linear_no_op(const struct lp_rast_state *state,
             unsigned x, unsigned y,
             unsigned width, unsigned height,
             const float (*a0)[4],
             const float (*dadx)[4],
             const float (*dady)[4],
             uint8_t *color,
             unsigned stride)
{
   return TRUE;
}


/* Check for ADD/ONE/INV_SRC_ALPHA, i.e. premultiplied-alpha blending.
 */
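/* In gallium blend-state terms this matches:
 *
 *    RGB = src.rgb * ONE + dst.rgb * (1 - src.a)
 *    A   = src.a  * ONE + dst.a  * (1 - src.a)
 *
 * which is exactly the equation blend_premul() evaluates.
 */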
static boolean
is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant *variant)
{
   return
      !variant->key.blend.logicop_enable &&
      variant->key.blend.rt[0].blend_enable &&
      variant->key.blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
      variant->key.blend.rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
      variant->key.blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
      variant->key.blend.rt[0].alpha_func == PIPE_BLEND_ADD &&
      variant->key.blend.rt[0].alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
      variant->key.blend.rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
      variant->key.blend.rt[0].colormask == 0xf;
}


/* Examine the fragment shader variant and determine whether we can
 * substitute a fastpath linear shader implementation.
 */
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   if (LP_PERF & PERF_NO_SHADE) {
      variant->jit_linear = linear_red;
      return;
   }

   struct lp_sampler_static_state *samp0 =
      lp_fs_variant_key_sampler_idx(&variant->key, 0);
   if (!samp0)
      return;

   enum pipe_format tex_format = samp0->texture_state.format;
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
       tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
       is_nearest_clamp_sampler(samp0)) {
      if (variant->opaque) {
         variant->jit_linear_blit = blit_rgba_blit;
         variant->jit_linear = blit_rgba;
      } else if (is_one_inv_src_alpha_blend(variant) &&
                 util_get_cpu_caps()->has_sse2) {
         variant->jit_linear = blit_rgba_blend_premul;
      }
      return;
   }

   if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
       variant->opaque &&
       (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
        tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
       is_nearest_clamp_sampler(samp0)) {
      variant->jit_linear_blit = blit_rgb1_blit;
      variant->jit_linear = blit_rgb1;
      return;
   }

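   /* Disabled debug hook: flipping this to 1 installs the no-op linear
    * shader above for every variant.
    */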
   if (0) {
      variant->jit_linear = linear_no_op;
      return;
   }
}

#else

void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* don't bother if there is no SSE */
}

#endif