1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2012 VMware, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "pipe/p_state.h" 29bf215546Sopenharmony_ci#include "util/u_debug.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "gallivm/lp_bld_type.h" 32bf215546Sopenharmony_ci#include "gallivm/lp_bld_arit.h" 33bf215546Sopenharmony_ci#include "gallivm/lp_bld_const.h" 34bf215546Sopenharmony_ci#include "gallivm/lp_bld_logic.h" 35bf215546Sopenharmony_ci#include "gallivm/lp_bld_swizzle.h" 36bf215546Sopenharmony_ci#include "gallivm/lp_bld_flow.h" 37bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h" 38bf215546Sopenharmony_ci#include "gallivm/lp_bld_pack.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci#include "lp_bld_blend.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci/** 43bf215546Sopenharmony_ci * Is (a OP b) == (b OP a)? 44bf215546Sopenharmony_ci */ 45bf215546Sopenharmony_ciboolean 46bf215546Sopenharmony_cilp_build_blend_func_commutative(enum pipe_blend_func func) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci switch (func) { 49bf215546Sopenharmony_ci case PIPE_BLEND_ADD: 50bf215546Sopenharmony_ci case PIPE_BLEND_MIN: 51bf215546Sopenharmony_ci case PIPE_BLEND_MAX: 52bf215546Sopenharmony_ci return TRUE; 53bf215546Sopenharmony_ci case PIPE_BLEND_SUBTRACT: 54bf215546Sopenharmony_ci case PIPE_BLEND_REVERSE_SUBTRACT: 55bf215546Sopenharmony_ci return FALSE; 56bf215546Sopenharmony_ci default: 57bf215546Sopenharmony_ci assert(0); 58bf215546Sopenharmony_ci return TRUE; 59bf215546Sopenharmony_ci } 60bf215546Sopenharmony_ci} 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci/** 64bf215546Sopenharmony_ci * Whether the blending functions are the reverse of each other. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ciboolean 67bf215546Sopenharmony_cilp_build_blend_func_reverse(enum pipe_blend_func rgb_func, 68bf215546Sopenharmony_ci enum pipe_blend_func alpha_func) 69bf215546Sopenharmony_ci{ 70bf215546Sopenharmony_ci if (rgb_func == alpha_func) 71bf215546Sopenharmony_ci return FALSE; 72bf215546Sopenharmony_ci if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) 73bf215546Sopenharmony_ci return TRUE; 74bf215546Sopenharmony_ci if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) 75bf215546Sopenharmony_ci return TRUE; 76bf215546Sopenharmony_ci return FALSE; 77bf215546Sopenharmony_ci} 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci/** 81bf215546Sopenharmony_ci * Whether the blending factors are complementary of each other. 82bf215546Sopenharmony_ci */ 83bf215546Sopenharmony_cistatic inline boolean 84bf215546Sopenharmony_cilp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE); 87bf215546Sopenharmony_ci STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) == 88bf215546Sopenharmony_ci PIPE_BLENDFACTOR_INV_CONST_COLOR); 89bf215546Sopenharmony_ci return dst_factor == (src_factor ^ 0x10); 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci/** 94bf215546Sopenharmony_ci * Whether this is a inverse blend factor 95bf215546Sopenharmony_ci */ 96bf215546Sopenharmony_cistatic inline boolean 97bf215546Sopenharmony_ciis_inverse_factor(unsigned factor) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11); 100bf215546Sopenharmony_ci return factor > 0x11; 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci/** 105bf215546Sopenharmony_ci * Calculates the (expanded to wider type) multiplication 106bf215546Sopenharmony_ci * of 2 normalized numbers. 107bf215546Sopenharmony_ci */ 108bf215546Sopenharmony_cistatic void 109bf215546Sopenharmony_cilp_build_mul_norm_expand(struct lp_build_context *bld, 110bf215546Sopenharmony_ci LLVMValueRef a, LLVMValueRef b, 111bf215546Sopenharmony_ci LLVMValueRef *resl, LLVMValueRef *resh, 112bf215546Sopenharmony_ci boolean signedness_differs) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci const struct lp_type type = bld->type; 115bf215546Sopenharmony_ci struct lp_type wide_type = lp_wider_type(type); 116bf215546Sopenharmony_ci struct lp_type wide_type2 = wide_type; 117bf215546Sopenharmony_ci struct lp_type type2 = type; 118bf215546Sopenharmony_ci LLVMValueRef al, ah, bl, bh; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 121bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 122bf215546Sopenharmony_ci assert(!type.floating && !type.fixed && type.norm); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci if (a == bld->zero || b == bld->zero) { 125bf215546Sopenharmony_ci LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type)); 126bf215546Sopenharmony_ci *resl = zero; 127bf215546Sopenharmony_ci *resh = zero; 128bf215546Sopenharmony_ci return; 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci if (signedness_differs) { 132bf215546Sopenharmony_ci type2.sign = !type.sign; 133bf215546Sopenharmony_ci wide_type2.sign = !wide_type2.sign; 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah); 137bf215546Sopenharmony_ci lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl); 140bf215546Sopenharmony_ci *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh); 141bf215546Sopenharmony_ci} 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci/** 145bf215546Sopenharmony_ci * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ciLLVMValueRef 148bf215546Sopenharmony_cilp_build_blend_func(struct lp_build_context *bld, 149bf215546Sopenharmony_ci enum pipe_blend_func func, 150bf215546Sopenharmony_ci LLVMValueRef term1, 151bf215546Sopenharmony_ci LLVMValueRef term2) 152bf215546Sopenharmony_ci{ 153bf215546Sopenharmony_ci switch (func) { 154bf215546Sopenharmony_ci case PIPE_BLEND_ADD: 155bf215546Sopenharmony_ci return lp_build_add(bld, term1, term2); 156bf215546Sopenharmony_ci case PIPE_BLEND_SUBTRACT: 157bf215546Sopenharmony_ci return lp_build_sub(bld, term1, term2); 158bf215546Sopenharmony_ci case PIPE_BLEND_REVERSE_SUBTRACT: 159bf215546Sopenharmony_ci return lp_build_sub(bld, term2, term1); 160bf215546Sopenharmony_ci case PIPE_BLEND_MIN: 161bf215546Sopenharmony_ci return lp_build_min(bld, term1, term2); 162bf215546Sopenharmony_ci case PIPE_BLEND_MAX: 163bf215546Sopenharmony_ci return lp_build_max(bld, term1, term2); 164bf215546Sopenharmony_ci default: 165bf215546Sopenharmony_ci assert(0); 166bf215546Sopenharmony_ci return bld->zero; 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci/** 172bf215546Sopenharmony_ci * Performs optimizations and blending independent of SoA/AoS 173bf215546Sopenharmony_ci * 174bf215546Sopenharmony_ci * @param func the blend function 175bf215546Sopenharmony_ci * @param factor_src PIPE_BLENDFACTOR_xxx 176bf215546Sopenharmony_ci * @param factor_dst PIPE_BLENDFACTOR_xxx 177bf215546Sopenharmony_ci * @param src source rgba 178bf215546Sopenharmony_ci * @param dst dest rgba 179bf215546Sopenharmony_ci * @param src_factor src factor computed value 180bf215546Sopenharmony_ci * @param dst_factor dst factor computed value 181bf215546Sopenharmony_ci * @param not_alpha_dependent same factors accross all channels of src/dst 182bf215546Sopenharmony_ci * 183bf215546Sopenharmony_ci * not_alpha_dependent should be: 184bf215546Sopenharmony_ci * SoA: always true as it is only one channel at a time 185bf215546Sopenharmony_ci * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor 186bf215546Sopenharmony_ci * 187bf215546Sopenharmony_ci * Note that pretty much every possible optimisation can only be done on non-unorm targets 188bf215546Sopenharmony_ci * due to unorm values not going above 1.0 meaning factorisation can change results. 189bf215546Sopenharmony_ci * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1. 190bf215546Sopenharmony_ci */ 191bf215546Sopenharmony_ciLLVMValueRef 192bf215546Sopenharmony_cilp_build_blend(struct lp_build_context *bld, 193bf215546Sopenharmony_ci enum pipe_blend_func func, 194bf215546Sopenharmony_ci enum pipe_blendfactor factor_src, 195bf215546Sopenharmony_ci enum pipe_blendfactor factor_dst, 196bf215546Sopenharmony_ci LLVMValueRef src, 197bf215546Sopenharmony_ci LLVMValueRef dst, 198bf215546Sopenharmony_ci LLVMValueRef src_factor, 199bf215546Sopenharmony_ci LLVMValueRef dst_factor, 200bf215546Sopenharmony_ci boolean not_alpha_dependent, 201bf215546Sopenharmony_ci boolean optimise_only) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci LLVMValueRef result, src_term, dst_term; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci /* If we are not alpha dependent we can mess with the src/dst factors */ 206bf215546Sopenharmony_ci if (not_alpha_dependent) { 207bf215546Sopenharmony_ci if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { 208bf215546Sopenharmony_ci if (func == PIPE_BLEND_ADD) { 209bf215546Sopenharmony_ci if (factor_src < factor_dst) { 210bf215546Sopenharmony_ci return lp_build_lerp(bld, src_factor, dst, src, 0); 211bf215546Sopenharmony_ci } else { 212bf215546Sopenharmony_ci return lp_build_lerp(bld, dst_factor, src, dst, 0); 213bf215546Sopenharmony_ci } 214bf215546Sopenharmony_ci } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) { 215bf215546Sopenharmony_ci result = lp_build_add(bld, src, dst); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci if (factor_src < factor_dst) { 218bf215546Sopenharmony_ci result = lp_build_mul(bld, result, src_factor); 219bf215546Sopenharmony_ci return lp_build_sub(bld, result, dst); 220bf215546Sopenharmony_ci } else { 221bf215546Sopenharmony_ci result = lp_build_mul(bld, result, dst_factor); 222bf215546Sopenharmony_ci return lp_build_sub(bld, src, result); 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { 225bf215546Sopenharmony_ci result = lp_build_add(bld, src, dst); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci if (factor_src < factor_dst) { 228bf215546Sopenharmony_ci result = lp_build_mul(bld, result, src_factor); 229bf215546Sopenharmony_ci return lp_build_sub(bld, dst, result); 230bf215546Sopenharmony_ci } else { 231bf215546Sopenharmony_ci result = lp_build_mul(bld, result, dst_factor); 232bf215546Sopenharmony_ci return lp_build_sub(bld, result, src); 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci if (bld->type.floating && factor_src == factor_dst) { 238bf215546Sopenharmony_ci if (func == PIPE_BLEND_ADD || 239bf215546Sopenharmony_ci func == PIPE_BLEND_SUBTRACT || 240bf215546Sopenharmony_ci func == PIPE_BLEND_REVERSE_SUBTRACT) { 241bf215546Sopenharmony_ci LLVMValueRef result; 242bf215546Sopenharmony_ci result = lp_build_blend_func(bld, func, src, dst); 243bf215546Sopenharmony_ci return lp_build_mul(bld, result, src_factor); 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci } 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (optimise_only) 249bf215546Sopenharmony_ci return NULL; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if ((bld->type.norm && bld->type.sign) && 252bf215546Sopenharmony_ci (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) { 253bf215546Sopenharmony_ci /* 254bf215546Sopenharmony_ci * With snorm blending, the inverse blend factors range from [0,2] 255bf215546Sopenharmony_ci * instead of [-1,1], so the ordinary signed normalized arithmetic 256bf215546Sopenharmony_ci * doesn't quite work. Unpack must be unsigned, and the add/sub 257bf215546Sopenharmony_ci * must be done with wider type. 258bf215546Sopenharmony_ci * (Note that it's not quite obvious what the blend equation wrt to 259bf215546Sopenharmony_ci * clamping should actually be based on GL spec in this case, but 260bf215546Sopenharmony_ci * really the incoming src values are clamped to [-1,1] (the dst is 261bf215546Sopenharmony_ci * always clamped already), and then NO further clamping occurs until 262bf215546Sopenharmony_ci * the end.) 263bf215546Sopenharmony_ci */ 264bf215546Sopenharmony_ci struct lp_build_context bldw; 265bf215546Sopenharmony_ci struct lp_type wide_type = lp_wider_type(bld->type); 266bf215546Sopenharmony_ci LLVMValueRef src_terml, src_termh, dst_terml, dst_termh; 267bf215546Sopenharmony_ci LLVMValueRef resl, resh; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* 270bf215546Sopenharmony_ci * We don't need saturate math for the sub/add, since we have 271bf215546Sopenharmony_ci * x+1 bit numbers in x*2 wide type (result is x+2 bits). 272bf215546Sopenharmony_ci * (Doesn't really matter on x86 sse2 though as we use saturated 273bf215546Sopenharmony_ci * intrinsics.) 274bf215546Sopenharmony_ci */ 275bf215546Sopenharmony_ci wide_type.norm = 0; 276bf215546Sopenharmony_ci lp_build_context_init(&bldw, bld->gallivm, wide_type); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci /* 279bf215546Sopenharmony_ci * XXX This is a bit hackish. Note that -128 really should 280bf215546Sopenharmony_ci * be -1.0, the same as -127. However, we did not actually clamp 281bf215546Sopenharmony_ci * things anywhere (relying on pack intrinsics instead) therefore 282bf215546Sopenharmony_ci * we will get -128, and the inverted factor then 255. But the mul 283bf215546Sopenharmony_ci * can overflow in this case (rather the rounding fixups for the mul, 284bf215546Sopenharmony_ci * -128*255 will be positive). 285bf215546Sopenharmony_ci * So we clamp the src and dst up here but only when necessary (we 286bf215546Sopenharmony_ci * should do this before calculating blend factors but it's enough 287bf215546Sopenharmony_ci * for avoiding overflow). 288bf215546Sopenharmony_ci */ 289bf215546Sopenharmony_ci if (is_inverse_factor(factor_src)) { 290bf215546Sopenharmony_ci src = lp_build_max(bld, src, 291bf215546Sopenharmony_ci lp_build_const_vec(bld->gallivm, bld->type, -1.0)); 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci if (is_inverse_factor(factor_dst)) { 294bf215546Sopenharmony_ci dst = lp_build_max(bld, dst, 295bf215546Sopenharmony_ci lp_build_const_vec(bld->gallivm, bld->type, -1.0)); 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh, 299bf215546Sopenharmony_ci is_inverse_factor(factor_src) ? TRUE : FALSE); 300bf215546Sopenharmony_ci lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh, 301bf215546Sopenharmony_ci is_inverse_factor(factor_dst) ? TRUE : FALSE); 302bf215546Sopenharmony_ci resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml); 303bf215546Sopenharmony_ci resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci /* 306bf215546Sopenharmony_ci * XXX pack2_native is not ok because the values have to be in dst 307bf215546Sopenharmony_ci * range. We need native pack though for the correct order on avx2. 308bf215546Sopenharmony_ci * Will break on everything not implementing clamping pack intrinsics 309bf215546Sopenharmony_ci * (i.e. everything but sse2 and altivec). 310bf215546Sopenharmony_ci */ 311bf215546Sopenharmony_ci return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh); 312bf215546Sopenharmony_ci } else { 313bf215546Sopenharmony_ci src_term = lp_build_mul(bld, src, src_factor); 314bf215546Sopenharmony_ci dst_term = lp_build_mul(bld, dst, dst_factor); 315bf215546Sopenharmony_ci return lp_build_blend_func(bld, func, src_term, dst_term); 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci} 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_civoid 320bf215546Sopenharmony_cilp_build_alpha_to_coverage(struct gallivm_state *gallivm, 321bf215546Sopenharmony_ci struct lp_type type, 322bf215546Sopenharmony_ci struct lp_build_mask_context *mask, 323bf215546Sopenharmony_ci LLVMValueRef alpha, 324bf215546Sopenharmony_ci boolean do_branch) 325bf215546Sopenharmony_ci{ 326bf215546Sopenharmony_ci struct lp_build_context bld; 327bf215546Sopenharmony_ci LLVMValueRef test; 328bf215546Sopenharmony_ci LLVMValueRef alpha_ref_value; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci lp_build_context_init(&bld, gallivm, type); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5); 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci lp_build_name(test, "alpha_to_coverage"); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci lp_build_mask_update(mask, test); 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci if (do_branch) 341bf215546Sopenharmony_ci lp_build_mask_check(mask); 342bf215546Sopenharmony_ci} 343