/**************************************************************************
 *
 * Copyright 2012 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "pipe/p_state.h"
#include "util/u_debug.h"

#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_pack.h"

#include "lp_bld_blend.h"

/**
 * Is (a OP b) == (b OP a)?
 */
boolean
lp_build_blend_func_commutative(enum pipe_blend_func func)
{
   switch (func) {
   case PIPE_BLEND_ADD:
   case PIPE_BLEND_MIN:
   case PIPE_BLEND_MAX:
      return TRUE;
   case PIPE_BLEND_SUBTRACT:
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return FALSE;
   default:
      assert(0);
      return TRUE;
   }
}


/**
 * Whether the blending functions are the reverse of each other.
 */
boolean
lp_build_blend_func_reverse(enum pipe_blend_func rgb_func,
                            enum pipe_blend_func alpha_func)
{
   if (rgb_func == alpha_func)
      return FALSE;
   if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
      return TRUE;
   if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
      return TRUE;
   return FALSE;
}


/**
 * Whether the blending factors are complementary to each other.
 */
static inline boolean
lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
   STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE);
   STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) ==
                 PIPE_BLENDFACTOR_INV_CONST_COLOR);
   return dst_factor == (src_factor ^ 0x10);
}


/**
 * Whether this is an inverse blend factor.
 */
static inline boolean
is_inverse_factor(unsigned factor)
{
   STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11);
   return factor > 0x11;
}
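
/*
 * For reference, the bit trick relied on above (factor values as defined
 * in p_defines.h; the STATIC_ASSERTs check two instances of this): every
 * inverted factor is its plain counterpart with bit 4 set, e.g.
 *
 *    PIPE_BLENDFACTOR_SRC_ALPHA     = 0x03
 *    PIPE_BLENDFACTOR_INV_SRC_ALPHA = 0x13 = 0x03 ^ 0x10
 *
 * so XOR-ing with 0x10 maps a factor to its complement, and any value
 * greater than PIPE_BLENDFACTOR_ZERO (0x11) is an inverted factor.
 */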

/**
 * Calculates the multiplication of two normalized numbers, with the
 * result expanded to the next wider type (returned as low and high
 * halves).
 */
static void
lp_build_mul_norm_expand(struct lp_build_context *bld,
                         LLVMValueRef a, LLVMValueRef b,
                         LLVMValueRef *resl, LLVMValueRef *resh,
                         boolean signedness_differs)
{
   const struct lp_type type = bld->type;
   struct lp_type wide_type = lp_wider_type(type);
   struct lp_type wide_type2 = wide_type;
   struct lp_type type2 = type;
   LLVMValueRef al, ah, bl, bh;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));
   assert(!type.floating && !type.fixed && type.norm);

   if (a == bld->zero || b == bld->zero) {
      LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type));
      *resl = zero;
      *resh = zero;
      return;
   }

   if (signedness_differs) {
      type2.sign = !type.sign;
      wide_type2.sign = !wide_type2.sign;
   }

   lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
   lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh);

   *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
   *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
}


/**
 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
 */
LLVMValueRef
lp_build_blend_func(struct lp_build_context *bld,
                    enum pipe_blend_func func,
                    LLVMValueRef term1,
                    LLVMValueRef term2)
{
   switch (func) {
   case PIPE_BLEND_ADD:
      return lp_build_add(bld, term1, term2);
   case PIPE_BLEND_SUBTRACT:
      return lp_build_sub(bld, term1, term2);
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return lp_build_sub(bld, term2, term1);
   case PIPE_BLEND_MIN:
      return lp_build_min(bld, term1, term2);
   case PIPE_BLEND_MAX:
      return lp_build_max(bld, term1, term2);
   default:
      assert(0);
      return bld->zero;
   }
}
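
/*
 * For reference, with term1 = src * src_factor and term2 = dst * dst_factor
 * as computed by the caller, the switch above evaluates:
 *
 *    PIPE_BLEND_ADD:              term1 + term2
 *    PIPE_BLEND_SUBTRACT:         term1 - term2
 *    PIPE_BLEND_REVERSE_SUBTRACT: term2 - term1
 *    PIPE_BLEND_MIN:              min(term1, term2)
 *    PIPE_BLEND_MAX:              max(term1, term2)
 */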

/**
 * Performs optimizations and blending independent of SoA/AoS.
 *
 * @param func       the blend function
 * @param factor_src PIPE_BLENDFACTOR_xxx
 * @param factor_dst PIPE_BLENDFACTOR_xxx
 * @param src        source rgba
 * @param dst        dest rgba
 * @param src_factor src factor computed value
 * @param dst_factor dst factor computed value
 * @param not_alpha_dependent same factors across all channels of src/dst
 *
 * not_alpha_dependent should be:
 *    SoA: always true, as only one channel is handled at a time
 *    AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
 *
 * Note that pretty much every possible optimisation can only be done on
 * non-unorm targets: since unorm values cannot exceed 1.0, factoring out
 * a common term can change the result, e.g.
 * (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9), because the result of
 * the + is always <= 1.
 */
LLVMValueRef
lp_build_blend(struct lp_build_context *bld,
               enum pipe_blend_func func,
               enum pipe_blendfactor factor_src,
               enum pipe_blendfactor factor_dst,
               LLVMValueRef src,
               LLVMValueRef dst,
               LLVMValueRef src_factor,
               LLVMValueRef dst_factor,
               boolean not_alpha_dependent,
               boolean optimise_only)
{
   LLVMValueRef result, src_term, dst_term;

   /* If we are not alpha dependent we can mess with the src/dst factors */
   if (not_alpha_dependent) {
      if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
         if (func == PIPE_BLEND_ADD) {
            /*
             * src*f + dst*(1-f) (or vice versa) is a linear interpolation;
             * the smaller factor enum is the non-inverted one.
             */
            if (factor_src < factor_dst) {
               return lp_build_lerp(bld, src_factor, dst, src, 0);
            } else {
               return lp_build_lerp(bld, dst_factor, src, dst, 0);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
            /* src*f - dst*(1-f) = (src+dst)*f - dst, and vice versa */
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, result, dst);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, src, result);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
            /* dst*(1-f) - src*f = dst - (src+dst)*f, and vice versa */
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, dst, result);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, result, src);
            }
         }
      }

      if (bld->type.floating && factor_src == factor_dst) {
         if (func == PIPE_BLEND_ADD ||
             func == PIPE_BLEND_SUBTRACT ||
             func == PIPE_BLEND_REVERSE_SUBTRACT) {
            /* Same factor on both terms: factor it out of the add/sub. */
            result = lp_build_blend_func(bld, func, src, dst);
            return lp_build_mul(bld, result, src_factor);
         }
      }
   }

   if (optimise_only)
      return NULL;

   if ((bld->type.norm && bld->type.sign) &&
       (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) {
      /*
       * With snorm blending, the inverse blend factors range over [0,2]
       * instead of [-1,1], so the ordinary signed normalized arithmetic
       * doesn't quite work. The unpack must be unsigned, and the add/sub
       * must be done with a wider type.
       * (Note that it's not quite obvious what the blend equation should
       * actually be with respect to clamping based on the GL spec in this
       * case, but really the incoming src values are clamped to [-1,1]
       * (the dst is always clamped already), and then NO further clamping
       * occurs until the end.)
       */
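      /*
       * Worked example (8-bit snorm, for illustration): with dst = -1.0,
       * an inverse factor such as 1 - dst evaluates to 2.0, i.e. 254 in
       * units of 1/127, which does not fit the signed 8-bit range.
       * Unpacking that factor as unsigned into the 16-bit wide type keeps
       * the intermediate products and sums exact; only the final pack
       * clamps back to [-1,1].
       */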
      struct lp_build_context bldw;
      struct lp_type wide_type = lp_wider_type(bld->type);
      LLVMValueRef src_terml, src_termh, dst_terml, dst_termh;
      LLVMValueRef resl, resh;

      /*
       * We don't need saturating math for the sub/add, since we have
       * x+1 bit numbers in the x*2 wide type (the result is x+2 bits).
       * (Doesn't really matter on x86 sse2 though, as we use saturated
       * intrinsics.)
       */
      wide_type.norm = 0;
      lp_build_context_init(&bldw, bld->gallivm, wide_type);

      /*
       * XXX This is a bit hackish. Note that -128 really should be -1.0,
       * the same as -127. However, we did not actually clamp things
       * anywhere (relying on pack intrinsics instead), therefore we will
       * get -128, and the inverted factor then 255. But the mul can
       * overflow in this case (rather, the rounding fixups for the mul:
       * -128*255 will be positive).
       * So we clamp the src and dst up here, but only when necessary (we
       * should do this before calculating the blend factors, but it's
       * enough for avoiding overflow).
       */
      if (is_inverse_factor(factor_src)) {
         src = lp_build_max(bld, src,
                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
      }
      if (is_inverse_factor(factor_dst)) {
         dst = lp_build_max(bld, dst,
                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
      }

      lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh,
                               is_inverse_factor(factor_src) ? TRUE : FALSE);
      lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh,
                               is_inverse_factor(factor_dst) ? TRUE : FALSE);
      resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml);
      resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh);

      /*
       * XXX pack2_native is not ok because the values have to be in dst
       * range. We need the native pack though for the correct order on
       * avx2. Will break on everything not implementing clamping pack
       * intrinsics (i.e. everything but sse2 and altivec).
       */
      return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh);
   } else {
      src_term = lp_build_mul(bld, src, src_factor);
      dst_term = lp_build_mul(bld, dst, dst_factor);
      return lp_build_blend_func(bld, func, src_term, dst_term);
   }
}


void
lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
                           struct lp_type type,
                           struct lp_build_mask_context *mask,
                           LLVMValueRef alpha,
                           boolean do_branch)
{
   struct lp_build_context bld;
   LLVMValueRef test;
   LLVMValueRef alpha_ref_value;

   lp_build_context_init(&bld, gallivm, type);

   alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);

   test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);

   lp_build_name(test, "alpha_to_coverage");

   lp_build_mask_update(mask, test);

   if (do_branch)
      lp_build_mask_check(mask);
}
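
/*
 * Usage sketch (illustrative, not an actual caller): a fragment shader
 * backend that keeps an execution mask can apply alpha-to-coverage right
 * after the shader has produced its alpha channel, e.g.
 *
 *    lp_build_alpha_to_coverage(gallivm, type, &mask, alpha, TRUE);
 *
 * lp_build_mask_update() ANDs the comparison result into the mask, so
 * fragments with alpha <= 0.5 are discarded; with do_branch = TRUE,
 * lp_build_mask_check() additionally allows skipping the remaining code
 * when the mask has become all-zero.
 */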