1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference implementation for per-fragment operations.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include <limits>
28 
29 using tcu::IVec2;
30 using tcu::Vec3;
31 using tcu::Vec4;
32 using tcu::IVec4;
33 using tcu::UVec4;
34 using tcu::min;
35 using tcu::max;
36 using tcu::clamp;
37 using de::min;
38 using de::max;
39 using de::clamp;
40 
41 namespace rr
42 {
43 
44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
maskedBitReplace(int oldValue, int newValue, deUint32 mask)45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
46 {
47 	return (oldValue & ~mask) | (newValue & mask);
48 }
49 
isInsideRect(const IVec2& point, const WindowRectangle& rect)50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
51 {
52 	return de::inBounds(point.x(), rect.left,		rect.left + rect.width) &&
53 		   de::inBounds(point.y(), rect.bottom,		rect.bottom + rect.height);
54 }
55 
// Undo alpha premultiplication: RGB is divided by alpha, alpha itself is passed through.
// A color whose alpha is not positive is returned as all zeros; its RGB is asserted to
// be zero already.
static inline Vec4 unpremultiply (const Vec4& v)
{
	const float alpha = v.w();

	if (!(alpha > 0.0f))
	{
		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	return Vec4(v.x()/alpha, v.y()/alpha, v.z()/alpha, alpha);
}
66 
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r)67 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const Vec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r)68 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const IVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r)69 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const UVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());	}
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r)70 void clearMultisampleDepthBuffer	(const tcu::PixelBufferAccess& dst, float v,		const WindowRectangle& r)	{ tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r)71 void clearMultisampleStencilBuffer	(const tcu::PixelBufferAccess& dst, int v,			const WindowRectangle& r)	{ tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
72 
FragmentProcessor(void)73 FragmentProcessor::FragmentProcessor (void)
74 	: m_sampleRegister()
75 {
76 }
77 
executeScissorTest(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
79 {
80 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
81 	{
82 		if (m_sampleRegister[regSampleNdx].isAlive)
83 		{
84 			int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
85 
86 			if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
87 				m_sampleRegister[regSampleNdx].isAlive = false;
88 		}
89 	}
90 }
91 
executeStencilCompare(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
93 {
94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)																					\
95 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)															\
96 	{																																		\
97 		if (m_sampleRegister[regSampleNdx].isAlive)																							\
98 		{																																	\
99 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;													\
100 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];					\
101 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
102 			int					maskedRef			= stencilState.compMask & clampedStencilRef;											\
103 			int					maskedBuf			= stencilState.compMask & stencilBufferValue;											\
104 			DE_UNREF(maskedRef);																											\
105 			DE_UNREF(maskedBuf);																											\
106 																																			\
107 			m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);															\
108 		}																																	\
109 	}
110 
111 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
112 
113 	switch (stencilState.func)
114 	{
115 		case TESTFUNC_NEVER:	SAMPLE_REGISTER_STENCIL_COMPARE(false)						break;
116 		case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_STENCIL_COMPARE(true)						break;
117 		case TESTFUNC_LESS:		SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)		break;
118 		case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)		break;
119 		case TESTFUNC_GREATER:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)		break;
120 		case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)		break;
121 		case TESTFUNC_EQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)		break;
122 		case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)		break;
123 		default:
124 			DE_ASSERT(false);
125 	}
126 
127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
128 }
129 
executeStencilSFail(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
131 {
132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)																																		\
133 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																									\
134 	{																																												\
135 		if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)																				\
136 		{																																											\
137 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																							\
138 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];															\
139 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());											\
140 																																													\
141 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
142 			m_sampleRegister[regSampleNdx].isAlive = false;																															\
143 		}																																											\
144 	}
145 
146 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
147 
148 	switch (stencilState.sFail)
149 	{
150 		case STENCILOP_KEEP:		SAMPLE_REGISTER_SFAIL(stencilBufferValue)												break;
151 		case STENCILOP_ZERO:		SAMPLE_REGISTER_SFAIL(0)																break;
152 		case STENCILOP_REPLACE:		SAMPLE_REGISTER_SFAIL(clampedStencilRef)												break;
153 		case STENCILOP_INCR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))		break;
154 		case STENCILOP_DECR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))		break;
155 		case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))				break;
156 		case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))				break;
157 		case STENCILOP_INVERT:		SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;
158 		default:
159 			DE_ASSERT(false);
160 	}
161 
162 #undef SAMPLE_REGISTER_SFAIL
163 }
164 
165 
executeDepthBoundsTest(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)166 void FragmentProcessor::executeDepthBoundsTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)
167 {
168 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
169 	{
170 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
171 		{
172 			if (m_sampleRegister[regSampleNdx].isAlive)
173 			{
174 				const int			fragSampleNdx		= regSampleNdx % numSamplesPerFragment;
175 				const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
176 				const float			depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
177 
178 				if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound))
179 					m_sampleRegister[regSampleNdx].isAlive = false;
180 			}
181 		}
182 	}
183 	else
184 	{
185 		/* Convert float bounds to target buffer format for comparison */
186 
187 		deUint32 minDepthBoundUint, maxDepthBoundUint;
188 		{
189 			deUint32 buffer[2];
190 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());
191 
192 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
193 			access.setPixDepth(minDepthBound, 0, 0, 0);
194 			minDepthBoundUint = access.getPixelUint(0, 0, 0).x();
195 		}
196 		{
197 			deUint32 buffer[2];
198 
199 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
200 			access.setPixDepth(maxDepthBound, 0, 0, 0);
201 			maxDepthBoundUint = access.getPixelUint(0, 0, 0).x();
202 		}
203 
204 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
205 		{
206 			if (m_sampleRegister[regSampleNdx].isAlive)
207 			{
208 				const int			fragSampleNdx		= regSampleNdx % numSamplesPerFragment;
209 				const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
210 				const deUint32		depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();
211 
212 				if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint))
213 					m_sampleRegister[regSampleNdx].isAlive = false;
214 			}
215 		}
216 	}
217 }
218 
executeDepthCompare(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)219 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
220 {
221 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)																						\
222 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
223 	{																																			\
224 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
225 		{																																		\
226 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
227 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
228 			float				depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());			\
229 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
230 			float				sampleDepth			= de::clamp(sampleDepthFloat, 0.0f, 1.0f);													\
231 																																				\
232 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
233 																																				\
234 			DE_UNREF(depthBufferValue);																											\
235 			DE_UNREF(sampleDepth);																												\
236 		}																																		\
237 	}
238 
239 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)																					\
240 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
241 	{																																			\
242 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
243 		{																																		\
244 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
245 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
246 			deUint32			depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();	\
247 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
248 																																				\
249 			/* Convert input float to target buffer format for comparison */																	\
250 																																				\
251 			deUint32 buffer[2];																													\
252 																																				\
253 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());														\
254 																																				\
255 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);															\
256 			access.setPixDepth(sampleDepthFloat, 0, 0, 0);																						\
257 			deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();																			\
258 																																				\
259 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
260 																																				\
261 			DE_UNREF(depthBufferValue);																											\
262 			DE_UNREF(sampleDepth);																												\
263 		}																																		\
264 	}
265 
266 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
267 	{
268 
269 		switch (depthFunc)
270 		{
271 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(false)							break;
272 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_F(true)								break;
273 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)	break;
274 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)	break;
275 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)	break;
276 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)	break;
277 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)	break;
278 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)	break;
279 			default:
280 				DE_ASSERT(false);
281 		}
282 
283 	}
284 	else
285 	{
286 		switch (depthFunc)
287 		{
288 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)							break;
289 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)								break;
290 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)	break;
291 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)	break;
292 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)	break;
293 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)	break;
294 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)	break;
295 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)	break;
296 			default:
297 				DE_ASSERT(false);
298 		}
299 	}
300 
301 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
302 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
303 }
304 
executeDepthWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)305 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
306 {
307 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
308 	{
309 		if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
310 		{
311 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
312 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
313 			const float			clampedDepth	= de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
314 
315 			depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
316 		}
317 	}
318 }
319 
executeStencilDpFailAndPass(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)320 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
321 {
322 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)																													\
323 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																								\
324 	{																																											\
325 		if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))																												\
326 		{																																										\
327 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																						\
328 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];														\
329 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());										\
330 																																												\
331 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
332 		}																																										\
333 	}
334 
335 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)																											\
336 		switch (stencilState.OP_NAME)																														\
337 		{																																					\
338 			case STENCILOP_KEEP:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)												break;	\
339 			case STENCILOP_ZERO:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)																break;	\
340 			case STENCILOP_REPLACE:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)												break;	\
341 			case STENCILOP_INCR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))	break;	\
342 			case STENCILOP_DECR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))	break;	\
343 			case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))			break;	\
344 			case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))			break;	\
345 			case STENCILOP_INVERT:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;	\
346 			default:																																		\
347 				DE_ASSERT(false);																															\
348 		}
349 
350 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
351 
352 	SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
353 	SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
354 
355 #undef SWITCH_DPFAIL_OR_DPPASS
356 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
357 }
358 
executeBlendFactorComputeRGB(const Vec4& blendColor, const BlendState& blendRGBState)359 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
360 {
361 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)																				\
362 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																	\
363 	{																																				\
364 		if (m_sampleRegister[regSampleNdx].isAlive)																									\
365 		{																																			\
366 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;																\
367 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;																\
368 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;																\
369 			DE_UNREF(src);																															\
370 			DE_UNREF(src1);																															\
371 			DE_UNREF(dst);																															\
372 																																					\
373 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);																		\
374 		}																																			\
375 	}
376 
377 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)																					\
378 	switch (blendRGBState.FUNC_NAME)																											\
379 	{																																			\
380 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))								break;	\
381 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))								break;	\
382 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))						break;	\
383 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))			break;	\
384 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))						break;	\
385 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))			break;	\
386 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))							break;	\
387 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))						break;	\
388 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))							break;	\
389 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))						break;	\
390 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))				break;	\
391 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))	break;	\
392 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))						break;	\
393 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))				break;	\
394 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))	break;	\
395 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))						break;	\
396 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))			break;	\
397 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))							break;	\
398 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))					break;	\
399 		default:																																\
400 			DE_ASSERT(false);																													\
401 	}
402 
403 	SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
404 	SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
405 
406 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
407 #undef SAMPLE_REGISTER_BLEND_FACTOR
408 }
409 
executeBlendFactorComputeA(const Vec4& blendColor, const BlendState& blendAState)410 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
411 {
412 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)														\
413 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)											\
414 	{																														\
415 		if (m_sampleRegister[regSampleNdx].isAlive)																			\
416 		{																													\
417 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;										\
418 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;										\
419 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;										\
420 			DE_UNREF(src);																									\
421 			DE_UNREF(src1);																									\
422 			DE_UNREF(dst);																									\
423 																															\
424 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);												\
425 		}																													\
426 	}
427 
428 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)																		\
429 	switch (blendAState.FUNC_NAME)																								\
430 	{																															\
431 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)						break;	\
432 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
433 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
434 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
435 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
436 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
437 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
438 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
439 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
440 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
441 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
442 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
443 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
444 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
445 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
446 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
447 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
448 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
449 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
450 		default:																												\
451 			DE_ASSERT(false);																									\
452 	}
453 
454 	SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
455 	SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
456 
457 #undef SWITCH_SRC_OR_DST_FACTOR_A
458 #undef SAMPLE_REGISTER_BLEND_FACTOR
459 }
460 
executeBlend(const BlendState& blendRGBState, const BlendState& blendAState)461 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
462 {
463 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)						\
464 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)		\
465 	{																					\
466 		if (m_sampleRegister[regSampleNdx].isAlive)										\
467 		{																				\
468 			SampleData& sample		= m_sampleRegister[regSampleNdx];					\
469 			const Vec4& srcColor	= sample.clampedBlendSrcColor;						\
470 			const Vec4& dstColor	= sample.clampedBlendDstColor;						\
471 																						\
472 			sample.COLOR_NAME = (COLOR_EXPRESSION);										\
473 		}																				\
474 	}
475 
476 	switch (blendRGBState.equation)
477 	{
478 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
479 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
480 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)	break;
481 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
482 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
483 		default:
484 			DE_ASSERT(false);
485 	}
486 
487 	switch (blendAState.equation)
488 	{
489 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)	break;
490 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)	break;
491 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)	break;
492 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))											break;
493 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))											break;
494 		default:
495 			DE_ASSERT(false);
496 	}
497 #undef SAMPLE_REGISTER_BLENDED_COLOR
498 }
499 
500 namespace advblend
501 {
502 
// Multiply blend function: the product of the source and destination channel values.
inline float multiply (float src, float dst)
{
	return src*dst;
}
// Screen blend function: src + dst - src*dst.
inline float screen (float src, float dst)
{
	const float sum		= src + dst;
	const float product	= src*dst;

	return sum - product;
}
darken(float src, float dst)505 inline float	darken		(float src, float dst) { return de::min(src, dst);			}
lighten(float src, float dst)506 inline float	lighten		(float src, float dst) { return de::max(src, dst);			}
difference(float src, float dst)507 inline float	difference	(float src, float dst) { return de::abs(dst-src);			}
// Exclusion blend function: src + dst - 2*src*dst.
inline float exclusion (float src, float dst)
{
	const float sum			= src + dst;
	const float twiceProd	= 2.0f*src*dst;

	return sum - twiceProd;
}
509 
// Overlay blend function. The destination value selects the formula:
//   dst <= 0.5 : 2*src*dst
//   otherwise  : 1 - 2*(1-src)*(1-dst)
inline float overlay (float src, float dst)
{
	const bool useMultiplyForm = (dst <= 0.5f);

	return useMultiplyForm ? 2.0f*src*dst
						   : 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
}
517 
colordodge(float src, float dst)518 inline float colordodge (float src, float dst)
519 {
520 	if (dst <= 0.0f)
521 		return 0.0f;
522 	else if (src < 1.0f)
523 		return de::min(1.0f, dst/(1.0f-src));
524 	else
525 		return 1.0f;
526 }
527 
colorburn(float src, float dst)528 inline float colorburn (float src, float dst)
529 {
530 	if (dst >= 1.0f)
531 		return 1.0f;
532 	else if (src > 0.0f)
533 		return 1.0f - de::min(1.0f, (1.0f-dst)/src);
534 	else
535 		return 0.0f;
536 }
537 
// Hard light blend function. Same two formulas as overlay, but selected by the source value:
//   src <= 0.5 : 2*src*dst
//   otherwise  : 1 - 2*(1-src)*(1-dst)
inline float hardlight (float src, float dst)
{
	const bool useMultiplyForm = (src <= 0.5f);

	return useMultiplyForm ? 2.0f*src*dst
						   : 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
}
545 
softlight(float src, float dst)546 inline float softlight (float src, float dst)
547 {
548 	if (src <= 0.5f)
549 		return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
550 	else if (dst <= 0.25f)
551 		return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
552 	else
553 		return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
554 }
555 
minComp(const Vec3& v)556 inline float minComp (const Vec3& v)
557 {
558 	return de::min(de::min(v.x(), v.y()), v.z());
559 }
560 
maxComp(const Vec3& v)561 inline float maxComp (const Vec3& v)
562 {
563 	return de::max(de::max(v.x(), v.y()), v.z());
564 }
565 
luminosity(const Vec3& rgb)566 inline float luminosity (const Vec3& rgb)
567 {
568 	return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
569 }
570 
saturation(const Vec3& rgb)571 inline float saturation (const Vec3& rgb)
572 {
573 	return maxComp(rgb) - minComp(rgb);
574 }
575 
// Returns cbase shifted so that it has the luminosity of clum.
//
// Every channel of cbase is offset by the luminosity difference. If that pushes a channel below 0
// the result is rescaled around the target luminosity using the smallest channel; otherwise, if a
// channel exceeds 1, it is rescaled using the largest channel.
// \note When the divisor of a rescale would be zero, 1 is used instead.
Vec3 setLum (const Vec3& cbase, const Vec3& clum)
{
	const float	baseLum		= luminosity(cbase);
	const float	targetLum	= luminosity(clum);
	const float	lumDelta	= targetLum - baseLum;
	const Vec3	shifted		= cbase + Vec3(lumDelta);
	const float	lowest		= minComp(shifted);
	const float	highest		= maxComp(shifted);

	if (lowest < 0.0f)
	{
		const float divisor = (targetLum != lowest) ? (targetLum-lowest) : 1.0f;

		return targetLum + ((shifted-targetLum)*targetLum / divisor);
	}

	if (highest > 1.0f)
	{
		const float divisor = (targetLum != highest) ? (highest-targetLum) : 1.0f;

		return targetLum + ((shifted-targetLum)*(1.0f-targetLum) / divisor);
	}

	return shifted;
}
592 
// Returns a color built from cbase with the saturation of csat and the luminosity of clum.
//
// cbase is offset so that its smallest channel is 0 and scaled by saturation(csat)/saturation(cbase);
// when cbase has no positive saturation, black is used instead. The result is then passed to setLum().
Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
{
	const float	baseMin		= minComp(cbase);
	const float	baseSat		= saturation(cbase);
	const float	targetSat	= saturation(csat);

	// \note Negated '>' keeps a NaN saturation on the black path.
	if (!(baseSat > 0.0f))
		return setLum(Vec3(0.0f), clum);

	return setLum((cbase - baseMin) * targetSat / baseSat, clum);
}
605 
606 } // advblend
607 
executeAdvancedBlend(BlendEquationAdvanced equation)608 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
609 {
610 	using namespace advblend;
611 
612 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)												\
613 	do {																						\
614 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
615 		{																						\
616 			if (m_sampleRegister[regSampleNdx].isAlive)											\
617 			{																					\
618 				SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
619 				const Vec4&	srcColor	= sample.clampedBlendSrcColor;							\
620 				const Vec4&	dstColor	= sample.clampedBlendDstColor;							\
621 				const Vec3&	bias		= sample.blendSrcFactorRGB;								\
622 				const float	p0			= sample.blendSrcFactorA;								\
623 				const float	r			= FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0];	\
624 				const float	g			= FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1];	\
625 				const float	b			= FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2];	\
626 																								\
627 				sample.blendedRGB = Vec3(r, g, b);												\
628 			}																					\
629 		}																						\
630 	} while (0)
631 
632 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)											\
633 	do {																						\
634 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
635 		{																						\
636 			if (m_sampleRegister[regSampleNdx].isAlive)											\
637 			{																					\
638 				SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
639 				const Vec3	srcColor	= sample.clampedBlendSrcColor.swizzle(0,1,2);			\
640 				const Vec3	dstColor	= sample.clampedBlendDstColor.swizzle(0,1,2);			\
641 				const Vec3&	bias		= sample.blendSrcFactorRGB;								\
642 				const float	p0			= sample.blendSrcFactorA;								\
643 																								\
644 				sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;								\
645 			}																					\
646 		}																						\
647 	} while (0)
648 
649 	// Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
650 	// \note clampedBlend*Color contains clamped & unpremultiplied colors
651 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
652 	{
653 		if (m_sampleRegister[regSampleNdx].isAlive)
654 		{
655 			SampleData&	sample		= m_sampleRegister[regSampleNdx];
656 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;
657 			const Vec4&	dstColor	= sample.clampedBlendDstColor;
658 			const float	srcA		= srcColor.w();
659 			const float	dstA		= dstColor.w();
660 			const float	p0			= srcA*dstA;
661 			const float p1			= srcA*(1.0f-dstA);
662 			const float p2			= dstA*(1.0f-srcA);
663 			const Vec3	bias		(srcColor[0]*p1 + dstColor[0]*p2,
664 									 srcColor[1]*p1 + dstColor[1]*p2,
665 									 srcColor[2]*p1 + dstColor[2]*p2);
666 
667 			sample.blendSrcFactorRGB	= bias;
668 			sample.blendSrcFactorA		= p0;
669 			sample.blendedA				= p0 + p1 + p2;
670 		}
671 	}
672 
673 	switch (equation)
674 	{
675 		case BLENDEQUATION_ADVANCED_MULTIPLY:		SAMPLE_REGISTER_ADV_BLEND(multiply);									break;
676 		case BLENDEQUATION_ADVANCED_SCREEN:			SAMPLE_REGISTER_ADV_BLEND(screen);										break;
677 		case BLENDEQUATION_ADVANCED_OVERLAY:		SAMPLE_REGISTER_ADV_BLEND(overlay);										break;
678 		case BLENDEQUATION_ADVANCED_DARKEN:			SAMPLE_REGISTER_ADV_BLEND(darken);										break;
679 		case BLENDEQUATION_ADVANCED_LIGHTEN:		SAMPLE_REGISTER_ADV_BLEND(lighten);										break;
680 		case BLENDEQUATION_ADVANCED_COLORDODGE:		SAMPLE_REGISTER_ADV_BLEND(colordodge);									break;
681 		case BLENDEQUATION_ADVANCED_COLORBURN:		SAMPLE_REGISTER_ADV_BLEND(colorburn);									break;
682 		case BLENDEQUATION_ADVANCED_HARDLIGHT:		SAMPLE_REGISTER_ADV_BLEND(hardlight);									break;
683 		case BLENDEQUATION_ADVANCED_SOFTLIGHT:		SAMPLE_REGISTER_ADV_BLEND(softlight);									break;
684 		case BLENDEQUATION_ADVANCED_DIFFERENCE:		SAMPLE_REGISTER_ADV_BLEND(difference);									break;
685 		case BLENDEQUATION_ADVANCED_EXCLUSION:		SAMPLE_REGISTER_ADV_BLEND(exclusion);									break;
686 		case BLENDEQUATION_ADVANCED_HSL_HUE:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));	break;
687 		case BLENDEQUATION_ADVANCED_HSL_SATURATION:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));	break;
688 		case BLENDEQUATION_ADVANCED_HSL_COLOR:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));				break;
689 		case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));				break;
690 		default:
691 			DE_ASSERT(false);
692 	}
693 
694 #undef SAMPLE_REGISTER_ADV_BLEND
695 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
696 }
697 
executeColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)698 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
699 {
700 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
701 	{
702 		if (m_sampleRegister[regSampleNdx].isAlive)
703 		{
704 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
705 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
706 			Vec4				combinedColor;
707 
708 			combinedColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
709 			combinedColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
710 
711 			if (isSRGB)
712 				combinedColor = tcu::linearToSRGB(combinedColor);
713 
714 			colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
715 		}
716 	}
717 }
718 
executeRGBA8ColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)719 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
720 {
721 	const int		fragStride	= 4;
722 	const int		xStride		= colorBuffer.getRowPitch();
723 	const int		yStride		= colorBuffer.getSlicePitch();
724 	deUint8* const	basePtr		= (deUint8*)colorBuffer.getDataPtr();
725 
726 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727 	{
728 		if (m_sampleRegister[regSampleNdx].isAlive)
729 		{
730 			const int			fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
731 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732 			deUint8*			dstPtr			= basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
733 
734 			dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
735 			dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
736 			dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
737 			dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
738 		}
739 	}
740 }
741 
executeMaskedColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)742 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
743 {
744 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
745 	{
746 		if (m_sampleRegister[regSampleNdx].isAlive)
747 		{
748 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
749 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
750 			Vec4				originalColor	= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
751 			Vec4				newColor;
752 
753 			newColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
754 			newColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
755 
756 			if (isSRGB)
757 				newColor = tcu::linearToSRGB(newColor);
758 
759 			newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
760 
761 			colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
762 		}
763 	}
764 }
765 
executeSignedValueWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)766 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
767 {
768 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
769 	{
770 		if (m_sampleRegister[regSampleNdx].isAlive)
771 		{
772 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
773 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
774 			const IVec4			originalValue	= colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
775 
776 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
777 		}
778 	}
779 }
780 
executeUnsignedValueWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)781 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
782 {
783 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
784 	{
785 		if (m_sampleRegister[regSampleNdx].isAlive)
786 		{
787 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
788 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
789 			const UVec4			originalValue	= colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
790 
791 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
792 		}
793 	}
794 }
795 
render(const rr::MultisamplePixelBufferAccess& msColorBuffer, const rr::MultisamplePixelBufferAccess& msDepthBuffer, const rr::MultisamplePixelBufferAccess& msStencilBuffer, const Fragment* inputFragments, int numFragments, FaceType fragmentFacing, const FragmentOperationState& state)796 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&		msColorBuffer,
797 								const rr::MultisamplePixelBufferAccess&		msDepthBuffer,
798 								const rr::MultisamplePixelBufferAccess&		msStencilBuffer,
799 								const Fragment*								inputFragments,
800 								int											numFragments,
801 								FaceType									fragmentFacing,
802 								const FragmentOperationState&				state)
803 {
804 	DE_ASSERT(fragmentFacing < FACETYPE_LAST);
805 	DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
806 
807 	const tcu::PixelBufferAccess&	colorBuffer			= msColorBuffer.raw();
808 	const tcu::PixelBufferAccess&	depthBuffer			= msDepthBuffer.raw();
809 	const tcu::PixelBufferAccess&	stencilBuffer		= msStencilBuffer.raw();
810 
811 	bool							hasDepth			= depthBuffer.getWidth() > 0	&& depthBuffer.getHeight() > 0		&& depthBuffer.getDepth() > 0;
812 	bool							hasStencil			= stencilBuffer.getWidth() > 0	&& stencilBuffer.getHeight() > 0	&& stencilBuffer.getDepth() > 0;
813 	bool							doDepthBoundsTest	= hasDepth		&& state.depthBoundsTestEnabled;
814 	bool							doDepthTest			= hasDepth		&& state.depthTestEnabled;
815 	bool							doStencilTest		= hasStencil	&& state.stencilTestEnabled;
816 
817 	tcu::TextureChannelClass		colorbufferClass	= tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
818 	rr::GenericVecType				fragmentDataType	= (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
819 
820 	DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())	&& (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
821 	DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())	&& (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
822 	DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())	&& (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
823 
824 	// Combined formats must be separated beforehand
825 	DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
826 	DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
827 
828 	int						numSamplesPerFragment		= colorBuffer.getWidth();
829 	int						totalNumSamples				= numFragments*numSamplesPerFragment;
830 	int						numSampleGroups				= (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
831 	const StencilState&		stencilState				= state.stencilStates[fragmentFacing];
832 	Vec4					colorMaskFactor				(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
833 	Vec4					colorMaskNegationFactor		(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
834 	bool					sRGBTarget					= state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
835 
836 	DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
837 
838 	// Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
839 	// the per-sample operations for one group at a time.
840 
841 	for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
842 	{
843 		// The index of the fragment of the sample at the beginning of m_sampleRegisters.
844 		int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
845 
846 		// Initialize sample data in the sample register.
847 
848 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
849 		{
850 			int fragNdx			= groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
851 			int fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
852 
853 			if (fragNdx < numFragments)
854 			{
855 				m_sampleRegister[regSampleNdx].isAlive		= (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
856 				m_sampleRegister[regSampleNdx].depthPassed	= true; // \note This will stay true if depth test is disabled.
857 			}
858 			else
859 				m_sampleRegister[regSampleNdx].isAlive = false;
860 		}
861 
862 		// Scissor test.
863 
864 		if (state.scissorTestEnabled)
865 			executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
866 
867 		// Depth bounds test.
868 
869 		if (doDepthBoundsTest)
870 			executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound, state.maxDepthBound, depthBuffer);
871 
872 		// Stencil test.
873 
874 		if (doStencilTest)
875 		{
876 			executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
877 			executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
878 		}
879 
880 		// Depth test.
881 		// \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
882 
883 		if (doDepthTest)
884 		{
885 			executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
886 
887 			if (state.depthMask)
888 				executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
889 		}
890 
891 		// Do dpFail and dpPass stencil writes.
892 
893 		if (doStencilTest)
894 			executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
895 
896 		// Kill the samples that failed depth test.
897 
898 		if (doDepthTest)
899 		{
900 			for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
901 				m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
902 		}
903 
904 		// Paint fragments to target
905 
906 		switch (fragmentDataType)
907 		{
908 			case rr::GENERICVECTYPE_FLOAT:
909 			{
910 				// Select min/max clamping values for blending factors and operands
911 				Vec4 minClampValue;
912 				Vec4 maxClampValue;
913 
914 				if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
915 				{
916 					minClampValue = Vec4(0.0f);
917 					maxClampValue = Vec4(1.0f);
918 				}
919 				else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
920 				{
921 					minClampValue = Vec4(-1.0f);
922 					maxClampValue = Vec4(1.0f);
923 				}
924 				else
925 				{
926 					// No clamping
927 					minClampValue = Vec4(-std::numeric_limits<float>::infinity());
928 					maxClampValue = Vec4(std::numeric_limits<float>::infinity());
929 				}
930 
931 				// Blend calculation - only if using blend.
932 				if (state.blendMode == BLENDMODE_STANDARD)
933 				{
934 					// Put dst color to register, doing srgb-to-linear conversion if needed.
935 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
936 					{
937 						if (m_sampleRegister[regSampleNdx].isAlive)
938 						{
939 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
940 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
941 							Vec4				dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
942 
943 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= clamp(frag.value.get<float>(), minClampValue, maxClampValue);
944 							m_sampleRegister[regSampleNdx].clampedBlendSrc1Color	= clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
945 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
946 						}
947 					}
948 
949 					// Calculate blend factors to register.
950 					executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
951 					executeBlendFactorComputeA(state.blendColor, state.blendAState);
952 
953 					// Compute blended color.
954 					executeBlend(state.blendRGBState, state.blendAState);
955 				}
956 				else if (state.blendMode == BLENDMODE_ADVANCED)
957 				{
958 					// Unpremultiply colors for blending, and do sRGB->linear if necessary
959 					// \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
960 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
961 					{
962 						if (m_sampleRegister[regSampleNdx].isAlive)
963 						{
964 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
965 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
966 							const Vec4			srcColor		= frag.value.get<float>();
967 							const Vec4			dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
968 
969 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
970 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
971 						}
972 					}
973 
974 					executeAdvancedBlend(state.blendEquationAdvaced);
975 				}
976 				else
977 				{
978 					// Not using blend - just put values to register as-is.
979 					DE_ASSERT(state.blendMode == BLENDMODE_NONE);
980 
981 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
982 					{
983 						if (m_sampleRegister[regSampleNdx].isAlive)
984 						{
985 							const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
986 
987 							m_sampleRegister[regSampleNdx].blendedRGB	= frag.value.get<float>().xyz();
988 							m_sampleRegister[regSampleNdx].blendedA		= frag.value.get<float>().w();
989 						}
990 					}
991 				}
992 
993 				// Clamp result values in sample register
994 				if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
995 				{
996 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
997 					{
998 						if (m_sampleRegister[regSampleNdx].isAlive)
999 						{
1000 							m_sampleRegister[regSampleNdx].blendedRGB	= clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
1001 							m_sampleRegister[regSampleNdx].blendedA		= clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
1002 						}
1003 					}
1004 				}
1005 
1006 				// Finally, write the colors to the color buffer.
1007 
1008 				if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
1009 				{
1010 					if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
1011 						executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
1012 					else
1013 						executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
1014 				}
1015 				else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1016 					executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
1017 				break;
1018 			}
1019 			case rr::GENERICVECTYPE_INT32:
1020 				// Write fragments
1021 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1022 				{
1023 					if (m_sampleRegister[regSampleNdx].isAlive)
1024 					{
1025 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1026 
1027 						m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
1028 					}
1029 				}
1030 
1031 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1032 					executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1033 				break;
1034 
1035 			case rr::GENERICVECTYPE_UINT32:
1036 				// Write fragments
1037 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1038 				{
1039 					if (m_sampleRegister[regSampleNdx].isAlive)
1040 					{
1041 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1042 
1043 						m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
1044 					}
1045 				}
1046 
1047 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1048 					executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1049 				break;
1050 
1051 			default:
1052 				DE_ASSERT(DE_FALSE);
1053 		}
1054 	}
1055 }
1056 
1057 } // rr
1058