1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Reference Renderer
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Reference implementation for per-fragment operations.
22 *//*--------------------------------------------------------------------*/
23
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include <limits>
28
29 using tcu::IVec2;
30 using tcu::Vec3;
31 using tcu::Vec4;
32 using tcu::IVec4;
33 using tcu::UVec4;
34 using tcu::min;
35 using tcu::max;
36 using tcu::clamp;
37 using de::min;
38 using de::max;
39 using de::clamp;
40
41 namespace rr
42 {
43
// Combine two values bit by bit: every bit that is set in mask is taken from
// newValue, every other bit is taken from oldValue.
static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
{
	const deUint32 keptBits		= oldValue & ~mask;
	const deUint32 replacedBits	= newValue & mask;

	return keptBits | replacedBits;
}
49
isInsideRect(const IVec2& point, const WindowRectangle& rect)50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
51 {
52 return de::inBounds(point.x(), rect.left, rect.left + rect.width) &&
53 de::inBounds(point.y(), rect.bottom, rect.bottom + rect.height);
54 }
55
// Divide the xyz components of v by its w component and keep w as is.
// When w is not greater than zero the result is all zeros; in that case the
// xyz components are asserted to be zero already.
static inline Vec4 unpremultiply (const Vec4& v)
{
	const float alpha = v.w();

	if (!(alpha > 0.0f))
	{
		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	return Vec4(v.x()/alpha, v.y()/alpha, v.z()/alpha, alpha);
}
66
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r)67 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r)68 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r)69 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>()); }
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r)70 void clearMultisampleDepthBuffer (const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r) { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r)71 void clearMultisampleStencilBuffer (const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r) { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
72
FragmentProcessor(void)73 FragmentProcessor::FragmentProcessor (void)
74 : m_sampleRegister()
75 {
76 }
77
executeScissorTest(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
79 {
80 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
81 {
82 if (m_sampleRegister[regSampleNdx].isAlive)
83 {
84 int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
85
86 if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
87 m_sampleRegister[regSampleNdx].isAlive = false;
88 }
89 }
90 }
91
executeStencilCompare(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
93 {
94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION) \
95 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
96 { \
97 if (m_sampleRegister[regSampleNdx].isAlive) \
98 { \
99 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
100 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
101 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
102 int maskedRef = stencilState.compMask & clampedStencilRef; \
103 int maskedBuf = stencilState.compMask & stencilBufferValue; \
104 DE_UNREF(maskedRef); \
105 DE_UNREF(maskedBuf); \
106 \
107 m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION); \
108 } \
109 }
110
111 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
112
113 switch (stencilState.func)
114 {
115 case TESTFUNC_NEVER: SAMPLE_REGISTER_STENCIL_COMPARE(false) break;
116 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_STENCIL_COMPARE(true) break;
117 case TESTFUNC_LESS: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef < maskedBuf) break;
118 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf) break;
119 case TESTFUNC_GREATER: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef > maskedBuf) break;
120 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf) break;
121 case TESTFUNC_EQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf) break;
122 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf) break;
123 default:
124 DE_ASSERT(false);
125 }
126
127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
128 }
129
executeStencilSFail(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
131 {
132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION) \
133 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
134 { \
135 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed) \
136 { \
137 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
138 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
139 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
140 \
141 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
142 m_sampleRegister[regSampleNdx].isAlive = false; \
143 } \
144 }
145
146 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
147
148 switch (stencilState.sFail)
149 {
150 case STENCILOP_KEEP: SAMPLE_REGISTER_SFAIL(stencilBufferValue) break;
151 case STENCILOP_ZERO: SAMPLE_REGISTER_SFAIL(0) break;
152 case STENCILOP_REPLACE: SAMPLE_REGISTER_SFAIL(clampedStencilRef) break;
153 case STENCILOP_INCR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break;
154 case STENCILOP_DECR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break;
155 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break;
156 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break;
157 case STENCILOP_INVERT: SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1)) break;
158 default:
159 DE_ASSERT(false);
160 }
161
162 #undef SAMPLE_REGISTER_SFAIL
163 }
164
165
executeDepthBoundsTest(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)166 void FragmentProcessor::executeDepthBoundsTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)
167 {
168 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
169 {
170 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
171 {
172 if (m_sampleRegister[regSampleNdx].isAlive)
173 {
174 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
175 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
176 const float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
177
178 if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound))
179 m_sampleRegister[regSampleNdx].isAlive = false;
180 }
181 }
182 }
183 else
184 {
185 /* Convert float bounds to target buffer format for comparison */
186
187 deUint32 minDepthBoundUint, maxDepthBoundUint;
188 {
189 deUint32 buffer[2];
190 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());
191
192 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
193 access.setPixDepth(minDepthBound, 0, 0, 0);
194 minDepthBoundUint = access.getPixelUint(0, 0, 0).x();
195 }
196 {
197 deUint32 buffer[2];
198
199 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
200 access.setPixDepth(maxDepthBound, 0, 0, 0);
201 maxDepthBoundUint = access.getPixelUint(0, 0, 0).x();
202 }
203
204 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
205 {
206 if (m_sampleRegister[regSampleNdx].isAlive)
207 {
208 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
209 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
210 const deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();
211
212 if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint))
213 m_sampleRegister[regSampleNdx].isAlive = false;
214 }
215 }
216 }
217 }
218
executeDepthCompare(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)219 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
220 {
221 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION) \
222 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
223 { \
224 if (m_sampleRegister[regSampleNdx].isAlive) \
225 { \
226 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
227 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
228 float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
229 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
230 float sampleDepth = de::clamp(sampleDepthFloat, 0.0f, 1.0f); \
231 \
232 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
233 \
234 DE_UNREF(depthBufferValue); \
235 DE_UNREF(sampleDepth); \
236 } \
237 }
238
239 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION) \
240 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
241 { \
242 if (m_sampleRegister[regSampleNdx].isAlive) \
243 { \
244 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
245 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
246 deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x(); \
247 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
248 \
249 /* Convert input float to target buffer format for comparison */ \
250 \
251 deUint32 buffer[2]; \
252 \
253 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize()); \
254 \
255 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); \
256 access.setPixDepth(sampleDepthFloat, 0, 0, 0); \
257 deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x(); \
258 \
259 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
260 \
261 DE_UNREF(depthBufferValue); \
262 DE_UNREF(sampleDepth); \
263 } \
264 }
265
266 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
267 {
268
269 switch (depthFunc)
270 {
271 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_F(false) break;
272 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_F(true) break;
273 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth < depthBufferValue) break;
274 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue) break;
275 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth > depthBufferValue) break;
276 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue) break;
277 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue) break;
278 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue) break;
279 default:
280 DE_ASSERT(false);
281 }
282
283 }
284 else
285 {
286 switch (depthFunc)
287 {
288 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(false) break;
289 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(true) break;
290 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth < depthBufferValue) break;
291 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue) break;
292 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth > depthBufferValue) break;
293 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue) break;
294 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue) break;
295 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue) break;
296 default:
297 DE_ASSERT(false);
298 }
299 }
300
301 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
302 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
303 }
304
executeDepthWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)305 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
306 {
307 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
308 {
309 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
310 {
311 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
312 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
313 const float clampedDepth = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
314
315 depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
316 }
317 }
318 }
319
executeStencilDpFailAndPass(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)320 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
321 {
322 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION) \
323 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
324 { \
325 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION)) \
326 { \
327 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
328 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
329 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
330 \
331 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
332 } \
333 }
334
335 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION) \
336 switch (stencilState.OP_NAME) \
337 { \
338 case STENCILOP_KEEP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue) break; \
339 case STENCILOP_ZERO: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0) break; \
340 case STENCILOP_REPLACE: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef) break; \
341 case STENCILOP_INCR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break; \
342 case STENCILOP_DECR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break; \
343 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break; \
344 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break; \
345 case STENCILOP_INVERT: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1)) break; \
346 default: \
347 DE_ASSERT(false); \
348 }
349
350 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
351
352 SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
353 SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
354
355 #undef SWITCH_DPFAIL_OR_DPPASS
356 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
357 }
358
executeBlendFactorComputeRGB(const Vec4& blendColor, const BlendState& blendRGBState)359 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
360 {
361 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
362 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
363 { \
364 if (m_sampleRegister[regSampleNdx].isAlive) \
365 { \
366 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
367 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
368 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
369 DE_UNREF(src); \
370 DE_UNREF(src1); \
371 DE_UNREF(dst); \
372 \
373 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \
374 } \
375 }
376
377 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME) \
378 switch (blendRGBState.FUNC_NAME) \
379 { \
380 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f)) break; \
381 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f)) break; \
382 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2)) break; \
383 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2)) break; \
384 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2)) break; \
385 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2)) break; \
386 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w())) break; \
387 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w())) break; \
388 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w())) break; \
389 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w())) break; \
390 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2)) break; \
391 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2)) break; \
392 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w())) break; \
393 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w())) break; \
394 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w()))) break; \
395 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2)) break; \
396 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2)) break; \
397 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w())) break; \
398 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w())) break; \
399 default: \
400 DE_ASSERT(false); \
401 }
402
403 SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
404 SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
405
406 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
407 #undef SAMPLE_REGISTER_BLEND_FACTOR
408 }
409
executeBlendFactorComputeA(const Vec4& blendColor, const BlendState& blendAState)410 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
411 {
412 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
413 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
414 { \
415 if (m_sampleRegister[regSampleNdx].isAlive) \
416 { \
417 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
418 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
419 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
420 DE_UNREF(src); \
421 DE_UNREF(src1); \
422 DE_UNREF(dst); \
423 \
424 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \
425 } \
426 }
427
428 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME) \
429 switch (blendAState.FUNC_NAME) \
430 { \
431 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f) break; \
432 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
433 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
434 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
435 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
436 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
437 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
438 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
439 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
440 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
441 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
442 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
443 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
444 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
445 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
446 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
447 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
448 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
449 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
450 default: \
451 DE_ASSERT(false); \
452 }
453
454 SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
455 SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
456
457 #undef SWITCH_SRC_OR_DST_FACTOR_A
458 #undef SAMPLE_REGISTER_BLEND_FACTOR
459 }
460
executeBlend(const BlendState& blendRGBState, const BlendState& blendAState)461 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
462 {
463 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION) \
464 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
465 { \
466 if (m_sampleRegister[regSampleNdx].isAlive) \
467 { \
468 SampleData& sample = m_sampleRegister[regSampleNdx]; \
469 const Vec4& srcColor = sample.clampedBlendSrcColor; \
470 const Vec4& dstColor = sample.clampedBlendDstColor; \
471 \
472 sample.COLOR_NAME = (COLOR_EXPRESSION); \
473 } \
474 }
475
476 switch (blendRGBState.equation)
477 {
478 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
479 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
480 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB) break;
481 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
482 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
483 default:
484 DE_ASSERT(false);
485 }
486
487 switch (blendAState.equation)
488 {
489 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA) break;
490 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA) break;
491 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA) break;
492 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w())) break;
493 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w())) break;
494 default:
495 DE_ASSERT(false);
496 }
497 #undef SAMPLE_REGISTER_BLENDED_COLOR
498 }
499
500 namespace advblend
501 {
502
multiply(float src, float dst)503 inline float multiply (float src, float dst) { return src*dst; }
screen(float src, float dst)504 inline float screen (float src, float dst) { return src + dst - src*dst; }
darken(float src, float dst)505 inline float darken (float src, float dst) { return de::min(src, dst); }
lighten(float src, float dst)506 inline float lighten (float src, float dst) { return de::max(src, dst); }
difference(float src, float dst)507 inline float difference (float src, float dst) { return de::abs(dst-src); }
exclusion(float src, float dst)508 inline float exclusion (float src, float dst) { return src + dst - 2.0f*src*dst; }
509
overlay(float src, float dst)510 inline float overlay (float src, float dst)
511 {
512 if (dst <= 0.5f)
513 return 2.0f*src*dst;
514 else
515 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
516 }
517
colordodge(float src, float dst)518 inline float colordodge (float src, float dst)
519 {
520 if (dst <= 0.0f)
521 return 0.0f;
522 else if (src < 1.0f)
523 return de::min(1.0f, dst/(1.0f-src));
524 else
525 return 1.0f;
526 }
527
colorburn(float src, float dst)528 inline float colorburn (float src, float dst)
529 {
530 if (dst >= 1.0f)
531 return 1.0f;
532 else if (src > 0.0f)
533 return 1.0f - de::min(1.0f, (1.0f-dst)/src);
534 else
535 return 0.0f;
536 }
537
hardlight(float src, float dst)538 inline float hardlight (float src, float dst)
539 {
540 if (src <= 0.5f)
541 return 2.0f*src*dst;
542 else
543 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
544 }
545
softlight(float src, float dst)546 inline float softlight (float src, float dst)
547 {
548 if (src <= 0.5f)
549 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
550 else if (dst <= 0.25f)
551 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
552 else
553 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
554 }
555
minComp(const Vec3& v)556 inline float minComp (const Vec3& v)
557 {
558 return de::min(de::min(v.x(), v.y()), v.z());
559 }
560
maxComp(const Vec3& v)561 inline float maxComp (const Vec3& v)
562 {
563 return de::max(de::max(v.x(), v.y()), v.z());
564 }
565
luminosity(const Vec3& rgb)566 inline float luminosity (const Vec3& rgb)
567 {
568 return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
569 }
570
saturation(const Vec3& rgb)571 inline float saturation (const Vec3& rgb)
572 {
573 return maxComp(rgb) - minComp(rgb);
574 }
575
setLum(const Vec3& cbase, const Vec3& clum)576 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
577 {
578 const float lbase = luminosity(cbase);
579 const float llum = luminosity(clum);
580 const float ldiff = llum - lbase;
581 const Vec3 color = cbase + Vec3(ldiff);
582 const float minC = minComp(color);
583 const float maxC = maxComp(color);
584
585 if (minC < 0.0f)
586 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
587 else if (maxC > 1.0f)
588 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
589 else
590 return color;
591 }
592
setLumSat(const Vec3& cbase, const Vec3& csat, const Vec3& clum)593 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
594 {
595 const float minbase = minComp(cbase);
596 const float sbase = saturation(cbase);
597 const float ssat = saturation(csat);
598 Vec3 color = Vec3(0.0f);
599
600 if (sbase > 0.0f)
601 color = (cbase - minbase) * ssat / sbase;
602
603 return setLum(color, clum);
604 }
605
606 } // advblend
607
executeAdvancedBlend(BlendEquationAdvanced equation)608 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
609 {
610 using namespace advblend;
611
612 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME) \
613 do { \
614 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
615 { \
616 if (m_sampleRegister[regSampleNdx].isAlive) \
617 { \
618 SampleData& sample = m_sampleRegister[regSampleNdx]; \
619 const Vec4& srcColor = sample.clampedBlendSrcColor; \
620 const Vec4& dstColor = sample.clampedBlendDstColor; \
621 const Vec3& bias = sample.blendSrcFactorRGB; \
622 const float p0 = sample.blendSrcFactorA; \
623 const float r = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \
624 const float g = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \
625 const float b = FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2]; \
626 \
627 sample.blendedRGB = Vec3(r, g, b); \
628 } \
629 } \
630 } while (0)
631
632 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION) \
633 do { \
634 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
635 { \
636 if (m_sampleRegister[regSampleNdx].isAlive) \
637 { \
638 SampleData& sample = m_sampleRegister[regSampleNdx]; \
639 const Vec3 srcColor = sample.clampedBlendSrcColor.swizzle(0,1,2); \
640 const Vec3 dstColor = sample.clampedBlendDstColor.swizzle(0,1,2); \
641 const Vec3& bias = sample.blendSrcFactorRGB; \
642 const float p0 = sample.blendSrcFactorA; \
643 \
644 sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias; \
645 } \
646 } \
647 } while (0)
648
649 // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
650 // \note clampedBlend*Color contains clamped & unpremultiplied colors
651 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
652 {
653 if (m_sampleRegister[regSampleNdx].isAlive)
654 {
655 SampleData& sample = m_sampleRegister[regSampleNdx];
656 const Vec4& srcColor = sample.clampedBlendSrcColor;
657 const Vec4& dstColor = sample.clampedBlendDstColor;
658 const float srcA = srcColor.w();
659 const float dstA = dstColor.w();
660 const float p0 = srcA*dstA;
661 const float p1 = srcA*(1.0f-dstA);
662 const float p2 = dstA*(1.0f-srcA);
663 const Vec3 bias (srcColor[0]*p1 + dstColor[0]*p2,
664 srcColor[1]*p1 + dstColor[1]*p2,
665 srcColor[2]*p1 + dstColor[2]*p2);
666
667 sample.blendSrcFactorRGB = bias;
668 sample.blendSrcFactorA = p0;
669 sample.blendedA = p0 + p1 + p2;
670 }
671 }
672
673 switch (equation)
674 {
675 case BLENDEQUATION_ADVANCED_MULTIPLY: SAMPLE_REGISTER_ADV_BLEND(multiply); break;
676 case BLENDEQUATION_ADVANCED_SCREEN: SAMPLE_REGISTER_ADV_BLEND(screen); break;
677 case BLENDEQUATION_ADVANCED_OVERLAY: SAMPLE_REGISTER_ADV_BLEND(overlay); break;
678 case BLENDEQUATION_ADVANCED_DARKEN: SAMPLE_REGISTER_ADV_BLEND(darken); break;
679 case BLENDEQUATION_ADVANCED_LIGHTEN: SAMPLE_REGISTER_ADV_BLEND(lighten); break;
680 case BLENDEQUATION_ADVANCED_COLORDODGE: SAMPLE_REGISTER_ADV_BLEND(colordodge); break;
681 case BLENDEQUATION_ADVANCED_COLORBURN: SAMPLE_REGISTER_ADV_BLEND(colorburn); break;
682 case BLENDEQUATION_ADVANCED_HARDLIGHT: SAMPLE_REGISTER_ADV_BLEND(hardlight); break;
683 case BLENDEQUATION_ADVANCED_SOFTLIGHT: SAMPLE_REGISTER_ADV_BLEND(softlight); break;
684 case BLENDEQUATION_ADVANCED_DIFFERENCE: SAMPLE_REGISTER_ADV_BLEND(difference); break;
685 case BLENDEQUATION_ADVANCED_EXCLUSION: SAMPLE_REGISTER_ADV_BLEND(exclusion); break;
686 case BLENDEQUATION_ADVANCED_HSL_HUE: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor)); break;
687 case BLENDEQUATION_ADVANCED_HSL_SATURATION: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor)); break;
688 case BLENDEQUATION_ADVANCED_HSL_COLOR: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor)); break;
689 case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor)); break;
690 default:
691 DE_ASSERT(false);
692 }
693
694 #undef SAMPLE_REGISTER_ADV_BLEND
695 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
696 }
697
executeColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)698 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
699 {
700 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
701 {
702 if (m_sampleRegister[regSampleNdx].isAlive)
703 {
704 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
705 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
706 Vec4 combinedColor;
707
708 combinedColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
709 combinedColor.w() = m_sampleRegister[regSampleNdx].blendedA;
710
711 if (isSRGB)
712 combinedColor = tcu::linearToSRGB(combinedColor);
713
714 colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
715 }
716 }
717 }
718
executeRGBA8ColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)719 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
720 {
721 const int fragStride = 4;
722 const int xStride = colorBuffer.getRowPitch();
723 const int yStride = colorBuffer.getSlicePitch();
724 deUint8* const basePtr = (deUint8*)colorBuffer.getDataPtr();
725
726 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727 {
728 if (m_sampleRegister[regSampleNdx].isAlive)
729 {
730 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
731 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732 deUint8* dstPtr = basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
733
734 dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
735 dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
736 dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
737 dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
738 }
739 }
740 }
741
executeMaskedColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)742 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
743 {
744 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
745 {
746 if (m_sampleRegister[regSampleNdx].isAlive)
747 {
748 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
749 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
750 Vec4 originalColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
751 Vec4 newColor;
752
753 newColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
754 newColor.w() = m_sampleRegister[regSampleNdx].blendedA;
755
756 if (isSRGB)
757 newColor = tcu::linearToSRGB(newColor);
758
759 newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
760
761 colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
762 }
763 }
764 }
765
executeSignedValueWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)766 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
767 {
768 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
769 {
770 if (m_sampleRegister[regSampleNdx].isAlive)
771 {
772 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
773 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
774 const IVec4 originalValue = colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
775
776 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
777 }
778 }
779 }
780
executeUnsignedValueWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)781 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
782 {
783 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
784 {
785 if (m_sampleRegister[regSampleNdx].isAlive)
786 {
787 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
788 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
789 const UVec4 originalValue = colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
790
791 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
792 }
793 }
794 }
795
render(const rr::MultisamplePixelBufferAccess& msColorBuffer, const rr::MultisamplePixelBufferAccess& msDepthBuffer, const rr::MultisamplePixelBufferAccess& msStencilBuffer, const Fragment* inputFragments, int numFragments, FaceType fragmentFacing, const FragmentOperationState& state)796 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess& msColorBuffer,
797 const rr::MultisamplePixelBufferAccess& msDepthBuffer,
798 const rr::MultisamplePixelBufferAccess& msStencilBuffer,
799 const Fragment* inputFragments,
800 int numFragments,
801 FaceType fragmentFacing,
802 const FragmentOperationState& state)
803 {
804 DE_ASSERT(fragmentFacing < FACETYPE_LAST);
805 DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
806
807 const tcu::PixelBufferAccess& colorBuffer = msColorBuffer.raw();
808 const tcu::PixelBufferAccess& depthBuffer = msDepthBuffer.raw();
809 const tcu::PixelBufferAccess& stencilBuffer = msStencilBuffer.raw();
810
811 bool hasDepth = depthBuffer.getWidth() > 0 && depthBuffer.getHeight() > 0 && depthBuffer.getDepth() > 0;
812 bool hasStencil = stencilBuffer.getWidth() > 0 && stencilBuffer.getHeight() > 0 && stencilBuffer.getDepth() > 0;
813 bool doDepthBoundsTest = hasDepth && state.depthBoundsTestEnabled;
814 bool doDepthTest = hasDepth && state.depthTestEnabled;
815 bool doStencilTest = hasStencil && state.stencilTestEnabled;
816
817 tcu::TextureChannelClass colorbufferClass = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
818 rr::GenericVecType fragmentDataType = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
819
820 DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth()) && (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
821 DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight()) && (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
822 DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth()) && (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
823
824 // Combined formats must be separated beforehand
825 DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
826 DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
827
828 int numSamplesPerFragment = colorBuffer.getWidth();
829 int totalNumSamples = numFragments*numSamplesPerFragment;
830 int numSampleGroups = (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
831 const StencilState& stencilState = state.stencilStates[fragmentFacing];
832 Vec4 colorMaskFactor (state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
833 Vec4 colorMaskNegationFactor (state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
834 bool sRGBTarget = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
835
836 DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
837
838 // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
839 // the per-sample operations for one group at a time.
840
841 for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
842 {
843 // The index of the fragment of the sample at the beginning of m_sampleRegisters.
844 int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
845
846 // Initialize sample data in the sample register.
847
848 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
849 {
850 int fragNdx = groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
851 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
852
853 if (fragNdx < numFragments)
854 {
855 m_sampleRegister[regSampleNdx].isAlive = (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
856 m_sampleRegister[regSampleNdx].depthPassed = true; // \note This will stay true if depth test is disabled.
857 }
858 else
859 m_sampleRegister[regSampleNdx].isAlive = false;
860 }
861
862 // Scissor test.
863
864 if (state.scissorTestEnabled)
865 executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
866
867 // Depth bounds test.
868
869 if (doDepthBoundsTest)
870 executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound, state.maxDepthBound, depthBuffer);
871
872 // Stencil test.
873
874 if (doStencilTest)
875 {
876 executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
877 executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
878 }
879
880 // Depth test.
881 // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
882
883 if (doDepthTest)
884 {
885 executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
886
887 if (state.depthMask)
888 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
889 }
890
891 // Do dpFail and dpPass stencil writes.
892
893 if (doStencilTest)
894 executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
895
896 // Kill the samples that failed depth test.
897
898 if (doDepthTest)
899 {
900 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
901 m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
902 }
903
904 // Paint fragments to target
905
906 switch (fragmentDataType)
907 {
908 case rr::GENERICVECTYPE_FLOAT:
909 {
910 // Select min/max clamping values for blending factors and operands
911 Vec4 minClampValue;
912 Vec4 maxClampValue;
913
914 if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
915 {
916 minClampValue = Vec4(0.0f);
917 maxClampValue = Vec4(1.0f);
918 }
919 else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
920 {
921 minClampValue = Vec4(-1.0f);
922 maxClampValue = Vec4(1.0f);
923 }
924 else
925 {
926 // No clamping
927 minClampValue = Vec4(-std::numeric_limits<float>::infinity());
928 maxClampValue = Vec4(std::numeric_limits<float>::infinity());
929 }
930
931 // Blend calculation - only if using blend.
932 if (state.blendMode == BLENDMODE_STANDARD)
933 {
934 // Put dst color to register, doing srgb-to-linear conversion if needed.
935 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
936 {
937 if (m_sampleRegister[regSampleNdx].isAlive)
938 {
939 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
940 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
941 Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
942
943 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = clamp(frag.value.get<float>(), minClampValue, maxClampValue);
944 m_sampleRegister[regSampleNdx].clampedBlendSrc1Color = clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
945 m_sampleRegister[regSampleNdx].clampedBlendDstColor = clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
946 }
947 }
948
949 // Calculate blend factors to register.
950 executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
951 executeBlendFactorComputeA(state.blendColor, state.blendAState);
952
953 // Compute blended color.
954 executeBlend(state.blendRGBState, state.blendAState);
955 }
956 else if (state.blendMode == BLENDMODE_ADVANCED)
957 {
958 // Unpremultiply colors for blending, and do sRGB->linear if necessary
959 // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
960 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
961 {
962 if (m_sampleRegister[regSampleNdx].isAlive)
963 {
964 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
965 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
966 const Vec4 srcColor = frag.value.get<float>();
967 const Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
968
969 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
970 m_sampleRegister[regSampleNdx].clampedBlendDstColor = unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
971 }
972 }
973
974 executeAdvancedBlend(state.blendEquationAdvaced);
975 }
976 else
977 {
978 // Not using blend - just put values to register as-is.
979 DE_ASSERT(state.blendMode == BLENDMODE_NONE);
980
981 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
982 {
983 if (m_sampleRegister[regSampleNdx].isAlive)
984 {
985 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
986
987 m_sampleRegister[regSampleNdx].blendedRGB = frag.value.get<float>().xyz();
988 m_sampleRegister[regSampleNdx].blendedA = frag.value.get<float>().w();
989 }
990 }
991 }
992
993 // Clamp result values in sample register
994 if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
995 {
996 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
997 {
998 if (m_sampleRegister[regSampleNdx].isAlive)
999 {
1000 m_sampleRegister[regSampleNdx].blendedRGB = clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
1001 m_sampleRegister[regSampleNdx].blendedA = clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
1002 }
1003 }
1004 }
1005
1006 // Finally, write the colors to the color buffer.
1007
1008 if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
1009 {
1010 if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
1011 executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
1012 else
1013 executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
1014 }
1015 else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1016 executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
1017 break;
1018 }
1019 case rr::GENERICVECTYPE_INT32:
1020 // Write fragments
1021 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1022 {
1023 if (m_sampleRegister[regSampleNdx].isAlive)
1024 {
1025 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1026
1027 m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
1028 }
1029 }
1030
1031 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1032 executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1033 break;
1034
1035 case rr::GENERICVECTYPE_UINT32:
1036 // Write fragments
1037 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1038 {
1039 if (m_sampleRegister[regSampleNdx].isAlive)
1040 {
1041 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1042
1043 m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
1044 }
1045 }
1046
1047 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1048 executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1049 break;
1050
1051 default:
1052 DE_ASSERT(DE_FALSE);
1053 }
1054 }
1055 }
1056
1057 } // rr
1058