// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14cb93a386Sopenharmony_ci
15cb93a386Sopenharmony_ci#include "PixelRoutine.hpp"
16cb93a386Sopenharmony_ci
17cb93a386Sopenharmony_ci#include "Constants.hpp"
18cb93a386Sopenharmony_ci#include "SamplerCore.hpp"
19cb93a386Sopenharmony_ci#include "Device/Primitive.hpp"
20cb93a386Sopenharmony_ci#include "Device/QuadRasterizer.hpp"
21cb93a386Sopenharmony_ci#include "Device/Renderer.hpp"
22cb93a386Sopenharmony_ci#include "System/Debug.hpp"
23cb93a386Sopenharmony_ci#include "Vulkan/VkPipelineLayout.hpp"
24cb93a386Sopenharmony_ci#include "Vulkan/VkStringify.hpp"
25cb93a386Sopenharmony_ci
26cb93a386Sopenharmony_cinamespace sw {
27cb93a386Sopenharmony_ci
28cb93a386Sopenharmony_ciPixelRoutine::PixelRoutine(
29cb93a386Sopenharmony_ci    const PixelProcessor::State &state,
30cb93a386Sopenharmony_ci    vk::PipelineLayout const *pipelineLayout,
31cb93a386Sopenharmony_ci    SpirvShader const *spirvShader,
32cb93a386Sopenharmony_ci    const vk::DescriptorSet::Bindings &descriptorSets)
33cb93a386Sopenharmony_ci    : QuadRasterizer(state, spirvShader)
34cb93a386Sopenharmony_ci    , routine(pipelineLayout)
35cb93a386Sopenharmony_ci    , descriptorSets(descriptorSets)
36cb93a386Sopenharmony_ci    , shaderContainsInterpolation(spirvShader && spirvShader->getUsedCapabilities().InterpolationFunction)
37cb93a386Sopenharmony_ci    , shaderContainsSampleQualifier(spirvShader && spirvShader->getAnalysis().ContainsSampleQualifier)
38cb93a386Sopenharmony_ci    , perSampleShading((state.sampleShadingEnabled && (state.minSampleShading * state.multiSampleCount > 1.0f)) ||
39cb93a386Sopenharmony_ci                       shaderContainsSampleQualifier || shaderContainsInterpolation)  // TODO(b/194714095)
40cb93a386Sopenharmony_ci    , invocationCount(perSampleShading ? state.multiSampleCount : 1)
41cb93a386Sopenharmony_ci{
42cb93a386Sopenharmony_ci	if(spirvShader)
43cb93a386Sopenharmony_ci	{
44cb93a386Sopenharmony_ci		spirvShader->emitProlog(&routine);
45cb93a386Sopenharmony_ci
46cb93a386Sopenharmony_ci		// Clearing inputs to 0 is not demanded by the spec,
47cb93a386Sopenharmony_ci		// but it makes the undefined behavior deterministic.
48cb93a386Sopenharmony_ci		// TODO(b/155148722): Remove to detect UB.
49cb93a386Sopenharmony_ci		for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
50cb93a386Sopenharmony_ci		{
51cb93a386Sopenharmony_ci			routine.inputs[i] = Float4(0.0f);
52cb93a386Sopenharmony_ci		}
53cb93a386Sopenharmony_ci	}
54cb93a386Sopenharmony_ci}
55cb93a386Sopenharmony_ci
56cb93a386Sopenharmony_ciPixelRoutine::~PixelRoutine()
57cb93a386Sopenharmony_ci{
58cb93a386Sopenharmony_ci}
59cb93a386Sopenharmony_ci
60cb93a386Sopenharmony_ciPixelRoutine::SampleSet PixelRoutine::getSampleSet(int invocation) const
61cb93a386Sopenharmony_ci{
62cb93a386Sopenharmony_ci	unsigned int sampleBegin = perSampleShading ? invocation : 0;
63cb93a386Sopenharmony_ci	unsigned int sampleEnd = perSampleShading ? (invocation + 1) : state.multiSampleCount;
64cb93a386Sopenharmony_ci
65cb93a386Sopenharmony_ci	SampleSet samples;
66cb93a386Sopenharmony_ci
67cb93a386Sopenharmony_ci	for(unsigned int q = sampleBegin; q < sampleEnd; q++)
68cb93a386Sopenharmony_ci	{
69cb93a386Sopenharmony_ci		if(state.multiSampleMask & (1 << q))
70cb93a386Sopenharmony_ci		{
71cb93a386Sopenharmony_ci			samples.push_back(q);
72cb93a386Sopenharmony_ci		}
73cb93a386Sopenharmony_ci	}
74cb93a386Sopenharmony_ci
75cb93a386Sopenharmony_ci	return samples;
76cb93a386Sopenharmony_ci}
77cb93a386Sopenharmony_ci
78cb93a386Sopenharmony_civoid PixelRoutine::quad(Pointer<Byte> cBuffer[MAX_COLOR_BUFFERS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
79cb93a386Sopenharmony_ci{
80cb93a386Sopenharmony_ci	const bool earlyFragmentTests = !spirvShader || spirvShader->getExecutionModes().EarlyFragmentTests;
81cb93a386Sopenharmony_ci
82cb93a386Sopenharmony_ci	Int zMask[4];  // Depth mask
83cb93a386Sopenharmony_ci	Int sMask[4];  // Stencil mask
84cb93a386Sopenharmony_ci	Float4 unclampedZ[4];
85cb93a386Sopenharmony_ci
86cb93a386Sopenharmony_ci	for(int invocation = 0; invocation < invocationCount; invocation++)
87cb93a386Sopenharmony_ci	{
88cb93a386Sopenharmony_ci		SampleSet samples = getSampleSet(invocation);
89cb93a386Sopenharmony_ci
90cb93a386Sopenharmony_ci		if(samples.empty())
91cb93a386Sopenharmony_ci		{
92cb93a386Sopenharmony_ci			continue;
93cb93a386Sopenharmony_ci		}
94cb93a386Sopenharmony_ci
95cb93a386Sopenharmony_ci		for(unsigned int q : samples)
96cb93a386Sopenharmony_ci		{
97cb93a386Sopenharmony_ci			zMask[q] = cMask[q];
98cb93a386Sopenharmony_ci			sMask[q] = cMask[q];
99cb93a386Sopenharmony_ci		}
100cb93a386Sopenharmony_ci
101cb93a386Sopenharmony_ci		stencilTest(sBuffer, x, sMask, samples);
102cb93a386Sopenharmony_ci
103cb93a386Sopenharmony_ci		Float4 f;
104cb93a386Sopenharmony_ci		Float4 rhwCentroid;
105cb93a386Sopenharmony_ci
106cb93a386Sopenharmony_ci		Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
107cb93a386Sopenharmony_ci
108cb93a386Sopenharmony_ci		if(interpolateZ())
109cb93a386Sopenharmony_ci		{
110cb93a386Sopenharmony_ci			for(unsigned int q : samples)
111cb93a386Sopenharmony_ci			{
112cb93a386Sopenharmony_ci				Float4 x = xxxx;
113cb93a386Sopenharmony_ci
114cb93a386Sopenharmony_ci				if(state.enableMultiSampling)
115cb93a386Sopenharmony_ci				{
116cb93a386Sopenharmony_ci					x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
117cb93a386Sopenharmony_ci				}
118cb93a386Sopenharmony_ci
119cb93a386Sopenharmony_ci				z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false);
120cb93a386Sopenharmony_ci
121cb93a386Sopenharmony_ci				if(state.depthBias)
122cb93a386Sopenharmony_ci				{
123cb93a386Sopenharmony_ci					z[q] += *Pointer<Float4>(primitive + OFFSET(Primitive, zBias), 16);
124cb93a386Sopenharmony_ci				}
125cb93a386Sopenharmony_ci
126cb93a386Sopenharmony_ci				unclampedZ[q] = z[q];
127cb93a386Sopenharmony_ci			}
128cb93a386Sopenharmony_ci		}
129cb93a386Sopenharmony_ci
130cb93a386Sopenharmony_ci		Bool depthPass = false;
131cb93a386Sopenharmony_ci
132cb93a386Sopenharmony_ci		if(earlyFragmentTests)
133cb93a386Sopenharmony_ci		{
134cb93a386Sopenharmony_ci			for(unsigned int q : samples)
135cb93a386Sopenharmony_ci			{
136cb93a386Sopenharmony_ci				z[q] = clampDepth(z[q]);
137cb93a386Sopenharmony_ci				depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
138cb93a386Sopenharmony_ci				depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]);
139cb93a386Sopenharmony_ci			}
140cb93a386Sopenharmony_ci		}
141cb93a386Sopenharmony_ci
142cb93a386Sopenharmony_ci		If(depthPass || !earlyFragmentTests)
143cb93a386Sopenharmony_ci		{
144cb93a386Sopenharmony_ci			if(earlyFragmentTests)
145cb93a386Sopenharmony_ci			{
146cb93a386Sopenharmony_ci				writeDepth(zBuffer, x, zMask, samples);
147cb93a386Sopenharmony_ci			}
148cb93a386Sopenharmony_ci
149cb93a386Sopenharmony_ci			Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
150cb93a386Sopenharmony_ci
151cb93a386Sopenharmony_ci			// Centroid locations
152cb93a386Sopenharmony_ci			Float4 XXXX = Float4(0.0f);
153cb93a386Sopenharmony_ci			Float4 YYYY = Float4(0.0f);
154cb93a386Sopenharmony_ci
155cb93a386Sopenharmony_ci			if(state.centroid || shaderContainsInterpolation)  // TODO(b/194714095)
156cb93a386Sopenharmony_ci			{
157cb93a386Sopenharmony_ci				Float4 WWWW(1.0e-9f);
158cb93a386Sopenharmony_ci
159cb93a386Sopenharmony_ci				for(unsigned int q : samples)
160cb93a386Sopenharmony_ci				{
161cb93a386Sopenharmony_ci					XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
162cb93a386Sopenharmony_ci					YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
163cb93a386Sopenharmony_ci					WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
164cb93a386Sopenharmony_ci				}
165cb93a386Sopenharmony_ci
166cb93a386Sopenharmony_ci				WWWW = Rcp(WWWW, Precision::Relaxed);
167cb93a386Sopenharmony_ci				XXXX *= WWWW;
168cb93a386Sopenharmony_ci				YYYY *= WWWW;
169cb93a386Sopenharmony_ci
170cb93a386Sopenharmony_ci				XXXX += xxxx;
171cb93a386Sopenharmony_ci				YYYY += yyyy;
172cb93a386Sopenharmony_ci			}
173cb93a386Sopenharmony_ci
174cb93a386Sopenharmony_ci			if(interpolateW())
175cb93a386Sopenharmony_ci			{
176cb93a386Sopenharmony_ci				w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false);
177cb93a386Sopenharmony_ci				rhw = reciprocal(w, false, false, true);
178cb93a386Sopenharmony_ci
179cb93a386Sopenharmony_ci				if(state.centroid || shaderContainsInterpolation)  // TODO(b/194714095)
180cb93a386Sopenharmony_ci				{
181cb93a386Sopenharmony_ci					rhwCentroid = reciprocal(SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false));
182cb93a386Sopenharmony_ci				}
183cb93a386Sopenharmony_ci			}
184cb93a386Sopenharmony_ci
185cb93a386Sopenharmony_ci			if(spirvShader)
186cb93a386Sopenharmony_ci			{
187cb93a386Sopenharmony_ci				if(shaderContainsInterpolation)  // TODO(b/194714095)
188cb93a386Sopenharmony_ci				{
189cb93a386Sopenharmony_ci					routine.interpolationData.primitive = primitive;
190cb93a386Sopenharmony_ci
191cb93a386Sopenharmony_ci					routine.interpolationData.x = xxxx;
192cb93a386Sopenharmony_ci					routine.interpolationData.y = yyyy;
193cb93a386Sopenharmony_ci					routine.interpolationData.rhw = rhw;
194cb93a386Sopenharmony_ci
195cb93a386Sopenharmony_ci					routine.interpolationData.xCentroid = XXXX;
196cb93a386Sopenharmony_ci					routine.interpolationData.yCentroid = YYYY;
197cb93a386Sopenharmony_ci					routine.interpolationData.rhwCentroid = rhwCentroid;
198cb93a386Sopenharmony_ci				}
199cb93a386Sopenharmony_ci
200cb93a386Sopenharmony_ci				if(perSampleShading && (state.multiSampleCount > 1))
201cb93a386Sopenharmony_ci				{
202cb93a386Sopenharmony_ci					xxxx += Float4(Constants::SampleLocationsX[samples[0]]);
203cb93a386Sopenharmony_ci					yyyy += Float4(Constants::SampleLocationsY[samples[0]]);
204cb93a386Sopenharmony_ci				}
205cb93a386Sopenharmony_ci
206cb93a386Sopenharmony_ci				int packedInterpolant = 0;
207cb93a386Sopenharmony_ci				for(int interfaceInterpolant = 0; interfaceInterpolant < MAX_INTERFACE_COMPONENTS; interfaceInterpolant++)
208cb93a386Sopenharmony_ci				{
209cb93a386Sopenharmony_ci					auto const &input = spirvShader->inputs[interfaceInterpolant];
210cb93a386Sopenharmony_ci					if(input.Type != SpirvShader::ATTRIBTYPE_UNUSED)
211cb93a386Sopenharmony_ci					{
212cb93a386Sopenharmony_ci						if(input.Centroid && state.enableMultiSampling)
213cb93a386Sopenharmony_ci						{
214cb93a386Sopenharmony_ci							routine.inputs[interfaceInterpolant] =
215cb93a386Sopenharmony_ci							    SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid,
216cb93a386Sopenharmony_ci							                                  primitive + OFFSET(Primitive, V[packedInterpolant]),
217cb93a386Sopenharmony_ci							                                  input.Flat, !input.NoPerspective);
218cb93a386Sopenharmony_ci						}
219cb93a386Sopenharmony_ci						else if(perSampleShading)
220cb93a386Sopenharmony_ci						{
221cb93a386Sopenharmony_ci							routine.inputs[interfaceInterpolant] =
222cb93a386Sopenharmony_ci							    SpirvRoutine::interpolateAtXY(xxxx, yyyy, rhw,
223cb93a386Sopenharmony_ci							                                  primitive + OFFSET(Primitive, V[packedInterpolant]),
224cb93a386Sopenharmony_ci							                                  input.Flat, !input.NoPerspective);
225cb93a386Sopenharmony_ci						}
226cb93a386Sopenharmony_ci						else
227cb93a386Sopenharmony_ci						{
228cb93a386Sopenharmony_ci							routine.inputs[interfaceInterpolant] =
229cb93a386Sopenharmony_ci							    interpolate(xxxx, Dv[interfaceInterpolant], rhw,
230cb93a386Sopenharmony_ci							                primitive + OFFSET(Primitive, V[packedInterpolant]),
231cb93a386Sopenharmony_ci							                input.Flat, !input.NoPerspective);
232cb93a386Sopenharmony_ci						}
233cb93a386Sopenharmony_ci						packedInterpolant++;
234cb93a386Sopenharmony_ci					}
235cb93a386Sopenharmony_ci				}
236cb93a386Sopenharmony_ci
237cb93a386Sopenharmony_ci				setBuiltins(x, y, unclampedZ, w, cMask, samples);
238cb93a386Sopenharmony_ci
239cb93a386Sopenharmony_ci				for(uint32_t i = 0; i < state.numClipDistances; i++)
240cb93a386Sopenharmony_ci				{
241cb93a386Sopenharmony_ci					auto distance = interpolate(xxxx, DclipDistance[i], rhw,
242cb93a386Sopenharmony_ci					                            primitive + OFFSET(Primitive, clipDistance[i]),
243cb93a386Sopenharmony_ci					                            false, true);
244cb93a386Sopenharmony_ci
245cb93a386Sopenharmony_ci					auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
246cb93a386Sopenharmony_ci					for(unsigned int q : samples)
247cb93a386Sopenharmony_ci					{
248cb93a386Sopenharmony_ci						// FIXME(b/148105887): Fragments discarded by clipping do not exist at
249cb93a386Sopenharmony_ci						// all -- they should not be counted in queries or have their Z/S effects
250cb93a386Sopenharmony_ci						// performed when early fragment tests are enabled.
251cb93a386Sopenharmony_ci						cMask[q] &= clipMask;
252cb93a386Sopenharmony_ci					}
253cb93a386Sopenharmony_ci
254cb93a386Sopenharmony_ci					if(spirvShader->getUsedCapabilities().ClipDistance)
255cb93a386Sopenharmony_ci					{
256cb93a386Sopenharmony_ci						auto it = spirvShader->inputBuiltins.find(spv::BuiltInClipDistance);
257cb93a386Sopenharmony_ci						if(it != spirvShader->inputBuiltins.end())
258cb93a386Sopenharmony_ci						{
259cb93a386Sopenharmony_ci							if(i < it->second.SizeInComponents)
260cb93a386Sopenharmony_ci							{
261cb93a386Sopenharmony_ci								routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = distance;
262cb93a386Sopenharmony_ci							}
263cb93a386Sopenharmony_ci						}
264cb93a386Sopenharmony_ci					}
265cb93a386Sopenharmony_ci				}
266cb93a386Sopenharmony_ci
267cb93a386Sopenharmony_ci				if(spirvShader->getUsedCapabilities().CullDistance)
268cb93a386Sopenharmony_ci				{
269cb93a386Sopenharmony_ci					auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
270cb93a386Sopenharmony_ci					if(it != spirvShader->inputBuiltins.end())
271cb93a386Sopenharmony_ci					{
272cb93a386Sopenharmony_ci						for(uint32_t i = 0; i < state.numCullDistances; i++)
273cb93a386Sopenharmony_ci						{
274cb93a386Sopenharmony_ci							if(i < it->second.SizeInComponents)
275cb93a386Sopenharmony_ci							{
276cb93a386Sopenharmony_ci								routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
277cb93a386Sopenharmony_ci								    interpolate(xxxx, DcullDistance[i], rhw,
278cb93a386Sopenharmony_ci								                primitive + OFFSET(Primitive, cullDistance[i]),
279cb93a386Sopenharmony_ci								                false, true);
280cb93a386Sopenharmony_ci							}
281cb93a386Sopenharmony_ci						}
282cb93a386Sopenharmony_ci					}
283cb93a386Sopenharmony_ci				}
284cb93a386Sopenharmony_ci			}
285cb93a386Sopenharmony_ci
286cb93a386Sopenharmony_ci			if(spirvShader)
287cb93a386Sopenharmony_ci			{
288cb93a386Sopenharmony_ci				executeShader(cMask, earlyFragmentTests ? sMask : cMask, earlyFragmentTests ? zMask : cMask, samples);
289cb93a386Sopenharmony_ci			}
290cb93a386Sopenharmony_ci
291cb93a386Sopenharmony_ci			Bool alphaPass = alphaTest(cMask, samples);
292cb93a386Sopenharmony_ci
293cb93a386Sopenharmony_ci			if((spirvShader && spirvShader->getAnalysis().ContainsKill) || state.alphaToCoverage)
294cb93a386Sopenharmony_ci			{
295cb93a386Sopenharmony_ci				for(unsigned int q : samples)
296cb93a386Sopenharmony_ci				{
297cb93a386Sopenharmony_ci					zMask[q] &= cMask[q];
298cb93a386Sopenharmony_ci					sMask[q] &= cMask[q];
299cb93a386Sopenharmony_ci				}
300cb93a386Sopenharmony_ci			}
301cb93a386Sopenharmony_ci
302cb93a386Sopenharmony_ci			If(alphaPass)
303cb93a386Sopenharmony_ci			{
304cb93a386Sopenharmony_ci				if(!earlyFragmentTests)
305cb93a386Sopenharmony_ci				{
306cb93a386Sopenharmony_ci					for(unsigned int q : samples)
307cb93a386Sopenharmony_ci					{
308cb93a386Sopenharmony_ci						z[q] = clampDepth(z[q]);
309cb93a386Sopenharmony_ci						depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
310cb93a386Sopenharmony_ci						depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]);
311cb93a386Sopenharmony_ci					}
312cb93a386Sopenharmony_ci				}
313cb93a386Sopenharmony_ci
314cb93a386Sopenharmony_ci				If(depthPass)
315cb93a386Sopenharmony_ci				{
316cb93a386Sopenharmony_ci					if(!earlyFragmentTests)
317cb93a386Sopenharmony_ci					{
318cb93a386Sopenharmony_ci						writeDepth(zBuffer, x, zMask, samples);
319cb93a386Sopenharmony_ci					}
320cb93a386Sopenharmony_ci
321cb93a386Sopenharmony_ci					blendColor(cBuffer, x, sMask, zMask, cMask, samples);
322cb93a386Sopenharmony_ci
323cb93a386Sopenharmony_ci					occlusionSampleCount(zMask, sMask, samples);
324cb93a386Sopenharmony_ci				}
325cb93a386Sopenharmony_ci			}
326cb93a386Sopenharmony_ci		}
327cb93a386Sopenharmony_ci
328cb93a386Sopenharmony_ci		writeStencil(sBuffer, x, sMask, zMask, cMask, samples);
329cb93a386Sopenharmony_ci	}
330cb93a386Sopenharmony_ci}
331cb93a386Sopenharmony_ci
332cb93a386Sopenharmony_civoid PixelRoutine::stencilTest(const Pointer<Byte> &sBuffer, const Int &x, Int sMask[4], const SampleSet &samples)
333cb93a386Sopenharmony_ci{
334cb93a386Sopenharmony_ci	if(!state.stencilActive)
335cb93a386Sopenharmony_ci	{
336cb93a386Sopenharmony_ci		return;
337cb93a386Sopenharmony_ci	}
338cb93a386Sopenharmony_ci
339cb93a386Sopenharmony_ci	for(unsigned int q : samples)
340cb93a386Sopenharmony_ci	{
341cb93a386Sopenharmony_ci		// (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
342cb93a386Sopenharmony_ci
343cb93a386Sopenharmony_ci		Pointer<Byte> buffer = sBuffer + x;
344cb93a386Sopenharmony_ci
345cb93a386Sopenharmony_ci		if(q > 0)
346cb93a386Sopenharmony_ci		{
347cb93a386Sopenharmony_ci			buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
348cb93a386Sopenharmony_ci		}
349cb93a386Sopenharmony_ci
350cb93a386Sopenharmony_ci		Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
351cb93a386Sopenharmony_ci		Byte8 value = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
352cb93a386Sopenharmony_ci		value = value | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
353cb93a386Sopenharmony_ci		Byte8 valueBack = value;
354cb93a386Sopenharmony_ci
355cb93a386Sopenharmony_ci		if(state.frontStencil.compareMask != 0xff)
356cb93a386Sopenharmony_ci		{
357cb93a386Sopenharmony_ci			value &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].testMaskQ));
358cb93a386Sopenharmony_ci		}
359cb93a386Sopenharmony_ci
360cb93a386Sopenharmony_ci		stencilTest(value, state.frontStencil.compareOp, false);
361cb93a386Sopenharmony_ci
362cb93a386Sopenharmony_ci		if(state.backStencil.compareMask != 0xff)
363cb93a386Sopenharmony_ci		{
364cb93a386Sopenharmony_ci			valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].testMaskQ));
365cb93a386Sopenharmony_ci		}
366cb93a386Sopenharmony_ci
367cb93a386Sopenharmony_ci		stencilTest(valueBack, state.backStencil.compareOp, true);
368cb93a386Sopenharmony_ci
369cb93a386Sopenharmony_ci		value &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
370cb93a386Sopenharmony_ci		valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
371cb93a386Sopenharmony_ci		value |= valueBack;
372cb93a386Sopenharmony_ci
373cb93a386Sopenharmony_ci		sMask[q] &= SignMask(value);
374cb93a386Sopenharmony_ci	}
375cb93a386Sopenharmony_ci}
376cb93a386Sopenharmony_ci
377cb93a386Sopenharmony_civoid PixelRoutine::stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack)
378cb93a386Sopenharmony_ci{
379cb93a386Sopenharmony_ci	Byte8 equal;
380cb93a386Sopenharmony_ci
381cb93a386Sopenharmony_ci	switch(stencilCompareMode)
382cb93a386Sopenharmony_ci	{
383cb93a386Sopenharmony_ci	case VK_COMPARE_OP_ALWAYS:
384cb93a386Sopenharmony_ci		value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
385cb93a386Sopenharmony_ci		break;
386cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NEVER:
387cb93a386Sopenharmony_ci		value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
388cb93a386Sopenharmony_ci		break;
389cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS:  // a < b ~ b > a
390cb93a386Sopenharmony_ci		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
391cb93a386Sopenharmony_ci		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
392cb93a386Sopenharmony_ci		break;
393cb93a386Sopenharmony_ci	case VK_COMPARE_OP_EQUAL:
394cb93a386Sopenharmony_ci		value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
395cb93a386Sopenharmony_ci		break;
396cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NOT_EQUAL:  // a != b ~ !(a == b)
397cb93a386Sopenharmony_ci		value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
398cb93a386Sopenharmony_ci		value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
399cb93a386Sopenharmony_ci		break;
400cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS_OR_EQUAL:  // a <= b ~ (b > a) || (a == b)
401cb93a386Sopenharmony_ci		equal = value;
402cb93a386Sopenharmony_ci		equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
403cb93a386Sopenharmony_ci		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
404cb93a386Sopenharmony_ci		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
405cb93a386Sopenharmony_ci		value |= equal;
406cb93a386Sopenharmony_ci		break;
407cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER:  // a > b
408cb93a386Sopenharmony_ci		equal = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ));
409cb93a386Sopenharmony_ci		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
410cb93a386Sopenharmony_ci		equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
411cb93a386Sopenharmony_ci		value = equal;
412cb93a386Sopenharmony_ci		break;
413cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER_OR_EQUAL:  // a >= b ~ !(a < b) ~ !(b > a)
414cb93a386Sopenharmony_ci		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
415cb93a386Sopenharmony_ci		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
416cb93a386Sopenharmony_ci		value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
417cb93a386Sopenharmony_ci		break;
418cb93a386Sopenharmony_ci	default:
419cb93a386Sopenharmony_ci		UNSUPPORTED("VkCompareOp: %d", int(stencilCompareMode));
420cb93a386Sopenharmony_ci	}
421cb93a386Sopenharmony_ci}
422cb93a386Sopenharmony_ci
423cb93a386Sopenharmony_ciBool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
424cb93a386Sopenharmony_ci{
425cb93a386Sopenharmony_ci	Float4 Z = z;
426cb93a386Sopenharmony_ci
427cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 4 * x;
428cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
429cb93a386Sopenharmony_ci
430cb93a386Sopenharmony_ci	if(q > 0)
431cb93a386Sopenharmony_ci	{
432cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
433cb93a386Sopenharmony_ci	}
434cb93a386Sopenharmony_ci
435cb93a386Sopenharmony_ci	Float4 zValue;
436cb93a386Sopenharmony_ci
437cb93a386Sopenharmony_ci	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
438cb93a386Sopenharmony_ci	{
439cb93a386Sopenharmony_ci		zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
440cb93a386Sopenharmony_ci	}
441cb93a386Sopenharmony_ci
442cb93a386Sopenharmony_ci	Int4 zTest;
443cb93a386Sopenharmony_ci
444cb93a386Sopenharmony_ci	switch(state.depthCompareMode)
445cb93a386Sopenharmony_ci	{
446cb93a386Sopenharmony_ci	case VK_COMPARE_OP_ALWAYS:
447cb93a386Sopenharmony_ci		// Optimized
448cb93a386Sopenharmony_ci		break;
449cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NEVER:
450cb93a386Sopenharmony_ci		// Optimized
451cb93a386Sopenharmony_ci		break;
452cb93a386Sopenharmony_ci	case VK_COMPARE_OP_EQUAL:
453cb93a386Sopenharmony_ci		zTest = CmpEQ(zValue, Z);
454cb93a386Sopenharmony_ci		break;
455cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NOT_EQUAL:
456cb93a386Sopenharmony_ci		zTest = CmpNEQ(zValue, Z);
457cb93a386Sopenharmony_ci		break;
458cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS:
459cb93a386Sopenharmony_ci		zTest = CmpNLE(zValue, Z);
460cb93a386Sopenharmony_ci		break;
461cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER_OR_EQUAL:
462cb93a386Sopenharmony_ci		zTest = CmpLE(zValue, Z);
463cb93a386Sopenharmony_ci		break;
464cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS_OR_EQUAL:
465cb93a386Sopenharmony_ci		zTest = CmpNLT(zValue, Z);
466cb93a386Sopenharmony_ci		break;
467cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER:
468cb93a386Sopenharmony_ci		zTest = CmpLT(zValue, Z);
469cb93a386Sopenharmony_ci		break;
470cb93a386Sopenharmony_ci	default:
471cb93a386Sopenharmony_ci		UNSUPPORTED("VkCompareOp: %d", int(state.depthCompareMode));
472cb93a386Sopenharmony_ci	}
473cb93a386Sopenharmony_ci
474cb93a386Sopenharmony_ci	switch(state.depthCompareMode)
475cb93a386Sopenharmony_ci	{
476cb93a386Sopenharmony_ci	case VK_COMPARE_OP_ALWAYS:
477cb93a386Sopenharmony_ci		zMask = cMask;
478cb93a386Sopenharmony_ci		break;
479cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NEVER:
480cb93a386Sopenharmony_ci		zMask = 0x0;
481cb93a386Sopenharmony_ci		break;
482cb93a386Sopenharmony_ci	default:
483cb93a386Sopenharmony_ci		zMask = SignMask(zTest) & cMask;
484cb93a386Sopenharmony_ci		break;
485cb93a386Sopenharmony_ci	}
486cb93a386Sopenharmony_ci
487cb93a386Sopenharmony_ci	if(state.stencilActive)
488cb93a386Sopenharmony_ci	{
489cb93a386Sopenharmony_ci		zMask &= sMask;
490cb93a386Sopenharmony_ci	}
491cb93a386Sopenharmony_ci
492cb93a386Sopenharmony_ci	return zMask != 0;
493cb93a386Sopenharmony_ci}
494cb93a386Sopenharmony_ci
495cb93a386Sopenharmony_ciBool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
496cb93a386Sopenharmony_ci{
497cb93a386Sopenharmony_ci	Short4 Z = convertFixed16(z, true);
498cb93a386Sopenharmony_ci
499cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 2 * x;
500cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
501cb93a386Sopenharmony_ci
502cb93a386Sopenharmony_ci	if(q > 0)
503cb93a386Sopenharmony_ci	{
504cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
505cb93a386Sopenharmony_ci	}
506cb93a386Sopenharmony_ci
507cb93a386Sopenharmony_ci	Short4 zValue;
508cb93a386Sopenharmony_ci
509cb93a386Sopenharmony_ci	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
510cb93a386Sopenharmony_ci	{
511cb93a386Sopenharmony_ci		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
512cb93a386Sopenharmony_ci		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
513cb93a386Sopenharmony_ci	}
514cb93a386Sopenharmony_ci
515cb93a386Sopenharmony_ci	Int4 zTest;
516cb93a386Sopenharmony_ci
517cb93a386Sopenharmony_ci	// Bias values to make unsigned compares out of Reactor's (due SSE's) signed compares only
518cb93a386Sopenharmony_ci	zValue = zValue - Short4(0x8000u);
519cb93a386Sopenharmony_ci	Z = Z - Short4(0x8000u);
520cb93a386Sopenharmony_ci
521cb93a386Sopenharmony_ci	switch(state.depthCompareMode)
522cb93a386Sopenharmony_ci	{
523cb93a386Sopenharmony_ci	case VK_COMPARE_OP_ALWAYS:
524cb93a386Sopenharmony_ci		// Optimized
525cb93a386Sopenharmony_ci		break;
526cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NEVER:
527cb93a386Sopenharmony_ci		// Optimized
528cb93a386Sopenharmony_ci		break;
529cb93a386Sopenharmony_ci	case VK_COMPARE_OP_EQUAL:
530cb93a386Sopenharmony_ci		zTest = Int4(CmpEQ(zValue, Z));
531cb93a386Sopenharmony_ci		break;
532cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NOT_EQUAL:
533cb93a386Sopenharmony_ci		zTest = ~Int4(CmpEQ(zValue, Z));
534cb93a386Sopenharmony_ci		break;
535cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS:
536cb93a386Sopenharmony_ci		zTest = Int4(CmpGT(zValue, Z));
537cb93a386Sopenharmony_ci		break;
538cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER_OR_EQUAL:
539cb93a386Sopenharmony_ci		zTest = ~Int4(CmpGT(zValue, Z));
540cb93a386Sopenharmony_ci		break;
541cb93a386Sopenharmony_ci	case VK_COMPARE_OP_LESS_OR_EQUAL:
542cb93a386Sopenharmony_ci		zTest = ~Int4(CmpGT(Z, zValue));
543cb93a386Sopenharmony_ci		break;
544cb93a386Sopenharmony_ci	case VK_COMPARE_OP_GREATER:
545cb93a386Sopenharmony_ci		zTest = Int4(CmpGT(Z, zValue));
546cb93a386Sopenharmony_ci		break;
547cb93a386Sopenharmony_ci	default:
548cb93a386Sopenharmony_ci		UNSUPPORTED("VkCompareOp: %d", int(state.depthCompareMode));
549cb93a386Sopenharmony_ci	}
550cb93a386Sopenharmony_ci
551cb93a386Sopenharmony_ci	switch(state.depthCompareMode)
552cb93a386Sopenharmony_ci	{
553cb93a386Sopenharmony_ci	case VK_COMPARE_OP_ALWAYS:
554cb93a386Sopenharmony_ci		zMask = cMask;
555cb93a386Sopenharmony_ci		break;
556cb93a386Sopenharmony_ci	case VK_COMPARE_OP_NEVER:
557cb93a386Sopenharmony_ci		zMask = 0x0;
558cb93a386Sopenharmony_ci		break;
559cb93a386Sopenharmony_ci	default:
560cb93a386Sopenharmony_ci		zMask = SignMask(zTest) & cMask;
561cb93a386Sopenharmony_ci		break;
562cb93a386Sopenharmony_ci	}
563cb93a386Sopenharmony_ci
564cb93a386Sopenharmony_ci	if(state.stencilActive)
565cb93a386Sopenharmony_ci	{
566cb93a386Sopenharmony_ci		zMask &= sMask;
567cb93a386Sopenharmony_ci	}
568cb93a386Sopenharmony_ci
569cb93a386Sopenharmony_ci	return zMask != 0;
570cb93a386Sopenharmony_ci}
571cb93a386Sopenharmony_ci
572cb93a386Sopenharmony_ciFloat4 PixelRoutine::clampDepth(const Float4 &z)
573cb93a386Sopenharmony_ci{
574cb93a386Sopenharmony_ci	if(!state.depthClamp)
575cb93a386Sopenharmony_ci	{
576cb93a386Sopenharmony_ci		return z;
577cb93a386Sopenharmony_ci	}
578cb93a386Sopenharmony_ci
579cb93a386Sopenharmony_ci	return Min(Max(z, Float4(state.minDepthClamp)), Float4(state.maxDepthClamp));
580cb93a386Sopenharmony_ci}
581cb93a386Sopenharmony_ci
582cb93a386Sopenharmony_ciBool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
583cb93a386Sopenharmony_ci{
584cb93a386Sopenharmony_ci	if(!state.depthTestActive)
585cb93a386Sopenharmony_ci	{
586cb93a386Sopenharmony_ci		return true;
587cb93a386Sopenharmony_ci	}
588cb93a386Sopenharmony_ci
589cb93a386Sopenharmony_ci	switch(state.depthFormat)
590cb93a386Sopenharmony_ci	{
591cb93a386Sopenharmony_ci	case VK_FORMAT_D16_UNORM:
592cb93a386Sopenharmony_ci		return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
593cb93a386Sopenharmony_ci	case VK_FORMAT_D32_SFLOAT:
594cb93a386Sopenharmony_ci	case VK_FORMAT_D32_SFLOAT_S8_UINT:
595cb93a386Sopenharmony_ci		return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
596cb93a386Sopenharmony_ci	default:
597cb93a386Sopenharmony_ci		UNSUPPORTED("Depth format: %d", int(state.depthFormat));
598cb93a386Sopenharmony_ci		return false;
599cb93a386Sopenharmony_ci	}
600cb93a386Sopenharmony_ci}
601cb93a386Sopenharmony_ci
602cb93a386Sopenharmony_ciInt4 PixelRoutine::depthBoundsTest16(const Pointer<Byte> &zBuffer, int q, const Int &x)
603cb93a386Sopenharmony_ci{
604cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 2 * x;
605cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
606cb93a386Sopenharmony_ci
607cb93a386Sopenharmony_ci	if(q > 0)
608cb93a386Sopenharmony_ci	{
609cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
610cb93a386Sopenharmony_ci	}
611cb93a386Sopenharmony_ci
612cb93a386Sopenharmony_ci	Float4 minDepthBound(state.minDepthBounds);
613cb93a386Sopenharmony_ci	Float4 maxDepthBound(state.maxDepthBounds);
614cb93a386Sopenharmony_ci
615cb93a386Sopenharmony_ci	Int2 z;
616cb93a386Sopenharmony_ci	z = Insert(z, *Pointer<Int>(buffer), 0);
617cb93a386Sopenharmony_ci	z = Insert(z, *Pointer<Int>(buffer + pitch), 1);
618cb93a386Sopenharmony_ci
619cb93a386Sopenharmony_ci	Float4 zValue = convertFloat32(As<UShort4>(z));
620cb93a386Sopenharmony_ci	return Int4(CmpLE(minDepthBound, zValue) & CmpLE(zValue, maxDepthBound));
621cb93a386Sopenharmony_ci}
622cb93a386Sopenharmony_ci
623cb93a386Sopenharmony_ciInt4 PixelRoutine::depthBoundsTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x)
624cb93a386Sopenharmony_ci{
625cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 4 * x;
626cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
627cb93a386Sopenharmony_ci
628cb93a386Sopenharmony_ci	if(q > 0)
629cb93a386Sopenharmony_ci	{
630cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
631cb93a386Sopenharmony_ci	}
632cb93a386Sopenharmony_ci
633cb93a386Sopenharmony_ci	Float4 zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
634cb93a386Sopenharmony_ci	return Int4(CmpLE(Float4(state.minDepthBounds), zValue) & CmpLE(zValue, Float4(state.maxDepthBounds)));
635cb93a386Sopenharmony_ci}
636cb93a386Sopenharmony_ci
637cb93a386Sopenharmony_civoid PixelRoutine::depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask)
638cb93a386Sopenharmony_ci{
639cb93a386Sopenharmony_ci	if(!state.depthBoundsTestActive)
640cb93a386Sopenharmony_ci	{
641cb93a386Sopenharmony_ci		return;
642cb93a386Sopenharmony_ci	}
643cb93a386Sopenharmony_ci
644cb93a386Sopenharmony_ci	Int4 zTest;
645cb93a386Sopenharmony_ci	switch(state.depthFormat)
646cb93a386Sopenharmony_ci	{
647cb93a386Sopenharmony_ci	case VK_FORMAT_D16_UNORM:
648cb93a386Sopenharmony_ci		zTest = depthBoundsTest16(zBuffer, q, x);
649cb93a386Sopenharmony_ci		break;
650cb93a386Sopenharmony_ci	case VK_FORMAT_D32_SFLOAT:
651cb93a386Sopenharmony_ci	case VK_FORMAT_D32_SFLOAT_S8_UINT:
652cb93a386Sopenharmony_ci		zTest = depthBoundsTest32F(zBuffer, q, x);
653cb93a386Sopenharmony_ci		break;
654cb93a386Sopenharmony_ci	default:
655cb93a386Sopenharmony_ci		UNSUPPORTED("Depth format: %d", int(state.depthFormat));
656cb93a386Sopenharmony_ci		break;
657cb93a386Sopenharmony_ci	}
658cb93a386Sopenharmony_ci
659cb93a386Sopenharmony_ci	if(!state.depthTestActive)
660cb93a386Sopenharmony_ci	{
661cb93a386Sopenharmony_ci		cMask &= zMask & SignMask(zTest);
662cb93a386Sopenharmony_ci	}
663cb93a386Sopenharmony_ci	else
664cb93a386Sopenharmony_ci	{
665cb93a386Sopenharmony_ci		zMask &= cMask & SignMask(zTest);
666cb93a386Sopenharmony_ci	}
667cb93a386Sopenharmony_ci}
668cb93a386Sopenharmony_ci
669cb93a386Sopenharmony_civoid PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples)
670cb93a386Sopenharmony_ci{
671cb93a386Sopenharmony_ci	static const int a2c[4] = {
672cb93a386Sopenharmony_ci		OFFSET(DrawData, a2c0),
673cb93a386Sopenharmony_ci		OFFSET(DrawData, a2c1),
674cb93a386Sopenharmony_ci		OFFSET(DrawData, a2c2),
675cb93a386Sopenharmony_ci		OFFSET(DrawData, a2c3),
676cb93a386Sopenharmony_ci	};
677cb93a386Sopenharmony_ci
678cb93a386Sopenharmony_ci	for(unsigned int q : samples)
679cb93a386Sopenharmony_ci	{
680cb93a386Sopenharmony_ci		Int4 coverage = CmpNLT(alpha, *Pointer<Float4>(data + a2c[q]));
681cb93a386Sopenharmony_ci		Int aMask = SignMask(coverage);
682cb93a386Sopenharmony_ci		cMask[q] &= aMask;
683cb93a386Sopenharmony_ci	}
684cb93a386Sopenharmony_ci}
685cb93a386Sopenharmony_ci
686cb93a386Sopenharmony_civoid PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
687cb93a386Sopenharmony_ci{
688cb93a386Sopenharmony_ci	Float4 Z = z;
689cb93a386Sopenharmony_ci
690cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 4 * x;
691cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
692cb93a386Sopenharmony_ci
693cb93a386Sopenharmony_ci	if(q > 0)
694cb93a386Sopenharmony_ci	{
695cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
696cb93a386Sopenharmony_ci	}
697cb93a386Sopenharmony_ci
698cb93a386Sopenharmony_ci	Float4 zValue;
699cb93a386Sopenharmony_ci
700cb93a386Sopenharmony_ci	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
701cb93a386Sopenharmony_ci	{
702cb93a386Sopenharmony_ci		zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
703cb93a386Sopenharmony_ci	}
704cb93a386Sopenharmony_ci
705cb93a386Sopenharmony_ci	Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
706cb93a386Sopenharmony_ci	zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
707cb93a386Sopenharmony_ci	Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
708cb93a386Sopenharmony_ci
709cb93a386Sopenharmony_ci	*Pointer<Float2>(buffer) = Float2(Z.xy);
710cb93a386Sopenharmony_ci	*Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
711cb93a386Sopenharmony_ci}
712cb93a386Sopenharmony_ci
713cb93a386Sopenharmony_civoid PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
714cb93a386Sopenharmony_ci{
715cb93a386Sopenharmony_ci	Short4 Z = As<Short4>(convertFixed16(z, true));
716cb93a386Sopenharmony_ci
717cb93a386Sopenharmony_ci	Pointer<Byte> buffer = zBuffer + 2 * x;
718cb93a386Sopenharmony_ci	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
719cb93a386Sopenharmony_ci
720cb93a386Sopenharmony_ci	if(q > 0)
721cb93a386Sopenharmony_ci	{
722cb93a386Sopenharmony_ci		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
723cb93a386Sopenharmony_ci	}
724cb93a386Sopenharmony_ci
725cb93a386Sopenharmony_ci	Short4 zValue;
726cb93a386Sopenharmony_ci
727cb93a386Sopenharmony_ci	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
728cb93a386Sopenharmony_ci	{
729cb93a386Sopenharmony_ci		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
730cb93a386Sopenharmony_ci		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
731cb93a386Sopenharmony_ci	}
732cb93a386Sopenharmony_ci
733cb93a386Sopenharmony_ci	Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
734cb93a386Sopenharmony_ci	zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
735cb93a386Sopenharmony_ci	Z = Z | zValue;
736cb93a386Sopenharmony_ci
737cb93a386Sopenharmony_ci	*Pointer<Int>(buffer) = Extract(As<Int2>(Z), 0);
738cb93a386Sopenharmony_ci	*Pointer<Int>(buffer + pitch) = Extract(As<Int2>(Z), 1);
739cb93a386Sopenharmony_ci}
740cb93a386Sopenharmony_ci
741cb93a386Sopenharmony_civoid PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, const Int &x, const Int zMask[4], const SampleSet &samples)
742cb93a386Sopenharmony_ci{
743cb93a386Sopenharmony_ci	if(!state.depthWriteEnable)
744cb93a386Sopenharmony_ci	{
745cb93a386Sopenharmony_ci		return;
746cb93a386Sopenharmony_ci	}
747cb93a386Sopenharmony_ci
748cb93a386Sopenharmony_ci	for(unsigned int q : samples)
749cb93a386Sopenharmony_ci	{
750cb93a386Sopenharmony_ci		switch(state.depthFormat)
751cb93a386Sopenharmony_ci		{
752cb93a386Sopenharmony_ci		case VK_FORMAT_D16_UNORM:
753cb93a386Sopenharmony_ci			writeDepth16(zBuffer, q, x, z[q], zMask[q]);
754cb93a386Sopenharmony_ci			break;
755cb93a386Sopenharmony_ci		case VK_FORMAT_D32_SFLOAT:
756cb93a386Sopenharmony_ci		case VK_FORMAT_D32_SFLOAT_S8_UINT:
757cb93a386Sopenharmony_ci			writeDepth32F(zBuffer, q, x, z[q], zMask[q]);
758cb93a386Sopenharmony_ci			break;
759cb93a386Sopenharmony_ci		default:
760cb93a386Sopenharmony_ci			UNSUPPORTED("Depth format: %d", int(state.depthFormat));
761cb93a386Sopenharmony_ci			break;
762cb93a386Sopenharmony_ci		}
763cb93a386Sopenharmony_ci	}
764cb93a386Sopenharmony_ci}
765cb93a386Sopenharmony_ci
766cb93a386Sopenharmony_civoid PixelRoutine::occlusionSampleCount(const Int zMask[4], const Int sMask[4], const SampleSet &samples)
767cb93a386Sopenharmony_ci{
768cb93a386Sopenharmony_ci	if(!state.occlusionEnabled)
769cb93a386Sopenharmony_ci	{
770cb93a386Sopenharmony_ci		return;
771cb93a386Sopenharmony_ci	}
772cb93a386Sopenharmony_ci
773cb93a386Sopenharmony_ci	for(unsigned int q : samples)
774cb93a386Sopenharmony_ci	{
775cb93a386Sopenharmony_ci		occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q]));
776cb93a386Sopenharmony_ci	}
777cb93a386Sopenharmony_ci}
778cb93a386Sopenharmony_ci
779cb93a386Sopenharmony_civoid PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, const Int &x, const Int sMask[4], const Int zMask[4], const Int cMask[4], const SampleSet &samples)
780cb93a386Sopenharmony_ci{
781cb93a386Sopenharmony_ci	if(!state.stencilActive)
782cb93a386Sopenharmony_ci	{
783cb93a386Sopenharmony_ci		return;
784cb93a386Sopenharmony_ci	}
785cb93a386Sopenharmony_ci
786cb93a386Sopenharmony_ci	if(state.frontStencil.passOp == VK_STENCIL_OP_KEEP && state.frontStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.frontStencil.failOp == VK_STENCIL_OP_KEEP)
787cb93a386Sopenharmony_ci	{
788cb93a386Sopenharmony_ci		if(state.backStencil.passOp == VK_STENCIL_OP_KEEP && state.backStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.backStencil.failOp == VK_STENCIL_OP_KEEP)
789cb93a386Sopenharmony_ci		{
790cb93a386Sopenharmony_ci			return;
791cb93a386Sopenharmony_ci		}
792cb93a386Sopenharmony_ci	}
793cb93a386Sopenharmony_ci
794cb93a386Sopenharmony_ci	if((state.frontStencil.writeMask == 0) && (state.backStencil.writeMask == 0))
795cb93a386Sopenharmony_ci	{
796cb93a386Sopenharmony_ci		return;
797cb93a386Sopenharmony_ci	}
798cb93a386Sopenharmony_ci
799cb93a386Sopenharmony_ci	for(unsigned int q : samples)
800cb93a386Sopenharmony_ci	{
801cb93a386Sopenharmony_ci		Pointer<Byte> buffer = sBuffer + x;
802cb93a386Sopenharmony_ci
803cb93a386Sopenharmony_ci		if(q > 0)
804cb93a386Sopenharmony_ci		{
805cb93a386Sopenharmony_ci			buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
806cb93a386Sopenharmony_ci		}
807cb93a386Sopenharmony_ci
808cb93a386Sopenharmony_ci		Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
809cb93a386Sopenharmony_ci		Byte8 bufferValue = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
810cb93a386Sopenharmony_ci		bufferValue = bufferValue | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
811cb93a386Sopenharmony_ci		Byte8 newValue;
812cb93a386Sopenharmony_ci		stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask[q], sMask[q]);
813cb93a386Sopenharmony_ci
814cb93a386Sopenharmony_ci		if((state.frontStencil.writeMask & 0xFF) != 0xFF)  // Assume 8-bit stencil buffer
815cb93a386Sopenharmony_ci		{
816cb93a386Sopenharmony_ci			Byte8 maskedValue = bufferValue;
817cb93a386Sopenharmony_ci			newValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].writeMaskQ));
818cb93a386Sopenharmony_ci			maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].invWriteMaskQ));
819cb93a386Sopenharmony_ci			newValue |= maskedValue;
820cb93a386Sopenharmony_ci		}
821cb93a386Sopenharmony_ci
822cb93a386Sopenharmony_ci		Byte8 newValueBack;
823cb93a386Sopenharmony_ci
824cb93a386Sopenharmony_ci		stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask[q], sMask[q]);
825cb93a386Sopenharmony_ci
826cb93a386Sopenharmony_ci		if((state.backStencil.writeMask & 0xFF) != 0xFF)  // Assume 8-bit stencil buffer
827cb93a386Sopenharmony_ci		{
828cb93a386Sopenharmony_ci			Byte8 maskedValue = bufferValue;
829cb93a386Sopenharmony_ci			newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].writeMaskQ));
830cb93a386Sopenharmony_ci			maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].invWriteMaskQ));
831cb93a386Sopenharmony_ci			newValueBack |= maskedValue;
832cb93a386Sopenharmony_ci		}
833cb93a386Sopenharmony_ci
834cb93a386Sopenharmony_ci		newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
835cb93a386Sopenharmony_ci		newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
836cb93a386Sopenharmony_ci		newValue |= newValueBack;
837cb93a386Sopenharmony_ci
838cb93a386Sopenharmony_ci		newValue &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * cMask[q]);
839cb93a386Sopenharmony_ci		bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * cMask[q]);
840cb93a386Sopenharmony_ci		newValue |= bufferValue;
841cb93a386Sopenharmony_ci
842cb93a386Sopenharmony_ci		*Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0);
843cb93a386Sopenharmony_ci		*Pointer<Short>(buffer + pitch) = Extract(As<Short4>(newValue), 1);
844cb93a386Sopenharmony_ci	}
845cb93a386Sopenharmony_ci}
846cb93a386Sopenharmony_ci
847cb93a386Sopenharmony_civoid PixelRoutine::stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask)
848cb93a386Sopenharmony_ci{
849cb93a386Sopenharmony_ci	Byte8 &pass = newValue;
850cb93a386Sopenharmony_ci	Byte8 fail;
851cb93a386Sopenharmony_ci	Byte8 zFail;
852cb93a386Sopenharmony_ci
853cb93a386Sopenharmony_ci	stencilOperation(pass, bufferValue, ops.passOp, isBack);
854cb93a386Sopenharmony_ci
855cb93a386Sopenharmony_ci	if(ops.depthFailOp != ops.passOp)
856cb93a386Sopenharmony_ci	{
857cb93a386Sopenharmony_ci		stencilOperation(zFail, bufferValue, ops.depthFailOp, isBack);
858cb93a386Sopenharmony_ci	}
859cb93a386Sopenharmony_ci
860cb93a386Sopenharmony_ci	if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
861cb93a386Sopenharmony_ci	{
862cb93a386Sopenharmony_ci		stencilOperation(fail, bufferValue, ops.failOp, isBack);
863cb93a386Sopenharmony_ci	}
864cb93a386Sopenharmony_ci
865cb93a386Sopenharmony_ci	if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
866cb93a386Sopenharmony_ci	{
867cb93a386Sopenharmony_ci		if(state.depthTestActive && ops.depthFailOp != ops.passOp)  // zMask valid and values not the same
868cb93a386Sopenharmony_ci		{
869cb93a386Sopenharmony_ci			pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * zMask);
870cb93a386Sopenharmony_ci			zFail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * zMask);
871cb93a386Sopenharmony_ci			pass |= zFail;
872cb93a386Sopenharmony_ci		}
873cb93a386Sopenharmony_ci
874cb93a386Sopenharmony_ci		pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * sMask);
875cb93a386Sopenharmony_ci		fail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * sMask);
876cb93a386Sopenharmony_ci		pass |= fail;
877cb93a386Sopenharmony_ci	}
878cb93a386Sopenharmony_ci}
879cb93a386Sopenharmony_ci
880cb93a386Sopenharmony_ciByte8 PixelRoutine::stencilReplaceRef(bool isBack)
881cb93a386Sopenharmony_ci{
882cb93a386Sopenharmony_ci	if(spirvShader)
883cb93a386Sopenharmony_ci	{
884cb93a386Sopenharmony_ci		auto it = spirvShader->outputBuiltins.find(spv::BuiltInFragStencilRefEXT);
885cb93a386Sopenharmony_ci		if(it != spirvShader->outputBuiltins.end())
886cb93a386Sopenharmony_ci		{
887cb93a386Sopenharmony_ci			UInt4 sRef = As<UInt4>(routine.getVariable(it->second.Id)[it->second.FirstComponent]) & UInt4(0xff);
888cb93a386Sopenharmony_ci			// TODO (b/148295813): Could be done with a single pshufb instruction. Optimize the
889cb93a386Sopenharmony_ci			//                     following line by either adding a rr::Shuffle() variant to do
890cb93a386Sopenharmony_ci			//                     it explicitly or adding a Byte4(Int4) constructor would work.
891cb93a386Sopenharmony_ci			sRef.x = rr::UInt(sRef.x) | (rr::UInt(sRef.y) << 8) | (rr::UInt(sRef.z) << 16) | (rr::UInt(sRef.w) << 24);
892cb93a386Sopenharmony_ci
893cb93a386Sopenharmony_ci			UInt2 sRefDuplicated;
894cb93a386Sopenharmony_ci			sRefDuplicated = Insert(sRefDuplicated, sRef.x, 0);
895cb93a386Sopenharmony_ci			sRefDuplicated = Insert(sRefDuplicated, sRef.x, 1);
896cb93a386Sopenharmony_ci			return As<Byte8>(sRefDuplicated);
897cb93a386Sopenharmony_ci		}
898cb93a386Sopenharmony_ci	}
899cb93a386Sopenharmony_ci
900cb93a386Sopenharmony_ci	return *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
901cb93a386Sopenharmony_ci}
902cb93a386Sopenharmony_ci
903cb93a386Sopenharmony_civoid PixelRoutine::stencilOperation(Byte8 &output, const Byte8 &bufferValue, VkStencilOp operation, bool isBack)
904cb93a386Sopenharmony_ci{
905cb93a386Sopenharmony_ci	switch(operation)
906cb93a386Sopenharmony_ci	{
907cb93a386Sopenharmony_ci	case VK_STENCIL_OP_KEEP:
908cb93a386Sopenharmony_ci		output = bufferValue;
909cb93a386Sopenharmony_ci		break;
910cb93a386Sopenharmony_ci	case VK_STENCIL_OP_ZERO:
911cb93a386Sopenharmony_ci		output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
912cb93a386Sopenharmony_ci		break;
913cb93a386Sopenharmony_ci	case VK_STENCIL_OP_REPLACE:
914cb93a386Sopenharmony_ci		output = stencilReplaceRef(isBack);
915cb93a386Sopenharmony_ci		break;
916cb93a386Sopenharmony_ci	case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
917cb93a386Sopenharmony_ci		output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
918cb93a386Sopenharmony_ci		break;
919cb93a386Sopenharmony_ci	case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
920cb93a386Sopenharmony_ci		output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
921cb93a386Sopenharmony_ci		break;
922cb93a386Sopenharmony_ci	case VK_STENCIL_OP_INVERT:
923cb93a386Sopenharmony_ci		output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
924cb93a386Sopenharmony_ci		break;
925cb93a386Sopenharmony_ci	case VK_STENCIL_OP_INCREMENT_AND_WRAP:
926cb93a386Sopenharmony_ci		output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
927cb93a386Sopenharmony_ci		break;
928cb93a386Sopenharmony_ci	case VK_STENCIL_OP_DECREMENT_AND_WRAP:
929cb93a386Sopenharmony_ci		output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
930cb93a386Sopenharmony_ci		break;
931cb93a386Sopenharmony_ci	default:
932cb93a386Sopenharmony_ci		UNSUPPORTED("VkStencilOp: %d", int(operation));
933cb93a386Sopenharmony_ci	}
934cb93a386Sopenharmony_ci}
935cb93a386Sopenharmony_ci
936cb93a386Sopenharmony_cibool PixelRoutine::isSRGB(int index) const
937cb93a386Sopenharmony_ci{
938cb93a386Sopenharmony_ci	return vk::Format(state.colorFormat[index]).isSRGBformat();
939cb93a386Sopenharmony_ci}
940cb93a386Sopenharmony_ci
941cb93a386Sopenharmony_civoid PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &pixel)
942cb93a386Sopenharmony_ci{
943cb93a386Sopenharmony_ci	Short4 c01;
944cb93a386Sopenharmony_ci	Short4 c23;
945cb93a386Sopenharmony_ci	Pointer<Byte> buffer = cBuffer;
946cb93a386Sopenharmony_ci	Pointer<Byte> buffer2;
947cb93a386Sopenharmony_ci
948cb93a386Sopenharmony_ci	Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
949cb93a386Sopenharmony_ci
950cb93a386Sopenharmony_ci	switch(state.colorFormat[index])
951cb93a386Sopenharmony_ci	{
952cb93a386Sopenharmony_ci	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
953cb93a386Sopenharmony_ci		buffer += 2 * x;
954cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
955cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
956cb93a386Sopenharmony_ci
957cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0xF000u));
958cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x0F00u)) << 4;
959cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x00F0u)) << 8;
960cb93a386Sopenharmony_ci		pixel.w = (c01 & Short4(0x000Fu)) << 12;
961cb93a386Sopenharmony_ci
962cb93a386Sopenharmony_ci		// Expand to 16 bit range
963cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
964cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
965cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
966cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
967cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
968cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
969cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
970cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
971cb93a386Sopenharmony_ci		break;
972cb93a386Sopenharmony_ci	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
973cb93a386Sopenharmony_ci		buffer += 2 * x;
974cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
975cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
976cb93a386Sopenharmony_ci
977cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0xF000u));
978cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x0F00u)) << 4;
979cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x00F0u)) << 8;
980cb93a386Sopenharmony_ci		pixel.w = (c01 & Short4(0x000Fu)) << 12;
981cb93a386Sopenharmony_ci
982cb93a386Sopenharmony_ci		// Expand to 16 bit range
983cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
984cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
985cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
986cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
987cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
988cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
989cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
990cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
991cb93a386Sopenharmony_ci		break;
992cb93a386Sopenharmony_ci	case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
993cb93a386Sopenharmony_ci		buffer += 2 * x;
994cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
995cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
996cb93a386Sopenharmony_ci
997cb93a386Sopenharmony_ci		pixel.w = (c01 & Short4(0xF000u));
998cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x0F00u)) << 4;
999cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x00F0u)) << 8;
1000cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x000Fu)) << 12;
1001cb93a386Sopenharmony_ci
1002cb93a386Sopenharmony_ci		// Expand to 16 bit range
1003cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
1004cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
1005cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
1006cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
1007cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
1008cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
1009cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
1010cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
1011cb93a386Sopenharmony_ci		break;
1012cb93a386Sopenharmony_ci	case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1013cb93a386Sopenharmony_ci		buffer += 2 * x;
1014cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1015cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1016cb93a386Sopenharmony_ci
1017cb93a386Sopenharmony_ci		pixel.w = (c01 & Short4(0xF000u));
1018cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x0F00u)) << 4;
1019cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x00F0u)) << 8;
1020cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x000Fu)) << 12;
1021cb93a386Sopenharmony_ci
1022cb93a386Sopenharmony_ci		// Expand to 16 bit range
1023cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
1024cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
1025cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
1026cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
1027cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
1028cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
1029cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
1030cb93a386Sopenharmony_ci		pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
1031cb93a386Sopenharmony_ci		break;
1032cb93a386Sopenharmony_ci	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1033cb93a386Sopenharmony_ci		buffer += 2 * x;
1034cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1035cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1036cb93a386Sopenharmony_ci
1037cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0xF800u));
1038cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x07C0u)) << 5;
1039cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x003Eu)) << 10;
1040cb93a386Sopenharmony_ci		pixel.w = ((c01 & Short4(0x0001u)) << 15) >> 15;
1041cb93a386Sopenharmony_ci
1042cb93a386Sopenharmony_ci		// Expand to 16 bit range
1043cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
1044cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1045cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
1046cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
1047cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
1048cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1049cb93a386Sopenharmony_ci		break;
1050cb93a386Sopenharmony_ci	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1051cb93a386Sopenharmony_ci		buffer += 2 * x;
1052cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1053cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1054cb93a386Sopenharmony_ci
1055cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0xF800u));
1056cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x07C0u)) << 5;
1057cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x003Eu)) << 10;
1058cb93a386Sopenharmony_ci		pixel.w = ((c01 & Short4(0x0001u)) << 15) >> 15;
1059cb93a386Sopenharmony_ci
1060cb93a386Sopenharmony_ci		// Expand to 16 bit range
1061cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
1062cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1063cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
1064cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
1065cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
1066cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1067cb93a386Sopenharmony_ci		break;
1068cb93a386Sopenharmony_ci	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1069cb93a386Sopenharmony_ci		buffer += 2 * x;
1070cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1071cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1072cb93a386Sopenharmony_ci
1073cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x7C00u)) << 1;
1074cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x03E0u)) << 6;
1075cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x001Fu)) << 11;
1076cb93a386Sopenharmony_ci		pixel.w = (c01 & Short4(0x8000u)) >> 15;
1077cb93a386Sopenharmony_ci
1078cb93a386Sopenharmony_ci		// Expand to 16 bit range
1079cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
1080cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1081cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
1082cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
1083cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
1084cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1085cb93a386Sopenharmony_ci		break;
1086cb93a386Sopenharmony_ci	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1087cb93a386Sopenharmony_ci		buffer += 2 * x;
1088cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1089cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1090cb93a386Sopenharmony_ci
1091cb93a386Sopenharmony_ci		pixel.x = c01 & Short4(0xF800u);
1092cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x07E0u)) << 5;
1093cb93a386Sopenharmony_ci		pixel.z = (c01 & Short4(0x001Fu)) << 11;
1094cb93a386Sopenharmony_ci		pixel.w = Short4(0xFFFFu);
1095cb93a386Sopenharmony_ci
1096cb93a386Sopenharmony_ci		// Expand to 16 bit range
1097cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
1098cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1099cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
1100cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
1101cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
1102cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1103cb93a386Sopenharmony_ci		break;
1104cb93a386Sopenharmony_ci	case VK_FORMAT_B5G6R5_UNORM_PACK16:
1105cb93a386Sopenharmony_ci		buffer += 2 * x;
1106cb93a386Sopenharmony_ci		buffer2 = buffer + pitchB;
1107cb93a386Sopenharmony_ci		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
1108cb93a386Sopenharmony_ci
1109cb93a386Sopenharmony_ci		pixel.z = c01 & Short4(0xF800u);
1110cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0x07E0u)) << 5;
1111cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x001Fu)) << 11;
1112cb93a386Sopenharmony_ci		pixel.w = Short4(0xFFFFu);
1113cb93a386Sopenharmony_ci
1114cb93a386Sopenharmony_ci		// Expand to 16 bit range
1115cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
1116cb93a386Sopenharmony_ci		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1117cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
1118cb93a386Sopenharmony_ci		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
1119cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
1120cb93a386Sopenharmony_ci		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1121cb93a386Sopenharmony_ci		break;
1122cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_UNORM:
1123cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_SRGB:
1124cb93a386Sopenharmony_ci		buffer += 4 * x;
1125cb93a386Sopenharmony_ci		c01 = *Pointer<Short4>(buffer);
1126cb93a386Sopenharmony_ci		buffer += pitchB;
1127cb93a386Sopenharmony_ci		c23 = *Pointer<Short4>(buffer);
1128cb93a386Sopenharmony_ci		pixel.z = c01;
1129cb93a386Sopenharmony_ci		pixel.y = c01;
1130cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1131cb93a386Sopenharmony_ci		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1132cb93a386Sopenharmony_ci		pixel.x = pixel.z;
1133cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1134cb93a386Sopenharmony_ci		pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1135cb93a386Sopenharmony_ci		pixel.y = pixel.z;
1136cb93a386Sopenharmony_ci		pixel.w = pixel.x;
1137cb93a386Sopenharmony_ci		pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1138cb93a386Sopenharmony_ci		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1139cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1140cb93a386Sopenharmony_ci		pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1141cb93a386Sopenharmony_ci		break;
1142cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UNORM:
1143cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SRGB:
1144cb93a386Sopenharmony_ci		buffer += 4 * x;
1145cb93a386Sopenharmony_ci		c01 = *Pointer<Short4>(buffer);
1146cb93a386Sopenharmony_ci		buffer += pitchB;
1147cb93a386Sopenharmony_ci		c23 = *Pointer<Short4>(buffer);
1148cb93a386Sopenharmony_ci		pixel.z = c01;
1149cb93a386Sopenharmony_ci		pixel.y = c01;
1150cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1151cb93a386Sopenharmony_ci		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1152cb93a386Sopenharmony_ci		pixel.x = pixel.z;
1153cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1154cb93a386Sopenharmony_ci		pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1155cb93a386Sopenharmony_ci		pixel.y = pixel.z;
1156cb93a386Sopenharmony_ci		pixel.w = pixel.x;
1157cb93a386Sopenharmony_ci		pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1158cb93a386Sopenharmony_ci		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1159cb93a386Sopenharmony_ci		pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1160cb93a386Sopenharmony_ci		pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1161cb93a386Sopenharmony_ci		break;
1162cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UNORM:
1163cb93a386Sopenharmony_ci		buffer += 1 * x;
1164cb93a386Sopenharmony_ci		pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1165cb93a386Sopenharmony_ci		buffer += pitchB;
1166cb93a386Sopenharmony_ci		pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1167cb93a386Sopenharmony_ci		pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1168cb93a386Sopenharmony_ci		pixel.y = Short4(0x0000);
1169cb93a386Sopenharmony_ci		pixel.z = Short4(0x0000);
1170cb93a386Sopenharmony_ci		pixel.w = Short4(0xFFFFu);
1171cb93a386Sopenharmony_ci		break;
1172cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UNORM:
1173cb93a386Sopenharmony_ci		buffer += 2 * x;
1174cb93a386Sopenharmony_ci		c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
1175cb93a386Sopenharmony_ci		buffer += pitchB;
1176cb93a386Sopenharmony_ci		c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1177cb93a386Sopenharmony_ci		pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1178cb93a386Sopenharmony_ci		pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1179cb93a386Sopenharmony_ci		pixel.z = Short4(0x0000u);
1180cb93a386Sopenharmony_ci		pixel.w = Short4(0xFFFFu);
1181cb93a386Sopenharmony_ci		break;
1182cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1183cb93a386Sopenharmony_ci		{
1184cb93a386Sopenharmony_ci			Int4 v = Int4(0);
1185cb93a386Sopenharmony_ci			buffer += 4 * x;
1186cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 0), 0);
1187cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4), 1);
1188cb93a386Sopenharmony_ci			buffer += pitchB;
1189cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 0), 2);
1190cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4), 3);
1191cb93a386Sopenharmony_ci
1192cb93a386Sopenharmony_ci			pixel.x = Short4(v << 6) & Short4(0xFFC0u);
1193cb93a386Sopenharmony_ci			pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
1194cb93a386Sopenharmony_ci			pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
1195cb93a386Sopenharmony_ci			pixel.w = Short4(v >> 16) & Short4(0xC000u);
1196cb93a386Sopenharmony_ci
1197cb93a386Sopenharmony_ci			// Expand to 16 bit range
1198cb93a386Sopenharmony_ci			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1199cb93a386Sopenharmony_ci			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
1200cb93a386Sopenharmony_ci			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1201cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 2);
1202cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
1203cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
1204cb93a386Sopenharmony_ci		}
1205cb93a386Sopenharmony_ci		break;
1206cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1207cb93a386Sopenharmony_ci		{
1208cb93a386Sopenharmony_ci			Int4 v = Int4(0);
1209cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
1210cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
1211cb93a386Sopenharmony_ci			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1212cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
1213cb93a386Sopenharmony_ci			v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
1214cb93a386Sopenharmony_ci
1215cb93a386Sopenharmony_ci			pixel.x = Short4(v >> 14) & Short4(0xFFC0u);
1216cb93a386Sopenharmony_ci			pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
1217cb93a386Sopenharmony_ci			pixel.z = Short4(v << 6) & Short4(0xFFC0u);
1218cb93a386Sopenharmony_ci			pixel.w = Short4(v >> 16) & Short4(0xC000u);
1219cb93a386Sopenharmony_ci
1220cb93a386Sopenharmony_ci			// Expand to 16 bit range
1221cb93a386Sopenharmony_ci			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
1222cb93a386Sopenharmony_ci			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
1223cb93a386Sopenharmony_ci			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
1224cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 2);
1225cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
1226cb93a386Sopenharmony_ci			pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
1227cb93a386Sopenharmony_ci		}
1228cb93a386Sopenharmony_ci		break;
1229cb93a386Sopenharmony_ci	default:
1230cb93a386Sopenharmony_ci		UNSUPPORTED("VkFormat %d", int(state.colorFormat[index]));
1231cb93a386Sopenharmony_ci	}
1232cb93a386Sopenharmony_ci
1233cb93a386Sopenharmony_ci	if(isSRGB(index))
1234cb93a386Sopenharmony_ci	{
1235cb93a386Sopenharmony_ci		sRGBtoLinear16_12_16(pixel);
1236cb93a386Sopenharmony_ci	}
1237cb93a386Sopenharmony_ci}
1238cb93a386Sopenharmony_ci
1239cb93a386Sopenharmony_civoid PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask)
1240cb93a386Sopenharmony_ci{
1241cb93a386Sopenharmony_ci	if(isSRGB(index))
1242cb93a386Sopenharmony_ci	{
1243cb93a386Sopenharmony_ci		linearToSRGB16_12_16(current);
1244cb93a386Sopenharmony_ci	}
1245cb93a386Sopenharmony_ci
1246cb93a386Sopenharmony_ci	switch(state.colorFormat[index])
1247cb93a386Sopenharmony_ci	{
1248cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_UNORM:
1249cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_SRGB:
1250cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UNORM:
1251cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SRGB:
1252cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UNORM:
1253cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UNORM:
1254cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1255cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1256cb93a386Sopenharmony_ci		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1257cb93a386Sopenharmony_ci		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1258cb93a386Sopenharmony_ci		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1259cb93a386Sopenharmony_ci		current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
1260cb93a386Sopenharmony_ci		break;
1261cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1262cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1263cb93a386Sopenharmony_ci		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 10) + Short4(0x0020);
1264cb93a386Sopenharmony_ci		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 10) + Short4(0x0020);
1265cb93a386Sopenharmony_ci		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 10) + Short4(0x0020);
1266cb93a386Sopenharmony_ci		current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 2) + Short4(0x2000);
1267cb93a386Sopenharmony_ci		break;
1268cb93a386Sopenharmony_ci	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1269cb93a386Sopenharmony_ci	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1270cb93a386Sopenharmony_ci	case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1271cb93a386Sopenharmony_ci	case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
1272cb93a386Sopenharmony_ci		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 4) + Short4(0x0800);
1273cb93a386Sopenharmony_ci		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 4) + Short4(0x0800);
1274cb93a386Sopenharmony_ci		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 4) + Short4(0x0800);
1275cb93a386Sopenharmony_ci		current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 4) + Short4(0x0800);
1276cb93a386Sopenharmony_ci		break;
1277cb93a386Sopenharmony_ci	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1278cb93a386Sopenharmony_ci	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1279cb93a386Sopenharmony_ci	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1280cb93a386Sopenharmony_ci		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
1281cb93a386Sopenharmony_ci		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 5) + Short4(0x0400);
1282cb93a386Sopenharmony_ci		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
1283cb93a386Sopenharmony_ci		current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 1) + Short4(0x4000);
1284cb93a386Sopenharmony_ci		break;
1285cb93a386Sopenharmony_ci	case VK_FORMAT_B5G6R5_UNORM_PACK16:
1286cb93a386Sopenharmony_ci	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1287cb93a386Sopenharmony_ci		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
1288cb93a386Sopenharmony_ci		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 6) + Short4(0x0200);
1289cb93a386Sopenharmony_ci		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
1290cb93a386Sopenharmony_ci		break;
1291cb93a386Sopenharmony_ci	default:
1292cb93a386Sopenharmony_ci		break;
1293cb93a386Sopenharmony_ci	}
1294cb93a386Sopenharmony_ci
1295cb93a386Sopenharmony_ci	int rgbaWriteMask = state.colorWriteActive(index);
1296cb93a386Sopenharmony_ci	int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1297cb93a386Sopenharmony_ci
1298cb93a386Sopenharmony_ci	switch(state.colorFormat[index])
1299cb93a386Sopenharmony_ci	{
1300cb93a386Sopenharmony_ci	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1301cb93a386Sopenharmony_ci		{
1302cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF000));
1303cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
1304cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF000)) >> 8;
1305cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
1306cb93a386Sopenharmony_ci
1307cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1308cb93a386Sopenharmony_ci		}
1309cb93a386Sopenharmony_ci		break;
1310cb93a386Sopenharmony_ci	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1311cb93a386Sopenharmony_ci		{
1312cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF000));
1313cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
1314cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF000)) >> 8;
1315cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
1316cb93a386Sopenharmony_ci
1317cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1318cb93a386Sopenharmony_ci		}
1319cb93a386Sopenharmony_ci		break;
1320cb93a386Sopenharmony_ci	case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1321cb93a386Sopenharmony_ci		{
1322cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0xF000));
1323cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF000)) >> 4;
1324cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF000)) >> 8;
1325cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF000u)) >> 12;
1326cb93a386Sopenharmony_ci
1327cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1328cb93a386Sopenharmony_ci		}
1329cb93a386Sopenharmony_ci		break;
1330cb93a386Sopenharmony_ci	case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
1331cb93a386Sopenharmony_ci		{
1332cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0xF000));
1333cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF000)) >> 4;
1334cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF000)) >> 8;
1335cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF000u)) >> 12;
1336cb93a386Sopenharmony_ci
1337cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1338cb93a386Sopenharmony_ci		}
1339cb93a386Sopenharmony_ci		break;
1340cb93a386Sopenharmony_ci	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1341cb93a386Sopenharmony_ci		{
1342cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF800));
1343cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
1344cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF800)) >> 10;
1345cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
1346cb93a386Sopenharmony_ci
1347cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1348cb93a386Sopenharmony_ci		}
1349cb93a386Sopenharmony_ci		break;
1350cb93a386Sopenharmony_ci	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1351cb93a386Sopenharmony_ci		{
1352cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF800));
1353cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
1354cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF800)) >> 10;
1355cb93a386Sopenharmony_ci			current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
1356cb93a386Sopenharmony_ci
1357cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1358cb93a386Sopenharmony_ci		}
1359cb93a386Sopenharmony_ci		break;
1360cb93a386Sopenharmony_ci	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1361cb93a386Sopenharmony_ci		{
1362cb93a386Sopenharmony_ci			current.w = current.w & Short4(0x8000u);
1363cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1;
1364cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xF800)) >> 6;
1365cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z & Short4(0xF800)) >> 11;
1366cb93a386Sopenharmony_ci
1367cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z | current.w;
1368cb93a386Sopenharmony_ci		}
1369cb93a386Sopenharmony_ci		break;
1370cb93a386Sopenharmony_ci	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1371cb93a386Sopenharmony_ci		{
1372cb93a386Sopenharmony_ci			current.x = current.x & Short4(0xF800u);
1373cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1374cb93a386Sopenharmony_ci			current.z = As<UShort4>(current.z) >> 11;
1375cb93a386Sopenharmony_ci
1376cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z;
1377cb93a386Sopenharmony_ci		}
1378cb93a386Sopenharmony_ci		break;
1379cb93a386Sopenharmony_ci	case VK_FORMAT_B5G6R5_UNORM_PACK16:
1380cb93a386Sopenharmony_ci		{
1381cb93a386Sopenharmony_ci			current.z = current.z & Short4(0xF800u);
1382cb93a386Sopenharmony_ci			current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1383cb93a386Sopenharmony_ci			current.x = As<UShort4>(current.x) >> 11;
1384cb93a386Sopenharmony_ci
1385cb93a386Sopenharmony_ci			current.x = current.x | current.y | current.z;
1386cb93a386Sopenharmony_ci		}
1387cb93a386Sopenharmony_ci		break;
1388cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_UNORM:
1389cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_SRGB:
1390cb93a386Sopenharmony_ci		if(rgbaWriteMask == 0x7)
1391cb93a386Sopenharmony_ci		{
1392cb93a386Sopenharmony_ci			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1393cb93a386Sopenharmony_ci			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1394cb93a386Sopenharmony_ci			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1395cb93a386Sopenharmony_ci
1396cb93a386Sopenharmony_ci			current.z = As<Short4>(PackUnsigned(current.z, current.x));
1397cb93a386Sopenharmony_ci			current.y = As<Short4>(PackUnsigned(current.y, current.y));
1398cb93a386Sopenharmony_ci
1399cb93a386Sopenharmony_ci			current.x = current.z;
1400cb93a386Sopenharmony_ci			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1401cb93a386Sopenharmony_ci			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1402cb93a386Sopenharmony_ci			current.y = current.z;
1403cb93a386Sopenharmony_ci			current.z = As<Short4>(UnpackLow(current.z, current.x));
1404cb93a386Sopenharmony_ci			current.y = As<Short4>(UnpackHigh(current.y, current.x));
1405cb93a386Sopenharmony_ci		}
1406cb93a386Sopenharmony_ci		else
1407cb93a386Sopenharmony_ci		{
1408cb93a386Sopenharmony_ci			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1409cb93a386Sopenharmony_ci			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1410cb93a386Sopenharmony_ci			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1411cb93a386Sopenharmony_ci			current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1412cb93a386Sopenharmony_ci
1413cb93a386Sopenharmony_ci			current.z = As<Short4>(PackUnsigned(current.z, current.x));
1414cb93a386Sopenharmony_ci			current.y = As<Short4>(PackUnsigned(current.y, current.w));
1415cb93a386Sopenharmony_ci
1416cb93a386Sopenharmony_ci			current.x = current.z;
1417cb93a386Sopenharmony_ci			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1418cb93a386Sopenharmony_ci			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1419cb93a386Sopenharmony_ci			current.y = current.z;
1420cb93a386Sopenharmony_ci			current.z = As<Short4>(UnpackLow(current.z, current.x));
1421cb93a386Sopenharmony_ci			current.y = As<Short4>(UnpackHigh(current.y, current.x));
1422cb93a386Sopenharmony_ci		}
1423cb93a386Sopenharmony_ci		break;
1424cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UNORM:
1425cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SRGB:
1426cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1427cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1428cb93a386Sopenharmony_ci		if(rgbaWriteMask == 0x7)
1429cb93a386Sopenharmony_ci		{
1430cb93a386Sopenharmony_ci			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1431cb93a386Sopenharmony_ci			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1432cb93a386Sopenharmony_ci			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1433cb93a386Sopenharmony_ci
1434cb93a386Sopenharmony_ci			current.z = As<Short4>(PackUnsigned(current.x, current.z));
1435cb93a386Sopenharmony_ci			current.y = As<Short4>(PackUnsigned(current.y, current.y));
1436cb93a386Sopenharmony_ci
1437cb93a386Sopenharmony_ci			current.x = current.z;
1438cb93a386Sopenharmony_ci			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1439cb93a386Sopenharmony_ci			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1440cb93a386Sopenharmony_ci			current.y = current.z;
1441cb93a386Sopenharmony_ci			current.z = As<Short4>(UnpackLow(current.z, current.x));
1442cb93a386Sopenharmony_ci			current.y = As<Short4>(UnpackHigh(current.y, current.x));
1443cb93a386Sopenharmony_ci		}
1444cb93a386Sopenharmony_ci		else
1445cb93a386Sopenharmony_ci		{
1446cb93a386Sopenharmony_ci			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1447cb93a386Sopenharmony_ci			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1448cb93a386Sopenharmony_ci			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1449cb93a386Sopenharmony_ci			current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1450cb93a386Sopenharmony_ci
1451cb93a386Sopenharmony_ci			current.z = As<Short4>(PackUnsigned(current.x, current.z));
1452cb93a386Sopenharmony_ci			current.y = As<Short4>(PackUnsigned(current.y, current.w));
1453cb93a386Sopenharmony_ci
1454cb93a386Sopenharmony_ci			current.x = current.z;
1455cb93a386Sopenharmony_ci			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1456cb93a386Sopenharmony_ci			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1457cb93a386Sopenharmony_ci			current.y = current.z;
1458cb93a386Sopenharmony_ci			current.z = As<Short4>(UnpackLow(current.z, current.x));
1459cb93a386Sopenharmony_ci			current.y = As<Short4>(UnpackHigh(current.y, current.x));
1460cb93a386Sopenharmony_ci		}
1461cb93a386Sopenharmony_ci		break;
1462cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UNORM:
1463cb93a386Sopenharmony_ci		current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1464cb93a386Sopenharmony_ci		current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1465cb93a386Sopenharmony_ci		current.x = As<Short4>(PackUnsigned(current.x, current.x));
1466cb93a386Sopenharmony_ci		current.y = As<Short4>(PackUnsigned(current.y, current.y));
1467cb93a386Sopenharmony_ci		current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1468cb93a386Sopenharmony_ci		break;
1469cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UNORM:
1470cb93a386Sopenharmony_ci		current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1471cb93a386Sopenharmony_ci		current.x = As<Short4>(PackUnsigned(current.x, current.x));
1472cb93a386Sopenharmony_ci		break;
1473cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1474cb93a386Sopenharmony_ci		{
1475cb93a386Sopenharmony_ci			auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
1476cb93a386Sopenharmony_ci			auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
1477cb93a386Sopenharmony_ci			auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
1478cb93a386Sopenharmony_ci			auto a = (Int4(current.w) >> 14) & Int4(0x3);
1479cb93a386Sopenharmony_ci			Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
1480cb93a386Sopenharmony_ci			auto c02 = As<Int2>(Int4(packed.xzzz));  // TODO: auto c02 = packed.xz;
1481cb93a386Sopenharmony_ci			auto c13 = As<Int2>(Int4(packed.ywww));  // TODO: auto c13 = packed.yw;
1482cb93a386Sopenharmony_ci			current.x = UnpackLow(c02, c13);
1483cb93a386Sopenharmony_ci			current.y = UnpackHigh(c02, c13);
1484cb93a386Sopenharmony_ci		}
1485cb93a386Sopenharmony_ci		break;
1486cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1487cb93a386Sopenharmony_ci		{
1488cb93a386Sopenharmony_ci			auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
1489cb93a386Sopenharmony_ci			auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
1490cb93a386Sopenharmony_ci			auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
1491cb93a386Sopenharmony_ci			auto a = (Int4(current.w) >> 14) & Int4(0x3);
1492cb93a386Sopenharmony_ci			Int4 packed = (a << 30) | (r << 20) | (g << 10) | b;
1493cb93a386Sopenharmony_ci			auto c02 = As<Int2>(Int4(packed.xzzz));  // TODO: auto c02 = packed.xz;
1494cb93a386Sopenharmony_ci			auto c13 = As<Int2>(Int4(packed.ywww));  // TODO: auto c13 = packed.yw;
1495cb93a386Sopenharmony_ci			current.x = UnpackLow(c02, c13);
1496cb93a386Sopenharmony_ci			current.y = UnpackHigh(c02, c13);
1497cb93a386Sopenharmony_ci		}
1498cb93a386Sopenharmony_ci		break;
1499cb93a386Sopenharmony_ci	default:
1500cb93a386Sopenharmony_ci		UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
1501cb93a386Sopenharmony_ci	}
1502cb93a386Sopenharmony_ci
1503cb93a386Sopenharmony_ci	Short4 c01 = current.z;
1504cb93a386Sopenharmony_ci	Short4 c23 = current.y;
1505cb93a386Sopenharmony_ci
1506cb93a386Sopenharmony_ci	Int xMask;  // Combination of all masks
1507cb93a386Sopenharmony_ci
1508cb93a386Sopenharmony_ci	if(state.depthTestActive)
1509cb93a386Sopenharmony_ci	{
1510cb93a386Sopenharmony_ci		xMask = zMask;
1511cb93a386Sopenharmony_ci	}
1512cb93a386Sopenharmony_ci	else
1513cb93a386Sopenharmony_ci	{
1514cb93a386Sopenharmony_ci		xMask = cMask;
1515cb93a386Sopenharmony_ci	}
1516cb93a386Sopenharmony_ci
1517cb93a386Sopenharmony_ci	if(state.stencilActive)
1518cb93a386Sopenharmony_ci	{
1519cb93a386Sopenharmony_ci		xMask &= sMask;
1520cb93a386Sopenharmony_ci	}
1521cb93a386Sopenharmony_ci
1522cb93a386Sopenharmony_ci	Pointer<Byte> buffer = cBuffer;
1523cb93a386Sopenharmony_ci	Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1524cb93a386Sopenharmony_ci
1525cb93a386Sopenharmony_ci	switch(state.colorFormat[index])
1526cb93a386Sopenharmony_ci	{
1527cb93a386Sopenharmony_ci	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1528cb93a386Sopenharmony_ci	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1529cb93a386Sopenharmony_ci	case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
1530cb93a386Sopenharmony_ci	case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1531cb93a386Sopenharmony_ci		{
1532cb93a386Sopenharmony_ci			buffer += 2 * x;
1533cb93a386Sopenharmony_ci			Int value = *Pointer<Int>(buffer);
1534cb93a386Sopenharmony_ci
1535cb93a386Sopenharmony_ci			Int channelMask;
1536cb93a386Sopenharmony_ci			switch(state.colorFormat[index])
1537cb93a386Sopenharmony_ci			{
1538cb93a386Sopenharmony_ci			case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1539cb93a386Sopenharmony_ci				channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4rgbaQ[bgraWriteMask & 0xF][0]));
1540cb93a386Sopenharmony_ci				break;
1541cb93a386Sopenharmony_ci			case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1542cb93a386Sopenharmony_ci				channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4bgraQ[bgraWriteMask & 0xF][0]));
1543cb93a386Sopenharmony_ci				break;
1544cb93a386Sopenharmony_ci			case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1545cb93a386Sopenharmony_ci				channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4argbQ[bgraWriteMask & 0xF][0]));
1546cb93a386Sopenharmony_ci				break;
1547cb93a386Sopenharmony_ci			case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
1548cb93a386Sopenharmony_ci				channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4abgrQ[bgraWriteMask & 0xF][0]));
1549cb93a386Sopenharmony_ci				break;
1550cb93a386Sopenharmony_ci			default:
1551cb93a386Sopenharmony_ci				UNREACHABLE("Format: %s", vk::Stringify(state.colorFormat[index]).c_str());
1552cb93a386Sopenharmony_ci			}
1553cb93a386Sopenharmony_ci
1554cb93a386Sopenharmony_ci			Int c01 = Extract(As<Int2>(current.x), 0);
1555cb93a386Sopenharmony_ci			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
1556cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1557cb93a386Sopenharmony_ci			{
1558cb93a386Sopenharmony_ci				mask01 &= channelMask;
1559cb93a386Sopenharmony_ci			}
1560cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1561cb93a386Sopenharmony_ci
1562cb93a386Sopenharmony_ci			buffer += pitchB;
1563cb93a386Sopenharmony_ci			value = *Pointer<Int>(buffer);
1564cb93a386Sopenharmony_ci
1565cb93a386Sopenharmony_ci			Int c23 = Extract(As<Int2>(current.x), 1);
1566cb93a386Sopenharmony_ci			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
1567cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1568cb93a386Sopenharmony_ci			{
1569cb93a386Sopenharmony_ci				mask23 &= channelMask;
1570cb93a386Sopenharmony_ci			}
1571cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1572cb93a386Sopenharmony_ci		}
1573cb93a386Sopenharmony_ci		break;
1574cb93a386Sopenharmony_ci	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1575cb93a386Sopenharmony_ci		{
1576cb93a386Sopenharmony_ci			buffer += 2 * x;
1577cb93a386Sopenharmony_ci			Int value = *Pointer<Int>(buffer);
1578cb93a386Sopenharmony_ci
1579cb93a386Sopenharmony_ci			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskr5g5b5a1Q[bgraWriteMask & 0xF][0]));
1580cb93a386Sopenharmony_ci
1581cb93a386Sopenharmony_ci			Int c01 = Extract(As<Int2>(current.x), 0);
1582cb93a386Sopenharmony_ci			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
1583cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1584cb93a386Sopenharmony_ci			{
1585cb93a386Sopenharmony_ci				mask01 &= channelMask;
1586cb93a386Sopenharmony_ci			}
1587cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1588cb93a386Sopenharmony_ci
1589cb93a386Sopenharmony_ci			buffer += pitchB;
1590cb93a386Sopenharmony_ci			value = *Pointer<Int>(buffer);
1591cb93a386Sopenharmony_ci
1592cb93a386Sopenharmony_ci			Int c23 = Extract(As<Int2>(current.x), 1);
1593cb93a386Sopenharmony_ci			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
1594cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1595cb93a386Sopenharmony_ci			{
1596cb93a386Sopenharmony_ci				mask23 &= channelMask;
1597cb93a386Sopenharmony_ci			}
1598cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1599cb93a386Sopenharmony_ci		}
1600cb93a386Sopenharmony_ci		break;
1601cb93a386Sopenharmony_ci	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1602cb93a386Sopenharmony_ci		{
1603cb93a386Sopenharmony_ci			buffer += 2 * x;
1604cb93a386Sopenharmony_ci			Int value = *Pointer<Int>(buffer);
1605cb93a386Sopenharmony_ci
1606cb93a386Sopenharmony_ci			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskb5g5r5a1Q[bgraWriteMask & 0xF][0]));
1607cb93a386Sopenharmony_ci
1608cb93a386Sopenharmony_ci			Int c01 = Extract(As<Int2>(current.x), 0);
1609cb93a386Sopenharmony_ci			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
1610cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1611cb93a386Sopenharmony_ci			{
1612cb93a386Sopenharmony_ci				mask01 &= channelMask;
1613cb93a386Sopenharmony_ci			}
1614cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1615cb93a386Sopenharmony_ci
1616cb93a386Sopenharmony_ci			buffer += pitchB;
1617cb93a386Sopenharmony_ci			value = *Pointer<Int>(buffer);
1618cb93a386Sopenharmony_ci
1619cb93a386Sopenharmony_ci			Int c23 = Extract(As<Int2>(current.x), 1);
1620cb93a386Sopenharmony_ci			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
1621cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1622cb93a386Sopenharmony_ci			{
1623cb93a386Sopenharmony_ci				mask23 &= channelMask;
1624cb93a386Sopenharmony_ci			}
1625cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1626cb93a386Sopenharmony_ci		}
1627cb93a386Sopenharmony_ci		break;
1628cb93a386Sopenharmony_ci	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1629cb93a386Sopenharmony_ci		{
1630cb93a386Sopenharmony_ci			buffer += 2 * x;
1631cb93a386Sopenharmony_ci			Int value = *Pointer<Int>(buffer);
1632cb93a386Sopenharmony_ci
1633cb93a386Sopenharmony_ci			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[bgraWriteMask & 0xF][0]));
1634cb93a386Sopenharmony_ci
1635cb93a386Sopenharmony_ci			Int c01 = Extract(As<Int2>(current.x), 0);
1636cb93a386Sopenharmony_ci			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
1637cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1638cb93a386Sopenharmony_ci			{
1639cb93a386Sopenharmony_ci				mask01 &= channelMask;
1640cb93a386Sopenharmony_ci			}
1641cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1642cb93a386Sopenharmony_ci
1643cb93a386Sopenharmony_ci			buffer += pitchB;
1644cb93a386Sopenharmony_ci			value = *Pointer<Int>(buffer);
1645cb93a386Sopenharmony_ci
1646cb93a386Sopenharmony_ci			Int c23 = Extract(As<Int2>(current.x), 1);
1647cb93a386Sopenharmony_ci			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
1648cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1649cb93a386Sopenharmony_ci			{
1650cb93a386Sopenharmony_ci				mask23 &= channelMask;
1651cb93a386Sopenharmony_ci			}
1652cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1653cb93a386Sopenharmony_ci		}
1654cb93a386Sopenharmony_ci		break;
1655cb93a386Sopenharmony_ci	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1656cb93a386Sopenharmony_ci		{
1657cb93a386Sopenharmony_ci			buffer += 2 * x;
1658cb93a386Sopenharmony_ci			Int value = *Pointer<Int>(buffer);
1659cb93a386Sopenharmony_ci
1660cb93a386Sopenharmony_ci			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[bgraWriteMask & 0x7][0]));
1661cb93a386Sopenharmony_ci
1662cb93a386Sopenharmony_ci			Int c01 = Extract(As<Int2>(current.x), 0);
1663cb93a386Sopenharmony_ci			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
1664cb93a386Sopenharmony_ci			if((bgraWriteMask & 0x00000007) != 0x00000007)
1665cb93a386Sopenharmony_ci			{
1666cb93a386Sopenharmony_ci				mask01 &= channelMask;
1667cb93a386Sopenharmony_ci			}
1668cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1669cb93a386Sopenharmony_ci
1670cb93a386Sopenharmony_ci			buffer += pitchB;
1671cb93a386Sopenharmony_ci			value = *Pointer<Int>(buffer);
1672cb93a386Sopenharmony_ci
1673cb93a386Sopenharmony_ci			Int c23 = Extract(As<Int2>(current.x), 1);
1674cb93a386Sopenharmony_ci			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
1675cb93a386Sopenharmony_ci			if((bgraWriteMask & 0x00000007) != 0x00000007)
1676cb93a386Sopenharmony_ci			{
1677cb93a386Sopenharmony_ci				mask23 &= channelMask;
1678cb93a386Sopenharmony_ci			}
1679cb93a386Sopenharmony_ci			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1680cb93a386Sopenharmony_ci		}
1681cb93a386Sopenharmony_ci		break;
1682cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_UNORM:
1683cb93a386Sopenharmony_ci	case VK_FORMAT_B8G8R8A8_SRGB:
1684cb93a386Sopenharmony_ci		{
1685cb93a386Sopenharmony_ci			buffer += x * 4;
1686cb93a386Sopenharmony_ci			Short4 value = *Pointer<Short4>(buffer);
1687cb93a386Sopenharmony_ci			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[bgraWriteMask][0]));
1688cb93a386Sopenharmony_ci
1689cb93a386Sopenharmony_ci			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1690cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1691cb93a386Sopenharmony_ci			{
1692cb93a386Sopenharmony_ci				mask01 &= channelMask;
1693cb93a386Sopenharmony_ci			}
1694cb93a386Sopenharmony_ci			*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
1695cb93a386Sopenharmony_ci
1696cb93a386Sopenharmony_ci			buffer += pitchB;
1697cb93a386Sopenharmony_ci			value = *Pointer<Short4>(buffer);
1698cb93a386Sopenharmony_ci
1699cb93a386Sopenharmony_ci			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1700cb93a386Sopenharmony_ci			if(bgraWriteMask != 0x0000000F)
1701cb93a386Sopenharmony_ci			{
1702cb93a386Sopenharmony_ci				mask23 &= channelMask;
1703cb93a386Sopenharmony_ci			}
1704cb93a386Sopenharmony_ci			*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
1705cb93a386Sopenharmony_ci		}
1706cb93a386Sopenharmony_ci		break;
1707cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UNORM:
1708cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SRGB:
1709cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1710cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1711cb93a386Sopenharmony_ci		{
1712cb93a386Sopenharmony_ci			buffer += x * 4;
1713cb93a386Sopenharmony_ci			Short4 value = *Pointer<Short4>(buffer);
1714cb93a386Sopenharmony_ci			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
1715cb93a386Sopenharmony_ci
1716cb93a386Sopenharmony_ci			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1717cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
1718cb93a386Sopenharmony_ci			{
1719cb93a386Sopenharmony_ci				mask01 &= channelMask;
1720cb93a386Sopenharmony_ci			}
1721cb93a386Sopenharmony_ci			*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
1722cb93a386Sopenharmony_ci
1723cb93a386Sopenharmony_ci			buffer += pitchB;
1724cb93a386Sopenharmony_ci			value = *Pointer<Short4>(buffer);
1725cb93a386Sopenharmony_ci
1726cb93a386Sopenharmony_ci			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1727cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
1728cb93a386Sopenharmony_ci			{
1729cb93a386Sopenharmony_ci				mask23 &= channelMask;
1730cb93a386Sopenharmony_ci			}
1731cb93a386Sopenharmony_ci			*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
1732cb93a386Sopenharmony_ci		}
1733cb93a386Sopenharmony_ci		break;
1734cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UNORM:
1735cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x0)
1736cb93a386Sopenharmony_ci		{
1737cb93a386Sopenharmony_ci			buffer += 2 * x;
1738cb93a386Sopenharmony_ci			Int2 value;
1739cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<Int>(buffer), 0);
1740cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<Int>(buffer + pitchB), 1);
1741cb93a386Sopenharmony_ci
1742cb93a386Sopenharmony_ci			Int2 packedCol = As<Int2>(current.x);
1743cb93a386Sopenharmony_ci
1744cb93a386Sopenharmony_ci			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1745cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
1746cb93a386Sopenharmony_ci			{
1747cb93a386Sopenharmony_ci				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1748cb93a386Sopenharmony_ci				UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1749cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
1750cb93a386Sopenharmony_ci			}
1751cb93a386Sopenharmony_ci
1752cb93a386Sopenharmony_ci			packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1753cb93a386Sopenharmony_ci
1754cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1755cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer + pitchB) = As<UInt>(Extract(packedCol, 1));
1756cb93a386Sopenharmony_ci		}
1757cb93a386Sopenharmony_ci		break;
1758cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UNORM:
1759cb93a386Sopenharmony_ci		if(rgbaWriteMask & 0x00000001)
1760cb93a386Sopenharmony_ci		{
1761cb93a386Sopenharmony_ci			buffer += 1 * x;
1762cb93a386Sopenharmony_ci			Short4 value;
1763cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<Short>(buffer), 0);
1764cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<Short>(buffer + pitchB), 1);
1765cb93a386Sopenharmony_ci
1766cb93a386Sopenharmony_ci			current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1767cb93a386Sopenharmony_ci			value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1768cb93a386Sopenharmony_ci			current.x |= value;
1769cb93a386Sopenharmony_ci
1770cb93a386Sopenharmony_ci			*Pointer<Short>(buffer) = Extract(current.x, 0);
1771cb93a386Sopenharmony_ci			*Pointer<Short>(buffer + pitchB) = Extract(current.x, 1);
1772cb93a386Sopenharmony_ci		}
1773cb93a386Sopenharmony_ci		break;
1774cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1775cb93a386Sopenharmony_ci		rgbaWriteMask = bgraWriteMask;
1776cb93a386Sopenharmony_ci		// [[fallthrough]]
1777cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1778cb93a386Sopenharmony_ci		{
1779cb93a386Sopenharmony_ci			buffer += 4 * x;
1780cb93a386Sopenharmony_ci
1781cb93a386Sopenharmony_ci			Int2 value = *Pointer<Int2>(buffer, 16);
1782cb93a386Sopenharmony_ci			Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1783cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
1784cb93a386Sopenharmony_ci			{
1785cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1786cb93a386Sopenharmony_ci			}
1787cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
1788cb93a386Sopenharmony_ci
1789cb93a386Sopenharmony_ci			buffer += pitchB;
1790cb93a386Sopenharmony_ci
1791cb93a386Sopenharmony_ci			value = *Pointer<Int2>(buffer, 16);
1792cb93a386Sopenharmony_ci			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1793cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
1794cb93a386Sopenharmony_ci			{
1795cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1796cb93a386Sopenharmony_ci			}
1797cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
1798cb93a386Sopenharmony_ci		}
1799cb93a386Sopenharmony_ci		break;
1800cb93a386Sopenharmony_ci	default:
1801cb93a386Sopenharmony_ci		UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
1802cb93a386Sopenharmony_ci	}
1803cb93a386Sopenharmony_ci}
1804cb93a386Sopenharmony_ci
1805cb93a386Sopenharmony_ciFloat PixelRoutine::blendConstant(vk::Format format, int component, BlendFactorModifier modifier)
1806cb93a386Sopenharmony_ci{
1807cb93a386Sopenharmony_ci	bool inverse = (modifier == OneMinus);
1808cb93a386Sopenharmony_ci
1809cb93a386Sopenharmony_ci	if(format.isUnsignedNormalized())
1810cb93a386Sopenharmony_ci	{
1811cb93a386Sopenharmony_ci		return inverse ? *Pointer<Float>(data + OFFSET(DrawData, factor.invBlendConstantU[component]))
1812cb93a386Sopenharmony_ci		               : *Pointer<Float>(data + OFFSET(DrawData, factor.blendConstantU[component]));
1813cb93a386Sopenharmony_ci	}
1814cb93a386Sopenharmony_ci	else if(format.isSignedNormalized())
1815cb93a386Sopenharmony_ci	{
1816cb93a386Sopenharmony_ci		return inverse ? *Pointer<Float>(data + OFFSET(DrawData, factor.invBlendConstantS[component]))
1817cb93a386Sopenharmony_ci		               : *Pointer<Float>(data + OFFSET(DrawData, factor.blendConstantS[component]));
1818cb93a386Sopenharmony_ci	}
1819cb93a386Sopenharmony_ci	else  // Floating-point format
1820cb93a386Sopenharmony_ci	{
1821cb93a386Sopenharmony_ci		ASSERT(format.isFloatFormat());
1822cb93a386Sopenharmony_ci		return inverse ? *Pointer<Float>(data + OFFSET(DrawData, factor.invBlendConstantF[component]))
1823cb93a386Sopenharmony_ci		               : *Pointer<Float>(data + OFFSET(DrawData, factor.blendConstantF[component]));
1824cb93a386Sopenharmony_ci	}
1825cb93a386Sopenharmony_ci}
1826cb93a386Sopenharmony_ci
1827cb93a386Sopenharmony_civoid PixelRoutine::blendFactorRGB(Vector4f &blendFactor, const Vector4f &sourceColor, const Vector4f &destColor, VkBlendFactor colorBlendFactor, vk::Format format)
1828cb93a386Sopenharmony_ci{
1829cb93a386Sopenharmony_ci	switch(colorBlendFactor)
1830cb93a386Sopenharmony_ci	{
1831cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ZERO:
1832cb93a386Sopenharmony_ci		blendFactor.x = Float4(0);
1833cb93a386Sopenharmony_ci		blendFactor.y = Float4(0);
1834cb93a386Sopenharmony_ci		blendFactor.z = Float4(0);
1835cb93a386Sopenharmony_ci		break;
1836cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE:
1837cb93a386Sopenharmony_ci		blendFactor.x = Float4(1);
1838cb93a386Sopenharmony_ci		blendFactor.y = Float4(1);
1839cb93a386Sopenharmony_ci		blendFactor.z = Float4(1);
1840cb93a386Sopenharmony_ci		break;
1841cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_COLOR:
1842cb93a386Sopenharmony_ci		blendFactor.x = sourceColor.x;
1843cb93a386Sopenharmony_ci		blendFactor.y = sourceColor.y;
1844cb93a386Sopenharmony_ci		blendFactor.z = sourceColor.z;
1845cb93a386Sopenharmony_ci		break;
1846cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1847cb93a386Sopenharmony_ci		blendFactor.x = Float4(1.0f) - sourceColor.x;
1848cb93a386Sopenharmony_ci		blendFactor.y = Float4(1.0f) - sourceColor.y;
1849cb93a386Sopenharmony_ci		blendFactor.z = Float4(1.0f) - sourceColor.z;
1850cb93a386Sopenharmony_ci		break;
1851cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_COLOR:
1852cb93a386Sopenharmony_ci		blendFactor.x = destColor.x;
1853cb93a386Sopenharmony_ci		blendFactor.y = destColor.y;
1854cb93a386Sopenharmony_ci		blendFactor.z = destColor.z;
1855cb93a386Sopenharmony_ci		break;
1856cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1857cb93a386Sopenharmony_ci		blendFactor.x = Float4(1.0f) - destColor.x;
1858cb93a386Sopenharmony_ci		blendFactor.y = Float4(1.0f) - destColor.y;
1859cb93a386Sopenharmony_ci		blendFactor.z = Float4(1.0f) - destColor.z;
1860cb93a386Sopenharmony_ci		break;
1861cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA:
1862cb93a386Sopenharmony_ci		blendFactor.x = sourceColor.w;
1863cb93a386Sopenharmony_ci		blendFactor.y = sourceColor.w;
1864cb93a386Sopenharmony_ci		blendFactor.z = sourceColor.w;
1865cb93a386Sopenharmony_ci		break;
1866cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1867cb93a386Sopenharmony_ci		blendFactor.x = Float4(1.0f) - sourceColor.w;
1868cb93a386Sopenharmony_ci		blendFactor.y = Float4(1.0f) - sourceColor.w;
1869cb93a386Sopenharmony_ci		blendFactor.z = Float4(1.0f) - sourceColor.w;
1870cb93a386Sopenharmony_ci		break;
1871cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_ALPHA:
1872cb93a386Sopenharmony_ci		blendFactor.x = destColor.w;
1873cb93a386Sopenharmony_ci		blendFactor.y = destColor.w;
1874cb93a386Sopenharmony_ci		blendFactor.z = destColor.w;
1875cb93a386Sopenharmony_ci		break;
1876cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1877cb93a386Sopenharmony_ci		blendFactor.x = Float4(1.0f) - destColor.w;
1878cb93a386Sopenharmony_ci		blendFactor.y = Float4(1.0f) - destColor.w;
1879cb93a386Sopenharmony_ci		blendFactor.z = Float4(1.0f) - destColor.w;
1880cb93a386Sopenharmony_ci		break;
1881cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1882cb93a386Sopenharmony_ci		blendFactor.x = Float4(1.0f) - destColor.w;
1883cb93a386Sopenharmony_ci		blendFactor.x = Min(blendFactor.x, sourceColor.w);
1884cb93a386Sopenharmony_ci		blendFactor.y = blendFactor.x;
1885cb93a386Sopenharmony_ci		blendFactor.z = blendFactor.x;
1886cb93a386Sopenharmony_ci		break;
1887cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_COLOR:
1888cb93a386Sopenharmony_ci		blendFactor.x = Float4(blendConstant(format, 0));
1889cb93a386Sopenharmony_ci		blendFactor.y = Float4(blendConstant(format, 1));
1890cb93a386Sopenharmony_ci		blendFactor.z = Float4(blendConstant(format, 2));
1891cb93a386Sopenharmony_ci		break;
1892cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1893cb93a386Sopenharmony_ci		blendFactor.x = Float4(blendConstant(format, 3));
1894cb93a386Sopenharmony_ci		blendFactor.y = Float4(blendConstant(format, 3));
1895cb93a386Sopenharmony_ci		blendFactor.z = Float4(blendConstant(format, 3));
1896cb93a386Sopenharmony_ci		break;
1897cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1898cb93a386Sopenharmony_ci		blendFactor.x = Float4(blendConstant(format, 0, OneMinus));
1899cb93a386Sopenharmony_ci		blendFactor.y = Float4(blendConstant(format, 1, OneMinus));
1900cb93a386Sopenharmony_ci		blendFactor.z = Float4(blendConstant(format, 2, OneMinus));
1901cb93a386Sopenharmony_ci		break;
1902cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1903cb93a386Sopenharmony_ci		blendFactor.x = Float4(blendConstant(format, 3, OneMinus));
1904cb93a386Sopenharmony_ci		blendFactor.y = Float4(blendConstant(format, 3, OneMinus));
1905cb93a386Sopenharmony_ci		blendFactor.z = Float4(blendConstant(format, 3, OneMinus));
1906cb93a386Sopenharmony_ci		break;
1907cb93a386Sopenharmony_ci
1908cb93a386Sopenharmony_ci	default:
1909cb93a386Sopenharmony_ci		UNSUPPORTED("VkBlendFactor: %d", int(colorBlendFactor));
1910cb93a386Sopenharmony_ci	}
1911cb93a386Sopenharmony_ci
1912cb93a386Sopenharmony_ci	// "If the color attachment is fixed-point, the components of the source and destination values and blend factors are each clamped
1913cb93a386Sopenharmony_ci	//  to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment prior to evaluating the blend
1914cb93a386Sopenharmony_ci	//  operations. If the color attachment is floating-point, no clamping occurs."
1915cb93a386Sopenharmony_ci	if(blendFactorCanExceedFormatRange(colorBlendFactor, format))
1916cb93a386Sopenharmony_ci	{
1917cb93a386Sopenharmony_ci		if(format.isUnsignedNormalized())
1918cb93a386Sopenharmony_ci		{
1919cb93a386Sopenharmony_ci			blendFactor.x = Min(Max(blendFactor.x, Float4(0.0f)), Float4(1.0f));
1920cb93a386Sopenharmony_ci			blendFactor.y = Min(Max(blendFactor.y, Float4(0.0f)), Float4(1.0f));
1921cb93a386Sopenharmony_ci			blendFactor.z = Min(Max(blendFactor.z, Float4(0.0f)), Float4(1.0f));
1922cb93a386Sopenharmony_ci		}
1923cb93a386Sopenharmony_ci		else if(format.isSignedNormalized())
1924cb93a386Sopenharmony_ci		{
1925cb93a386Sopenharmony_ci			blendFactor.x = Min(Max(blendFactor.x, Float4(-1.0f)), Float4(1.0f));
1926cb93a386Sopenharmony_ci			blendFactor.y = Min(Max(blendFactor.y, Float4(-1.0f)), Float4(1.0f));
1927cb93a386Sopenharmony_ci			blendFactor.z = Min(Max(blendFactor.z, Float4(-1.0f)), Float4(1.0f));
1928cb93a386Sopenharmony_ci		}
1929cb93a386Sopenharmony_ci	}
1930cb93a386Sopenharmony_ci}
1931cb93a386Sopenharmony_ci
1932cb93a386Sopenharmony_civoid PixelRoutine::blendFactorAlpha(Float4 &blendFactorAlpha, const Float4 &sourceAlpha, const Float4 &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format)
1933cb93a386Sopenharmony_ci{
1934cb93a386Sopenharmony_ci	switch(alphaBlendFactor)
1935cb93a386Sopenharmony_ci	{
1936cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ZERO:
1937cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(0);
1938cb93a386Sopenharmony_ci		break;
1939cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE:
1940cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1);
1941cb93a386Sopenharmony_ci		break;
1942cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_COLOR:
1943cb93a386Sopenharmony_ci		blendFactorAlpha = sourceAlpha;
1944cb93a386Sopenharmony_ci		break;
1945cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1946cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1.0f) - sourceAlpha;
1947cb93a386Sopenharmony_ci		break;
1948cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_COLOR:
1949cb93a386Sopenharmony_ci		blendFactorAlpha = destAlpha;
1950cb93a386Sopenharmony_ci		break;
1951cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1952cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1.0f) - destAlpha;
1953cb93a386Sopenharmony_ci		break;
1954cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA:
1955cb93a386Sopenharmony_ci		blendFactorAlpha = sourceAlpha;
1956cb93a386Sopenharmony_ci		break;
1957cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1958cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1.0f) - sourceAlpha;
1959cb93a386Sopenharmony_ci		break;
1960cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_ALPHA:
1961cb93a386Sopenharmony_ci		blendFactorAlpha = destAlpha;
1962cb93a386Sopenharmony_ci		break;
1963cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1964cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1.0f) - destAlpha;
1965cb93a386Sopenharmony_ci		break;
1966cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1967cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(1.0f);
1968cb93a386Sopenharmony_ci		break;
1969cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_COLOR:
1970cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1971cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(blendConstant(format, 3));
1972cb93a386Sopenharmony_ci		break;
1973cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1974cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1975cb93a386Sopenharmony_ci		blendFactorAlpha = Float4(blendConstant(format, 3, OneMinus));
1976cb93a386Sopenharmony_ci		break;
1977cb93a386Sopenharmony_ci	default:
1978cb93a386Sopenharmony_ci		UNSUPPORTED("VkBlendFactor: %d", int(alphaBlendFactor));
1979cb93a386Sopenharmony_ci	}
1980cb93a386Sopenharmony_ci
1981cb93a386Sopenharmony_ci	// "If the color attachment is fixed-point, the components of the source and destination values and blend factors are each clamped
1982cb93a386Sopenharmony_ci	//  to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment prior to evaluating the blend
1983cb93a386Sopenharmony_ci	//  operations. If the color attachment is floating-point, no clamping occurs."
1984cb93a386Sopenharmony_ci	if(blendFactorCanExceedFormatRange(alphaBlendFactor, format))
1985cb93a386Sopenharmony_ci	{
1986cb93a386Sopenharmony_ci		if(format.isUnsignedNormalized())
1987cb93a386Sopenharmony_ci		{
1988cb93a386Sopenharmony_ci			blendFactorAlpha = Min(Max(blendFactorAlpha, Float4(0.0f)), Float4(1.0f));
1989cb93a386Sopenharmony_ci		}
1990cb93a386Sopenharmony_ci		else if(format.isSignedNormalized())
1991cb93a386Sopenharmony_ci		{
1992cb93a386Sopenharmony_ci			blendFactorAlpha = Min(Max(blendFactorAlpha, Float4(-1.0f)), Float4(1.0f));
1993cb93a386Sopenharmony_ci		}
1994cb93a386Sopenharmony_ci	}
1995cb93a386Sopenharmony_ci}
1996cb93a386Sopenharmony_ci
1997cb93a386Sopenharmony_ciFloat4 PixelRoutine::blendOpOverlay(Float4 &src, Float4 &dst)
1998cb93a386Sopenharmony_ci{
1999cb93a386Sopenharmony_ci	Int4 largeDst = CmpGT(dst, Float4(0.5f));
2000cb93a386Sopenharmony_ci	return As<Float4>(
2001cb93a386Sopenharmony_ci	    (~largeDst &
2002cb93a386Sopenharmony_ci	     As<Int4>(Float4(2.0f) * src * dst)) |
2003cb93a386Sopenharmony_ci	    (largeDst &
2004cb93a386Sopenharmony_ci	     As<Int4>(Float4(1.0f) - (Float4(2.0f) * (Float4(1.0f) - src) * (Float4(1.0f) - dst)))));
2005cb93a386Sopenharmony_ci}
2006cb93a386Sopenharmony_ci
2007cb93a386Sopenharmony_ciFloat4 PixelRoutine::blendOpColorDodge(Float4 &src, Float4 &dst)
2008cb93a386Sopenharmony_ci{
2009cb93a386Sopenharmony_ci	Int4 srcBelowOne = CmpLT(src, Float4(1.0f));
2010cb93a386Sopenharmony_ci	Int4 positiveDst = CmpGT(dst, Float4(0.0f));
2011cb93a386Sopenharmony_ci	return As<Float4>(positiveDst & ((~srcBelowOne &
2012cb93a386Sopenharmony_ci	                                  As<Int4>(Float4(1.0f))) |
2013cb93a386Sopenharmony_ci	                                 (srcBelowOne &
2014cb93a386Sopenharmony_ci	                                  As<Int4>(Min(Float4(1.0f), (dst / (Float4(1.0f) - src)))))));
2015cb93a386Sopenharmony_ci}
2016cb93a386Sopenharmony_ci
2017cb93a386Sopenharmony_ciFloat4 PixelRoutine::blendOpColorBurn(Float4 &src, Float4 &dst)
2018cb93a386Sopenharmony_ci{
2019cb93a386Sopenharmony_ci	Int4 dstBelowOne = CmpLT(dst, Float4(1.0f));
2020cb93a386Sopenharmony_ci	Int4 positiveSrc = CmpGT(src, Float4(0.0f));
2021cb93a386Sopenharmony_ci	return As<Float4>(
2022cb93a386Sopenharmony_ci	    (~dstBelowOne &
2023cb93a386Sopenharmony_ci	     As<Int4>(Float4(1.0f))) |
2024cb93a386Sopenharmony_ci	    (dstBelowOne & positiveSrc &
2025cb93a386Sopenharmony_ci	     As<Int4>(Float4(1.0f) - Min(Float4(1.0f), (Float4(1.0f) - dst) / src))));
2026cb93a386Sopenharmony_ci}
2027cb93a386Sopenharmony_ci
2028cb93a386Sopenharmony_ciFloat4 PixelRoutine::blendOpHardlight(Float4 &src, Float4 &dst)
2029cb93a386Sopenharmony_ci{
2030cb93a386Sopenharmony_ci	Int4 largeSrc = CmpGT(src, Float4(0.5f));
2031cb93a386Sopenharmony_ci	return As<Float4>(
2032cb93a386Sopenharmony_ci	    (~largeSrc &
2033cb93a386Sopenharmony_ci	     As<Int4>(Float4(2.0f) * src * dst)) |
2034cb93a386Sopenharmony_ci	    (largeSrc &
2035cb93a386Sopenharmony_ci	     As<Int4>(Float4(1.0f) - (Float4(2.0f) * (Float4(1.0f) - src) * (Float4(1.0f) - dst)))));
2036cb93a386Sopenharmony_ci}
2037cb93a386Sopenharmony_ci
2038cb93a386Sopenharmony_ciFloat4 PixelRoutine::blendOpSoftlight(Float4 &src, Float4 &dst)
2039cb93a386Sopenharmony_ci{
2040cb93a386Sopenharmony_ci	Int4 largeSrc = CmpGT(src, Float4(0.5f));
2041cb93a386Sopenharmony_ci	Int4 largeDst = CmpGT(dst, Float4(0.25f));
2042cb93a386Sopenharmony_ci
2043cb93a386Sopenharmony_ci	return As<Float4>(
2044cb93a386Sopenharmony_ci	    (~largeSrc &
2045cb93a386Sopenharmony_ci	     As<Int4>(dst - ((Float4(1.0f) - (Float4(2.0f) * src)) * dst * (Float4(1.0f) - dst)))) |
2046cb93a386Sopenharmony_ci	    (largeSrc & ((~largeDst &
2047cb93a386Sopenharmony_ci	                  As<Int4>(dst + (((Float4(2.0f) * src) - Float4(1.0f)) * dst * ((((Float4(16.0f) * dst) - Float4(12.0f)) * dst) + Float4(3.0f))))) |
2048cb93a386Sopenharmony_ci	                 (largeDst &
2049cb93a386Sopenharmony_ci	                  As<Int4>(dst + (((Float4(2.0f) * src) - Float4(1.0f)) * (Sqrt(dst) - dst)))))));
2050cb93a386Sopenharmony_ci}
2051cb93a386Sopenharmony_ci
2052cb93a386Sopenharmony_ciFloat4 PixelRoutine::maxRGB(Vector4f &c)
2053cb93a386Sopenharmony_ci{
2054cb93a386Sopenharmony_ci	return Max(Max(c.x, c.y), c.z);
2055cb93a386Sopenharmony_ci}
2056cb93a386Sopenharmony_ci
2057cb93a386Sopenharmony_ciFloat4 PixelRoutine::minRGB(Vector4f &c)
2058cb93a386Sopenharmony_ci{
2059cb93a386Sopenharmony_ci	return Min(Min(c.x, c.y), c.z);
2060cb93a386Sopenharmony_ci}
2061cb93a386Sopenharmony_ci
2062cb93a386Sopenharmony_civoid PixelRoutine::setLumSat(Vector4f &cbase, Vector4f &csat, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z)
2063cb93a386Sopenharmony_ci{
2064cb93a386Sopenharmony_ci	Float4 minbase = minRGB(cbase);
2065cb93a386Sopenharmony_ci	Float4 sbase = maxRGB(cbase) - minbase;
2066cb93a386Sopenharmony_ci	Float4 ssat = maxRGB(csat) - minRGB(csat);
2067cb93a386Sopenharmony_ci	Int4 isNonZero = CmpGT(sbase, Float4(0.0f));
2068cb93a386Sopenharmony_ci	Vector4f color;
2069cb93a386Sopenharmony_ci	color.x = As<Float4>(isNonZero & As<Int4>((cbase.x - minbase) * ssat / sbase));
2070cb93a386Sopenharmony_ci	color.y = As<Float4>(isNonZero & As<Int4>((cbase.y - minbase) * ssat / sbase));
2071cb93a386Sopenharmony_ci	color.z = As<Float4>(isNonZero & As<Int4>((cbase.z - minbase) * ssat / sbase));
2072cb93a386Sopenharmony_ci	setLum(color, clum, x, y, z);
2073cb93a386Sopenharmony_ci}
2074cb93a386Sopenharmony_ci
2075cb93a386Sopenharmony_ciFloat4 PixelRoutine::lumRGB(Vector4f &c)
2076cb93a386Sopenharmony_ci{
2077cb93a386Sopenharmony_ci	return c.x * Float4(0.3f) + c.y * Float4(0.59f) + c.z * Float4(0.11f);
2078cb93a386Sopenharmony_ci}
2079cb93a386Sopenharmony_ci
2080cb93a386Sopenharmony_ciFloat4 PixelRoutine::computeLum(Float4 &color, Float4 &lum, Float4 &mincol, Float4 &maxcol, Int4 &negative, Int4 &aboveOne)
2081cb93a386Sopenharmony_ci{
2082cb93a386Sopenharmony_ci	return As<Float4>(
2083cb93a386Sopenharmony_ci	    (negative &
2084cb93a386Sopenharmony_ci	     As<Int4>(lum + ((color - lum) * lum) / (lum - mincol))) |
2085cb93a386Sopenharmony_ci	    (~negative &
2086cb93a386Sopenharmony_ci	     ((aboveOne &
2087cb93a386Sopenharmony_ci	       As<Int4>(lum + ((color - lum) * (Float4(1.0f) - lum)) / (Float4(maxcol) - lum))) |
2088cb93a386Sopenharmony_ci	      (~aboveOne &
2089cb93a386Sopenharmony_ci	       As<Int4>(color)))));
2090cb93a386Sopenharmony_ci}
2091cb93a386Sopenharmony_ci
2092cb93a386Sopenharmony_civoid PixelRoutine::setLum(Vector4f &cbase, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z)
2093cb93a386Sopenharmony_ci{
2094cb93a386Sopenharmony_ci	Float4 lbase = lumRGB(cbase);
2095cb93a386Sopenharmony_ci	Float4 llum = lumRGB(clum);
2096cb93a386Sopenharmony_ci	Float4 ldiff = llum - lbase;
2097cb93a386Sopenharmony_ci
2098cb93a386Sopenharmony_ci	Vector4f color;
2099cb93a386Sopenharmony_ci	color.x = cbase.x + ldiff;
2100cb93a386Sopenharmony_ci	color.y = cbase.y + ldiff;
2101cb93a386Sopenharmony_ci	color.z = cbase.z + ldiff;
2102cb93a386Sopenharmony_ci
2103cb93a386Sopenharmony_ci	Float4 lum = lumRGB(color);
2104cb93a386Sopenharmony_ci	Float4 mincol = minRGB(color);
2105cb93a386Sopenharmony_ci	Float4 maxcol = maxRGB(color);
2106cb93a386Sopenharmony_ci
2107cb93a386Sopenharmony_ci	Int4 negative = CmpLT(mincol, Float4(0.0f));
2108cb93a386Sopenharmony_ci	Int4 aboveOne = CmpGT(maxcol, Float4(1.0f));
2109cb93a386Sopenharmony_ci
2110cb93a386Sopenharmony_ci	x = computeLum(color.x, lum, mincol, maxcol, negative, aboveOne);
2111cb93a386Sopenharmony_ci	y = computeLum(color.y, lum, mincol, maxcol, negative, aboveOne);
2112cb93a386Sopenharmony_ci	z = computeLum(color.z, lum, mincol, maxcol, negative, aboveOne);
2113cb93a386Sopenharmony_ci}
2114cb93a386Sopenharmony_ci
2115cb93a386Sopenharmony_civoid PixelRoutine::premultiply(Vector4f &c)
2116cb93a386Sopenharmony_ci{
2117cb93a386Sopenharmony_ci	Int4 nonZeroAlpha = CmpNEQ(c.w, Float4(0.0f));
2118cb93a386Sopenharmony_ci	c.x = As<Float4>(nonZeroAlpha & As<Int4>(c.x / c.w));
2119cb93a386Sopenharmony_ci	c.y = As<Float4>(nonZeroAlpha & As<Int4>(c.y / c.w));
2120cb93a386Sopenharmony_ci	c.z = As<Float4>(nonZeroAlpha & As<Int4>(c.z / c.w));
2121cb93a386Sopenharmony_ci}
2122cb93a386Sopenharmony_ci
2123cb93a386Sopenharmony_ciVector4f PixelRoutine::computeAdvancedBlendMode(int index, const Vector4f &src, const Vector4f &dst, const Vector4f &srcFactor, const Vector4f &dstFactor)
2124cb93a386Sopenharmony_ci{
2125cb93a386Sopenharmony_ci	Vector4f srcColor = src;
2126cb93a386Sopenharmony_ci	srcColor.x *= srcFactor.x;
2127cb93a386Sopenharmony_ci	srcColor.y *= srcFactor.y;
2128cb93a386Sopenharmony_ci	srcColor.z *= srcFactor.z;
2129cb93a386Sopenharmony_ci	srcColor.w *= srcFactor.w;
2130cb93a386Sopenharmony_ci
2131cb93a386Sopenharmony_ci	Vector4f dstColor = dst;
2132cb93a386Sopenharmony_ci	dstColor.x *= dstFactor.x;
2133cb93a386Sopenharmony_ci	dstColor.y *= dstFactor.y;
2134cb93a386Sopenharmony_ci	dstColor.z *= dstFactor.z;
2135cb93a386Sopenharmony_ci	dstColor.w *= dstFactor.w;
2136cb93a386Sopenharmony_ci
2137cb93a386Sopenharmony_ci	premultiply(srcColor);
2138cb93a386Sopenharmony_ci	premultiply(dstColor);
2139cb93a386Sopenharmony_ci
2140cb93a386Sopenharmony_ci	Vector4f blendedColor;
2141cb93a386Sopenharmony_ci
2142cb93a386Sopenharmony_ci	switch(state.blendState[index].blendOperation)
2143cb93a386Sopenharmony_ci	{
2144cb93a386Sopenharmony_ci	case VK_BLEND_OP_MULTIPLY_EXT:
2145cb93a386Sopenharmony_ci		blendedColor.x = (srcColor.x * dstColor.x);
2146cb93a386Sopenharmony_ci		blendedColor.y = (srcColor.y * dstColor.y);
2147cb93a386Sopenharmony_ci		blendedColor.z = (srcColor.z * dstColor.z);
2148cb93a386Sopenharmony_ci		break;
2149cb93a386Sopenharmony_ci	case VK_BLEND_OP_SCREEN_EXT:
2150cb93a386Sopenharmony_ci		blendedColor.x = srcColor.x + dstColor.x - (srcColor.x * dstColor.x);
2151cb93a386Sopenharmony_ci		blendedColor.y = srcColor.y + dstColor.y - (srcColor.y * dstColor.y);
2152cb93a386Sopenharmony_ci		blendedColor.z = srcColor.z + dstColor.z - (srcColor.z * dstColor.z);
2153cb93a386Sopenharmony_ci		break;
2154cb93a386Sopenharmony_ci	case VK_BLEND_OP_OVERLAY_EXT:
2155cb93a386Sopenharmony_ci		blendedColor.x = blendOpOverlay(srcColor.x, dstColor.x);
2156cb93a386Sopenharmony_ci		blendedColor.y = blendOpOverlay(srcColor.y, dstColor.y);
2157cb93a386Sopenharmony_ci		blendedColor.z = blendOpOverlay(srcColor.z, dstColor.z);
2158cb93a386Sopenharmony_ci		break;
2159cb93a386Sopenharmony_ci	case VK_BLEND_OP_DARKEN_EXT:
2160cb93a386Sopenharmony_ci		blendedColor.x = Min(srcColor.x, dstColor.x);
2161cb93a386Sopenharmony_ci		blendedColor.y = Min(srcColor.y, dstColor.y);
2162cb93a386Sopenharmony_ci		blendedColor.z = Min(srcColor.z, dstColor.z);
2163cb93a386Sopenharmony_ci		break;
2164cb93a386Sopenharmony_ci	case VK_BLEND_OP_LIGHTEN_EXT:
2165cb93a386Sopenharmony_ci		blendedColor.x = Max(srcColor.x, dstColor.x);
2166cb93a386Sopenharmony_ci		blendedColor.y = Max(srcColor.y, dstColor.y);
2167cb93a386Sopenharmony_ci		blendedColor.z = Max(srcColor.z, dstColor.z);
2168cb93a386Sopenharmony_ci		break;
2169cb93a386Sopenharmony_ci	case VK_BLEND_OP_COLORDODGE_EXT:
2170cb93a386Sopenharmony_ci		blendedColor.x = blendOpColorDodge(srcColor.x, dstColor.x);
2171cb93a386Sopenharmony_ci		blendedColor.y = blendOpColorDodge(srcColor.y, dstColor.y);
2172cb93a386Sopenharmony_ci		blendedColor.z = blendOpColorDodge(srcColor.z, dstColor.z);
2173cb93a386Sopenharmony_ci		break;
2174cb93a386Sopenharmony_ci	case VK_BLEND_OP_COLORBURN_EXT:
2175cb93a386Sopenharmony_ci		blendedColor.x = blendOpColorBurn(srcColor.x, dstColor.x);
2176cb93a386Sopenharmony_ci		blendedColor.y = blendOpColorBurn(srcColor.y, dstColor.y);
2177cb93a386Sopenharmony_ci		blendedColor.z = blendOpColorBurn(srcColor.z, dstColor.z);
2178cb93a386Sopenharmony_ci		break;
2179cb93a386Sopenharmony_ci	case VK_BLEND_OP_HARDLIGHT_EXT:
2180cb93a386Sopenharmony_ci		blendedColor.x = blendOpHardlight(srcColor.x, dstColor.x);
2181cb93a386Sopenharmony_ci		blendedColor.y = blendOpHardlight(srcColor.y, dstColor.y);
2182cb93a386Sopenharmony_ci		blendedColor.z = blendOpHardlight(srcColor.z, dstColor.z);
2183cb93a386Sopenharmony_ci		break;
2184cb93a386Sopenharmony_ci	case VK_BLEND_OP_SOFTLIGHT_EXT:
2185cb93a386Sopenharmony_ci		blendedColor.x = blendOpSoftlight(srcColor.x, dstColor.x);
2186cb93a386Sopenharmony_ci		blendedColor.y = blendOpSoftlight(srcColor.y, dstColor.y);
2187cb93a386Sopenharmony_ci		blendedColor.z = blendOpSoftlight(srcColor.z, dstColor.z);
2188cb93a386Sopenharmony_ci		break;
2189cb93a386Sopenharmony_ci	case VK_BLEND_OP_DIFFERENCE_EXT:
2190cb93a386Sopenharmony_ci		blendedColor.x = Abs(srcColor.x - dstColor.x);
2191cb93a386Sopenharmony_ci		blendedColor.y = Abs(srcColor.y - dstColor.y);
2192cb93a386Sopenharmony_ci		blendedColor.z = Abs(srcColor.z - dstColor.z);
2193cb93a386Sopenharmony_ci		break;
2194cb93a386Sopenharmony_ci	case VK_BLEND_OP_EXCLUSION_EXT:
2195cb93a386Sopenharmony_ci		blendedColor.x = srcColor.x + dstColor.x - (srcColor.x * dstColor.x * Float4(2.0f));
2196cb93a386Sopenharmony_ci		blendedColor.y = srcColor.y + dstColor.y - (srcColor.y * dstColor.y * Float4(2.0f));
2197cb93a386Sopenharmony_ci		blendedColor.z = srcColor.z + dstColor.z - (srcColor.z * dstColor.z * Float4(2.0f));
2198cb93a386Sopenharmony_ci		break;
2199cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_HUE_EXT:
2200cb93a386Sopenharmony_ci		setLumSat(srcColor, dstColor, dstColor, blendedColor.x, blendedColor.y, blendedColor.z);
2201cb93a386Sopenharmony_ci		break;
2202cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_SATURATION_EXT:
2203cb93a386Sopenharmony_ci		setLumSat(dstColor, srcColor, dstColor, blendedColor.x, blendedColor.y, blendedColor.z);
2204cb93a386Sopenharmony_ci		break;
2205cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_COLOR_EXT:
2206cb93a386Sopenharmony_ci		setLum(srcColor, dstColor, blendedColor.x, blendedColor.y, blendedColor.z);
2207cb93a386Sopenharmony_ci		break;
2208cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_LUMINOSITY_EXT:
2209cb93a386Sopenharmony_ci		setLum(dstColor, srcColor, blendedColor.x, blendedColor.y, blendedColor.z);
2210cb93a386Sopenharmony_ci		break;
2211cb93a386Sopenharmony_ci	default:
2212cb93a386Sopenharmony_ci		UNSUPPORTED("Unsupported advanced VkBlendOp: %d", int(state.blendState[index].blendOperation));
2213cb93a386Sopenharmony_ci		break;
2214cb93a386Sopenharmony_ci	}
2215cb93a386Sopenharmony_ci
2216cb93a386Sopenharmony_ci	Float4 p = srcColor.w * dstColor.w;
2217cb93a386Sopenharmony_ci	blendedColor.x *= p;
2218cb93a386Sopenharmony_ci	blendedColor.y *= p;
2219cb93a386Sopenharmony_ci	blendedColor.z *= p;
2220cb93a386Sopenharmony_ci
2221cb93a386Sopenharmony_ci	p = srcColor.w * (Float4(1.0f) - dstColor.w);
2222cb93a386Sopenharmony_ci	blendedColor.x += srcColor.x * p;
2223cb93a386Sopenharmony_ci	blendedColor.y += srcColor.y * p;
2224cb93a386Sopenharmony_ci	blendedColor.z += srcColor.z * p;
2225cb93a386Sopenharmony_ci
2226cb93a386Sopenharmony_ci	p = dstColor.w * (Float4(1.0f) - srcColor.w);
2227cb93a386Sopenharmony_ci	blendedColor.x += dstColor.x * p;
2228cb93a386Sopenharmony_ci	blendedColor.y += dstColor.y * p;
2229cb93a386Sopenharmony_ci	blendedColor.z += dstColor.z * p;
2230cb93a386Sopenharmony_ci
2231cb93a386Sopenharmony_ci	return blendedColor;
2232cb93a386Sopenharmony_ci}
2233cb93a386Sopenharmony_ci
2234cb93a386Sopenharmony_cibool PixelRoutine::blendFactorCanExceedFormatRange(VkBlendFactor blendFactor, vk::Format format)
2235cb93a386Sopenharmony_ci{
2236cb93a386Sopenharmony_ci	switch(blendFactor)
2237cb93a386Sopenharmony_ci	{
2238cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ZERO:
2239cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE:
2240cb93a386Sopenharmony_ci		return false;
2241cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_COLOR:
2242cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA:
2243cb93a386Sopenharmony_ci		// Source values have been clamped after fragment shader execution if the attachment format is normalized.
2244cb93a386Sopenharmony_ci		return false;
2245cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_COLOR:
2246cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_DST_ALPHA:
2247cb93a386Sopenharmony_ci		// Dest values have a valid range due to being read from the attachment.
2248cb93a386Sopenharmony_ci		return false;
2249cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
2250cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
2251cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
2252cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
2253cb93a386Sopenharmony_ci		// For signed formats, negative values cause the result to exceed 1.0.
2254cb93a386Sopenharmony_ci		return format.isSignedNormalized();
2255cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
2256cb93a386Sopenharmony_ci		// min(As, 1 - Ad)
2257cb93a386Sopenharmony_ci		return false;
2258cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_COLOR:
2259cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
2260cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
2261cb93a386Sopenharmony_ci	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
2262cb93a386Sopenharmony_ci		return false;
2263cb93a386Sopenharmony_ci
2264cb93a386Sopenharmony_ci	default:
2265cb93a386Sopenharmony_ci		UNSUPPORTED("VkBlendFactor: %d", int(blendFactor));
2266cb93a386Sopenharmony_ci		return false;
2267cb93a386Sopenharmony_ci	}
2268cb93a386Sopenharmony_ci}
2269cb93a386Sopenharmony_ci
2270cb93a386Sopenharmony_ciVector4f PixelRoutine::alphaBlend(int index, const Pointer<Byte> &cBuffer, const Vector4f &sourceColor, const Int &x)
2271cb93a386Sopenharmony_ci{
2272cb93a386Sopenharmony_ci	if(!state.blendState[index].alphaBlendEnable)
2273cb93a386Sopenharmony_ci	{
2274cb93a386Sopenharmony_ci		return sourceColor;
2275cb93a386Sopenharmony_ci	}
2276cb93a386Sopenharmony_ci
2277cb93a386Sopenharmony_ci	vk::Format format = state.colorFormat[index];
2278cb93a386Sopenharmony_ci	ASSERT(format.supportsColorAttachmentBlend());
2279cb93a386Sopenharmony_ci
2280cb93a386Sopenharmony_ci	Pointer<Byte> buffer = cBuffer;
2281cb93a386Sopenharmony_ci	Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2282cb93a386Sopenharmony_ci
2283cb93a386Sopenharmony_ci	// destColor holds four texel color values.
2284cb93a386Sopenharmony_ci	// Note: Despite the type being Vector4f, the colors may be stored as
2285cb93a386Sopenharmony_ci	// integers. Half-floats are stored as full 32-bit floats.
2286cb93a386Sopenharmony_ci	// Non-float and non-fixed point formats are not alpha blended.
2287cb93a386Sopenharmony_ci	Vector4f destColor;
2288cb93a386Sopenharmony_ci
2289cb93a386Sopenharmony_ci	switch(format)
2290cb93a386Sopenharmony_ci	{
2291cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SINT:
2292cb93a386Sopenharmony_ci	case VK_FORMAT_R32_UINT:
2293cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SFLOAT:
2294cb93a386Sopenharmony_ci		// FIXME: movlps
2295cb93a386Sopenharmony_ci		buffer += 4 * x;
2296cb93a386Sopenharmony_ci		destColor.x.x = *Pointer<Float>(buffer + 0);
2297cb93a386Sopenharmony_ci		destColor.x.y = *Pointer<Float>(buffer + 4);
2298cb93a386Sopenharmony_ci		buffer += pitchB;
2299cb93a386Sopenharmony_ci		// FIXME: movhps
2300cb93a386Sopenharmony_ci		destColor.x.z = *Pointer<Float>(buffer + 0);
2301cb93a386Sopenharmony_ci		destColor.x.w = *Pointer<Float>(buffer + 4);
2302cb93a386Sopenharmony_ci		destColor.y = destColor.z = destColor.w = Float4(1.0f);
2303cb93a386Sopenharmony_ci		break;
2304cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SINT:
2305cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_UINT:
2306cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SFLOAT:
2307cb93a386Sopenharmony_ci		buffer += 8 * x;
2308cb93a386Sopenharmony_ci		destColor.x = *Pointer<Float4>(buffer, 16);
2309cb93a386Sopenharmony_ci		buffer += pitchB;
2310cb93a386Sopenharmony_ci		destColor.y = *Pointer<Float4>(buffer, 16);
2311cb93a386Sopenharmony_ci		destColor.z = destColor.x;
2312cb93a386Sopenharmony_ci		destColor.x = ShuffleLowHigh(destColor.x, destColor.y, 0x0202);
2313cb93a386Sopenharmony_ci		destColor.z = ShuffleLowHigh(destColor.z, destColor.y, 0x1313);
2314cb93a386Sopenharmony_ci		destColor.y = destColor.z;
2315cb93a386Sopenharmony_ci		destColor.z = destColor.w = Float4(1.0f);
2316cb93a386Sopenharmony_ci		break;
2317cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SFLOAT:
2318cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SINT:
2319cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_UINT:
2320cb93a386Sopenharmony_ci		buffer += 16 * x;
2321cb93a386Sopenharmony_ci		destColor.x = *Pointer<Float4>(buffer + 0, 16);
2322cb93a386Sopenharmony_ci		destColor.y = *Pointer<Float4>(buffer + 16, 16);
2323cb93a386Sopenharmony_ci		buffer += pitchB;
2324cb93a386Sopenharmony_ci		destColor.z = *Pointer<Float4>(buffer + 0, 16);
2325cb93a386Sopenharmony_ci		destColor.w = *Pointer<Float4>(buffer + 16, 16);
2326cb93a386Sopenharmony_ci		transpose4x4(destColor.x, destColor.y, destColor.z, destColor.w);
2327cb93a386Sopenharmony_ci		break;
2328cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UNORM:
2329cb93a386Sopenharmony_ci		buffer += 2 * x;
2330cb93a386Sopenharmony_ci		destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0)));
2331cb93a386Sopenharmony_ci		destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 2)));
2332cb93a386Sopenharmony_ci		buffer += pitchB;
2333cb93a386Sopenharmony_ci		destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0)));
2334cb93a386Sopenharmony_ci		destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 2)));
2335cb93a386Sopenharmony_ci		destColor.x *= Float4(1.0f / 0xFFFF);
2336cb93a386Sopenharmony_ci		destColor.y = destColor.z = destColor.w = Float4(1.0f);
2337cb93a386Sopenharmony_ci		break;
2338cb93a386Sopenharmony_ci	case VK_FORMAT_R16_SFLOAT:
2339cb93a386Sopenharmony_ci		buffer += 2 * x;
2340cb93a386Sopenharmony_ci		destColor.x.x = Float(*Pointer<Half>(buffer + 0));
2341cb93a386Sopenharmony_ci		destColor.x.y = Float(*Pointer<Half>(buffer + 2));
2342cb93a386Sopenharmony_ci		buffer += pitchB;
2343cb93a386Sopenharmony_ci		destColor.x.z = Float(*Pointer<Half>(buffer + 0));
2344cb93a386Sopenharmony_ci		destColor.x.w = Float(*Pointer<Half>(buffer + 2));
2345cb93a386Sopenharmony_ci		destColor.y = destColor.z = destColor.w = Float4(1.0f);
2346cb93a386Sopenharmony_ci		break;
2347cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UNORM:
2348cb93a386Sopenharmony_ci		buffer += 4 * x;
2349cb93a386Sopenharmony_ci		destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0)));
2350cb93a386Sopenharmony_ci		destColor.y.x = Float(Int(*Pointer<UShort>(buffer + 2)));
2351cb93a386Sopenharmony_ci		destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 4)));
2352cb93a386Sopenharmony_ci		destColor.y.y = Float(Int(*Pointer<UShort>(buffer + 6)));
2353cb93a386Sopenharmony_ci		buffer += pitchB;
2354cb93a386Sopenharmony_ci		destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0)));
2355cb93a386Sopenharmony_ci		destColor.y.z = Float(Int(*Pointer<UShort>(buffer + 2)));
2356cb93a386Sopenharmony_ci		destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 4)));
2357cb93a386Sopenharmony_ci		destColor.y.w = Float(Int(*Pointer<UShort>(buffer + 6)));
2358cb93a386Sopenharmony_ci		destColor.x *= Float4(1.0f / 0xFFFF);
2359cb93a386Sopenharmony_ci		destColor.y *= Float4(1.0f / 0xFFFF);
2360cb93a386Sopenharmony_ci		destColor.z = destColor.w = Float4(1.0f);
2361cb93a386Sopenharmony_ci		break;
2362cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_SFLOAT:
2363cb93a386Sopenharmony_ci		buffer += 4 * x;
2364cb93a386Sopenharmony_ci		destColor.x.x = Float(*Pointer<Half>(buffer + 0));
2365cb93a386Sopenharmony_ci		destColor.y.x = Float(*Pointer<Half>(buffer + 2));
2366cb93a386Sopenharmony_ci		destColor.x.y = Float(*Pointer<Half>(buffer + 4));
2367cb93a386Sopenharmony_ci		destColor.y.y = Float(*Pointer<Half>(buffer + 6));
2368cb93a386Sopenharmony_ci		buffer += pitchB;
2369cb93a386Sopenharmony_ci		destColor.x.z = Float(*Pointer<Half>(buffer + 0));
2370cb93a386Sopenharmony_ci		destColor.y.z = Float(*Pointer<Half>(buffer + 2));
2371cb93a386Sopenharmony_ci		destColor.x.w = Float(*Pointer<Half>(buffer + 4));
2372cb93a386Sopenharmony_ci		destColor.y.w = Float(*Pointer<Half>(buffer + 6));
2373cb93a386Sopenharmony_ci		destColor.z = destColor.w = Float4(1.0f);
2374cb93a386Sopenharmony_ci		break;
2375cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UNORM:
2376cb93a386Sopenharmony_ci		buffer += 8 * x;
2377cb93a386Sopenharmony_ci		destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0x0)));
2378cb93a386Sopenharmony_ci		destColor.y.x = Float(Int(*Pointer<UShort>(buffer + 0x2)));
2379cb93a386Sopenharmony_ci		destColor.z.x = Float(Int(*Pointer<UShort>(buffer + 0x4)));
2380cb93a386Sopenharmony_ci		destColor.w.x = Float(Int(*Pointer<UShort>(buffer + 0x6)));
2381cb93a386Sopenharmony_ci		destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 0x8)));
2382cb93a386Sopenharmony_ci		destColor.y.y = Float(Int(*Pointer<UShort>(buffer + 0xa)));
2383cb93a386Sopenharmony_ci		destColor.z.y = Float(Int(*Pointer<UShort>(buffer + 0xc)));
2384cb93a386Sopenharmony_ci		destColor.w.y = Float(Int(*Pointer<UShort>(buffer + 0xe)));
2385cb93a386Sopenharmony_ci		buffer += pitchB;
2386cb93a386Sopenharmony_ci		destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0x0)));
2387cb93a386Sopenharmony_ci		destColor.y.z = Float(Int(*Pointer<UShort>(buffer + 0x2)));
2388cb93a386Sopenharmony_ci		destColor.z.z = Float(Int(*Pointer<UShort>(buffer + 0x4)));
2389cb93a386Sopenharmony_ci		destColor.w.z = Float(Int(*Pointer<UShort>(buffer + 0x6)));
2390cb93a386Sopenharmony_ci		destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 0x8)));
2391cb93a386Sopenharmony_ci		destColor.y.w = Float(Int(*Pointer<UShort>(buffer + 0xa)));
2392cb93a386Sopenharmony_ci		destColor.z.w = Float(Int(*Pointer<UShort>(buffer + 0xc)));
2393cb93a386Sopenharmony_ci		destColor.w.w = Float(Int(*Pointer<UShort>(buffer + 0xe)));
2394cb93a386Sopenharmony_ci		destColor.x *= Float4(1.0f / 0xFFFF);
2395cb93a386Sopenharmony_ci		destColor.y *= Float4(1.0f / 0xFFFF);
2396cb93a386Sopenharmony_ci		destColor.z *= Float4(1.0f / 0xFFFF);
2397cb93a386Sopenharmony_ci		destColor.w *= Float4(1.0f / 0xFFFF);
2398cb93a386Sopenharmony_ci		break;
2399cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_SFLOAT:
2400cb93a386Sopenharmony_ci		buffer += 8 * x;
2401cb93a386Sopenharmony_ci		destColor.x.x = Float(*Pointer<Half>(buffer + 0x0));
2402cb93a386Sopenharmony_ci		destColor.y.x = Float(*Pointer<Half>(buffer + 0x2));
2403cb93a386Sopenharmony_ci		destColor.z.x = Float(*Pointer<Half>(buffer + 0x4));
2404cb93a386Sopenharmony_ci		destColor.w.x = Float(*Pointer<Half>(buffer + 0x6));
2405cb93a386Sopenharmony_ci		destColor.x.y = Float(*Pointer<Half>(buffer + 0x8));
2406cb93a386Sopenharmony_ci		destColor.y.y = Float(*Pointer<Half>(buffer + 0xa));
2407cb93a386Sopenharmony_ci		destColor.z.y = Float(*Pointer<Half>(buffer + 0xc));
2408cb93a386Sopenharmony_ci		destColor.w.y = Float(*Pointer<Half>(buffer + 0xe));
2409cb93a386Sopenharmony_ci		buffer += pitchB;
2410cb93a386Sopenharmony_ci		destColor.x.z = Float(*Pointer<Half>(buffer + 0x0));
2411cb93a386Sopenharmony_ci		destColor.y.z = Float(*Pointer<Half>(buffer + 0x2));
2412cb93a386Sopenharmony_ci		destColor.z.z = Float(*Pointer<Half>(buffer + 0x4));
2413cb93a386Sopenharmony_ci		destColor.w.z = Float(*Pointer<Half>(buffer + 0x6));
2414cb93a386Sopenharmony_ci		destColor.x.w = Float(*Pointer<Half>(buffer + 0x8));
2415cb93a386Sopenharmony_ci		destColor.y.w = Float(*Pointer<Half>(buffer + 0xa));
2416cb93a386Sopenharmony_ci		destColor.z.w = Float(*Pointer<Half>(buffer + 0xc));
2417cb93a386Sopenharmony_ci		destColor.w.w = Float(*Pointer<Half>(buffer + 0xe));
2418cb93a386Sopenharmony_ci		break;
2419cb93a386Sopenharmony_ci	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
2420cb93a386Sopenharmony_ci		buffer += 4 * x;
2421cb93a386Sopenharmony_ci		destColor.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
2422cb93a386Sopenharmony_ci		destColor.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
2423cb93a386Sopenharmony_ci		buffer += pitchB;
2424cb93a386Sopenharmony_ci		destColor.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
2425cb93a386Sopenharmony_ci		destColor.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
2426cb93a386Sopenharmony_ci		transpose4x3(destColor.x, destColor.y, destColor.z, destColor.w);
2427cb93a386Sopenharmony_ci		destColor.w = Float4(1.0f);
2428cb93a386Sopenharmony_ci		break;
2429cb93a386Sopenharmony_ci	default:
2430cb93a386Sopenharmony_ci		{
2431cb93a386Sopenharmony_ci			// Attempt to read an integer based format and convert it to float
2432cb93a386Sopenharmony_ci			Vector4s color;
2433cb93a386Sopenharmony_ci			readPixel(index, cBuffer, x, color);
2434cb93a386Sopenharmony_ci			destColor.x = convertFloat32(As<UShort4>(color.x));
2435cb93a386Sopenharmony_ci			destColor.y = convertFloat32(As<UShort4>(color.y));
2436cb93a386Sopenharmony_ci			destColor.z = convertFloat32(As<UShort4>(color.z));
2437cb93a386Sopenharmony_ci			destColor.w = convertFloat32(As<UShort4>(color.w));
2438cb93a386Sopenharmony_ci		}
2439cb93a386Sopenharmony_ci		break;
2440cb93a386Sopenharmony_ci	}
2441cb93a386Sopenharmony_ci
2442cb93a386Sopenharmony_ci	Vector4f sourceFactor;
2443cb93a386Sopenharmony_ci	Vector4f destFactor;
2444cb93a386Sopenharmony_ci
2445cb93a386Sopenharmony_ci	blendFactorRGB(sourceFactor, sourceColor, destColor, state.blendState[index].sourceBlendFactor, format);
2446cb93a386Sopenharmony_ci	blendFactorRGB(destFactor, sourceColor, destColor, state.blendState[index].destBlendFactor, format);
2447cb93a386Sopenharmony_ci	blendFactorAlpha(sourceFactor.w, sourceColor.w, destColor.w, state.blendState[index].sourceBlendFactorAlpha, format);
2448cb93a386Sopenharmony_ci	blendFactorAlpha(destFactor.w, sourceColor.w, destColor.w, state.blendState[index].destBlendFactorAlpha, format);
2449cb93a386Sopenharmony_ci
2450cb93a386Sopenharmony_ci	Vector4f blendedColor;
2451cb93a386Sopenharmony_ci
2452cb93a386Sopenharmony_ci	switch(state.blendState[index].blendOperation)
2453cb93a386Sopenharmony_ci	{
2454cb93a386Sopenharmony_ci	case VK_BLEND_OP_ADD:
2455cb93a386Sopenharmony_ci		blendedColor.x = sourceColor.x * sourceFactor.x + destColor.x * destFactor.x;
2456cb93a386Sopenharmony_ci		blendedColor.y = sourceColor.y * sourceFactor.y + destColor.y * destFactor.y;
2457cb93a386Sopenharmony_ci		blendedColor.z = sourceColor.z * sourceFactor.z + destColor.z * destFactor.z;
2458cb93a386Sopenharmony_ci		break;
2459cb93a386Sopenharmony_ci	case VK_BLEND_OP_SUBTRACT:
2460cb93a386Sopenharmony_ci		blendedColor.x = sourceColor.x * sourceFactor.x - destColor.x * destFactor.x;
2461cb93a386Sopenharmony_ci		blendedColor.y = sourceColor.y * sourceFactor.y - destColor.y * destFactor.y;
2462cb93a386Sopenharmony_ci		blendedColor.z = sourceColor.z * sourceFactor.z - destColor.z * destFactor.z;
2463cb93a386Sopenharmony_ci		break;
2464cb93a386Sopenharmony_ci	case VK_BLEND_OP_REVERSE_SUBTRACT:
2465cb93a386Sopenharmony_ci		blendedColor.x = destColor.x * destFactor.x - sourceColor.x * sourceFactor.x;
2466cb93a386Sopenharmony_ci		blendedColor.y = destColor.y * destFactor.y - sourceColor.y * sourceFactor.y;
2467cb93a386Sopenharmony_ci		blendedColor.z = destColor.z * destFactor.z - sourceColor.z * sourceFactor.z;
2468cb93a386Sopenharmony_ci		break;
2469cb93a386Sopenharmony_ci	case VK_BLEND_OP_MIN:
2470cb93a386Sopenharmony_ci		blendedColor.x = Min(sourceColor.x, destColor.x);
2471cb93a386Sopenharmony_ci		blendedColor.y = Min(sourceColor.y, destColor.y);
2472cb93a386Sopenharmony_ci		blendedColor.z = Min(sourceColor.z, destColor.z);
2473cb93a386Sopenharmony_ci		break;
2474cb93a386Sopenharmony_ci	case VK_BLEND_OP_MAX:
2475cb93a386Sopenharmony_ci		blendedColor.x = Max(sourceColor.x, destColor.x);
2476cb93a386Sopenharmony_ci		blendedColor.y = Max(sourceColor.y, destColor.y);
2477cb93a386Sopenharmony_ci		blendedColor.z = Max(sourceColor.z, destColor.z);
2478cb93a386Sopenharmony_ci		break;
2479cb93a386Sopenharmony_ci	case VK_BLEND_OP_SRC_EXT:
2480cb93a386Sopenharmony_ci		blendedColor.x = sourceColor.x * sourceFactor.x;  // TODO(b/204583457)
2481cb93a386Sopenharmony_ci		blendedColor.y = sourceColor.y * sourceFactor.y;  // TODO(b/204583457)
2482cb93a386Sopenharmony_ci		blendedColor.z = sourceColor.z * sourceFactor.z;  // TODO(b/204583457)
2483cb93a386Sopenharmony_ci		break;
2484cb93a386Sopenharmony_ci	case VK_BLEND_OP_DST_EXT:
2485cb93a386Sopenharmony_ci		blendedColor.x = destColor.x * destFactor.x;  // TODO(b/204583457)
2486cb93a386Sopenharmony_ci		blendedColor.y = destColor.y * destFactor.y;  // TODO(b/204583457)
2487cb93a386Sopenharmony_ci		blendedColor.z = destColor.z * destFactor.z;  // TODO(b/204583457)
2488cb93a386Sopenharmony_ci		break;
2489cb93a386Sopenharmony_ci	case VK_BLEND_OP_ZERO_EXT:
2490cb93a386Sopenharmony_ci		blendedColor.x = Float4(0.0f);
2491cb93a386Sopenharmony_ci		blendedColor.y = Float4(0.0f);
2492cb93a386Sopenharmony_ci		blendedColor.z = Float4(0.0f);
2493cb93a386Sopenharmony_ci		break;
2494cb93a386Sopenharmony_ci	case VK_BLEND_OP_MULTIPLY_EXT:
2495cb93a386Sopenharmony_ci	case VK_BLEND_OP_SCREEN_EXT:
2496cb93a386Sopenharmony_ci	case VK_BLEND_OP_OVERLAY_EXT:
2497cb93a386Sopenharmony_ci	case VK_BLEND_OP_DARKEN_EXT:
2498cb93a386Sopenharmony_ci	case VK_BLEND_OP_LIGHTEN_EXT:
2499cb93a386Sopenharmony_ci	case VK_BLEND_OP_COLORDODGE_EXT:
2500cb93a386Sopenharmony_ci	case VK_BLEND_OP_COLORBURN_EXT:
2501cb93a386Sopenharmony_ci	case VK_BLEND_OP_HARDLIGHT_EXT:
2502cb93a386Sopenharmony_ci	case VK_BLEND_OP_SOFTLIGHT_EXT:
2503cb93a386Sopenharmony_ci	case VK_BLEND_OP_DIFFERENCE_EXT:
2504cb93a386Sopenharmony_ci	case VK_BLEND_OP_EXCLUSION_EXT:
2505cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_HUE_EXT:
2506cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_SATURATION_EXT:
2507cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_COLOR_EXT:
2508cb93a386Sopenharmony_ci	case VK_BLEND_OP_HSL_LUMINOSITY_EXT:
2509cb93a386Sopenharmony_ci		blendedColor = computeAdvancedBlendMode(index, sourceColor, destColor, sourceFactor, destFactor);
2510cb93a386Sopenharmony_ci		break;
2511cb93a386Sopenharmony_ci	default:
2512cb93a386Sopenharmony_ci		UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
2513cb93a386Sopenharmony_ci	}
2514cb93a386Sopenharmony_ci
2515cb93a386Sopenharmony_ci	switch(state.blendState[index].blendOperationAlpha)
2516cb93a386Sopenharmony_ci	{
2517cb93a386Sopenharmony_ci	case VK_BLEND_OP_ADD:
2518cb93a386Sopenharmony_ci		blendedColor.w = sourceColor.w * sourceFactor.w + destColor.w * destFactor.w;
2519cb93a386Sopenharmony_ci		break;
2520cb93a386Sopenharmony_ci	case VK_BLEND_OP_SUBTRACT:
2521cb93a386Sopenharmony_ci		blendedColor.w = sourceColor.w * sourceFactor.w - destColor.w * destFactor.w;
2522cb93a386Sopenharmony_ci		break;
2523cb93a386Sopenharmony_ci	case VK_BLEND_OP_REVERSE_SUBTRACT:
2524cb93a386Sopenharmony_ci		blendedColor.w = destColor.w * destFactor.w - sourceColor.w * sourceFactor.w;
2525cb93a386Sopenharmony_ci		break;
2526cb93a386Sopenharmony_ci	case VK_BLEND_OP_MIN:
2527cb93a386Sopenharmony_ci		blendedColor.w = Min(sourceColor.w, destColor.w);
2528cb93a386Sopenharmony_ci		break;
2529cb93a386Sopenharmony_ci	case VK_BLEND_OP_MAX:
2530cb93a386Sopenharmony_ci		blendedColor.w = Max(sourceColor.w, destColor.w);
2531cb93a386Sopenharmony_ci		break;
2532cb93a386Sopenharmony_ci	case VK_BLEND_OP_SRC_EXT:
2533cb93a386Sopenharmony_ci		blendedColor.w = sourceColor.w * sourceFactor.w;  // TODO(b/204583457)
2534cb93a386Sopenharmony_ci		break;
2535cb93a386Sopenharmony_ci	case VK_BLEND_OP_DST_EXT:
2536cb93a386Sopenharmony_ci		blendedColor.w = destColor.w * destFactor.w;  // TODO(b/204583457)
2537cb93a386Sopenharmony_ci		break;
2538cb93a386Sopenharmony_ci	case VK_BLEND_OP_ZERO_EXT:
2539cb93a386Sopenharmony_ci		blendedColor.w = Float4(0.0f);
2540cb93a386Sopenharmony_ci		break;
2541cb93a386Sopenharmony_ci	case VK_BLEND_OP_MULTIPLY_EXT:
2542cb93a386Sopenharmony_ci		// All of the currently supported advanced blend modes compute the alpha the same way
2543cb93a386Sopenharmony_ci		// Use VK_BLEND_OP_MULTIPLY_EXT as a placeholder
2544cb93a386Sopenharmony_ci		blendedColor.w = sourceColor.w + destColor.w - (sourceColor.w * destColor.w);
2545cb93a386Sopenharmony_ci		break;
2546cb93a386Sopenharmony_ci	default:
2547cb93a386Sopenharmony_ci		UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
2548cb93a386Sopenharmony_ci	}
2549cb93a386Sopenharmony_ci
2550cb93a386Sopenharmony_ci	return blendedColor;
2551cb93a386Sopenharmony_ci}
2552cb93a386Sopenharmony_ci
2553cb93a386Sopenharmony_civoid PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &color, const Int &sMask, const Int &zMask, const Int &cMask)
2554cb93a386Sopenharmony_ci{
2555cb93a386Sopenharmony_ci	vk::Format format = state.colorFormat[index];
2556cb93a386Sopenharmony_ci	switch(format)
2557cb93a386Sopenharmony_ci	{
2558cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UNORM:
2559cb93a386Sopenharmony_ci		color.w = Min(Max(color.w, Float4(0.0f)), Float4(1.0f));  // TODO(b/204560089): Omit clamp if redundant
2560cb93a386Sopenharmony_ci		color.w = As<Float4>(RoundInt(color.w * Float4(0xFFFF)));
2561cb93a386Sopenharmony_ci		color.z = Min(Max(color.z, Float4(0.0f)), Float4(1.0f));  // TODO(b/204560089): Omit clamp if redundant
2562cb93a386Sopenharmony_ci		color.z = As<Float4>(RoundInt(color.z * Float4(0xFFFF)));
2563cb93a386Sopenharmony_ci		// [[fallthrough]]
2564cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UNORM:
2565cb93a386Sopenharmony_ci		color.y = Min(Max(color.y, Float4(0.0f)), Float4(1.0f));  // TODO(b/204560089): Omit clamp if redundant
2566cb93a386Sopenharmony_ci		color.y = As<Float4>(RoundInt(color.y * Float4(0xFFFF)));
2567cb93a386Sopenharmony_ci		//[[fallthrough]]
2568cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UNORM:
2569cb93a386Sopenharmony_ci		color.x = Min(Max(color.x, Float4(0.0f)), Float4(1.0f));  // TODO(b/204560089): Omit clamp if redundant
2570cb93a386Sopenharmony_ci		color.x = As<Float4>(RoundInt(color.x * Float4(0xFFFF)));
2571cb93a386Sopenharmony_ci		break;
2572cb93a386Sopenharmony_ci	default:
2573cb93a386Sopenharmony_ci		// TODO(b/204560089): Omit clamp if redundant
2574cb93a386Sopenharmony_ci		if(format.isUnsignedNormalized())
2575cb93a386Sopenharmony_ci		{
2576cb93a386Sopenharmony_ci			color.x = Min(Max(color.x, Float4(0.0f)), Float4(1.0f));
2577cb93a386Sopenharmony_ci			color.y = Min(Max(color.y, Float4(0.0f)), Float4(1.0f));
2578cb93a386Sopenharmony_ci			color.z = Min(Max(color.z, Float4(0.0f)), Float4(1.0f));
2579cb93a386Sopenharmony_ci			color.w = Min(Max(color.w, Float4(0.0f)), Float4(1.0f));
2580cb93a386Sopenharmony_ci		}
2581cb93a386Sopenharmony_ci		else if(format.isSignedNormalized())
2582cb93a386Sopenharmony_ci		{
2583cb93a386Sopenharmony_ci			color.x = Min(Max(color.x, Float4(-1.0f)), Float4(1.0f));
2584cb93a386Sopenharmony_ci			color.y = Min(Max(color.y, Float4(-1.0f)), Float4(1.0f));
2585cb93a386Sopenharmony_ci			color.z = Min(Max(color.z, Float4(-1.0f)), Float4(1.0f));
2586cb93a386Sopenharmony_ci			color.w = Min(Max(color.w, Float4(-1.0f)), Float4(1.0f));
2587cb93a386Sopenharmony_ci		}
2588cb93a386Sopenharmony_ci	}
2589cb93a386Sopenharmony_ci
2590cb93a386Sopenharmony_ci	switch(format)
2591cb93a386Sopenharmony_ci	{
2592cb93a386Sopenharmony_ci	case VK_FORMAT_R16_SFLOAT:
2593cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SFLOAT:
2594cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SINT:
2595cb93a386Sopenharmony_ci	case VK_FORMAT_R32_UINT:
2596cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UNORM:
2597cb93a386Sopenharmony_ci	case VK_FORMAT_R16_SINT:
2598cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UINT:
2599cb93a386Sopenharmony_ci	case VK_FORMAT_R8_SINT:
2600cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UINT:
2601cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2602cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
2603cb93a386Sopenharmony_ci		break;
2604cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_SFLOAT:
2605cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SFLOAT:
2606cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SINT:
2607cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_UINT:
2608cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UNORM:
2609cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_SINT:
2610cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UINT:
2611cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_SINT:
2612cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UINT:
2613cb93a386Sopenharmony_ci		color.z = color.x;
2614cb93a386Sopenharmony_ci		color.x = UnpackLow(color.x, color.y);
2615cb93a386Sopenharmony_ci		color.z = UnpackHigh(color.z, color.y);
2616cb93a386Sopenharmony_ci		color.y = color.z;
2617cb93a386Sopenharmony_ci		break;
2618cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_SFLOAT:
2619cb93a386Sopenharmony_ci	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
2620cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SFLOAT:
2621cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SINT:
2622cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_UINT:
2623cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UNORM:
2624cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_SINT:
2625cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UINT:
2626cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SINT:
2627cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UINT:
2628cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2629cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
2630cb93a386Sopenharmony_ci		transpose4x4(color.x, color.y, color.z, color.w);
2631cb93a386Sopenharmony_ci		break;
2632cb93a386Sopenharmony_ci	default:
2633cb93a386Sopenharmony_ci		UNSUPPORTED("VkFormat: %d", int(format));
2634cb93a386Sopenharmony_ci	}
2635cb93a386Sopenharmony_ci
2636cb93a386Sopenharmony_ci	int rgbaWriteMask = state.colorWriteActive(index);
2637cb93a386Sopenharmony_ci	int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
2638cb93a386Sopenharmony_ci
2639cb93a386Sopenharmony_ci	Int xMask;  // Combination of all masks
2640cb93a386Sopenharmony_ci
2641cb93a386Sopenharmony_ci	if(state.depthTestActive)
2642cb93a386Sopenharmony_ci	{
2643cb93a386Sopenharmony_ci		xMask = zMask;
2644cb93a386Sopenharmony_ci	}
2645cb93a386Sopenharmony_ci	else
2646cb93a386Sopenharmony_ci	{
2647cb93a386Sopenharmony_ci		xMask = cMask;
2648cb93a386Sopenharmony_ci	}
2649cb93a386Sopenharmony_ci
2650cb93a386Sopenharmony_ci	if(state.stencilActive)
2651cb93a386Sopenharmony_ci	{
2652cb93a386Sopenharmony_ci		xMask &= sMask;
2653cb93a386Sopenharmony_ci	}
2654cb93a386Sopenharmony_ci
2655cb93a386Sopenharmony_ci	Pointer<Byte> buffer = cBuffer;
2656cb93a386Sopenharmony_ci	Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2657cb93a386Sopenharmony_ci	Float4 value;
2658cb93a386Sopenharmony_ci
2659cb93a386Sopenharmony_ci	switch(format)
2660cb93a386Sopenharmony_ci	{
2661cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SFLOAT:
2662cb93a386Sopenharmony_ci	case VK_FORMAT_R32_SINT:
2663cb93a386Sopenharmony_ci	case VK_FORMAT_R32_UINT:
2664cb93a386Sopenharmony_ci		if(rgbaWriteMask & 0x00000001)
2665cb93a386Sopenharmony_ci		{
2666cb93a386Sopenharmony_ci			buffer += 4 * x;
2667cb93a386Sopenharmony_ci
2668cb93a386Sopenharmony_ci			// FIXME: movlps
2669cb93a386Sopenharmony_ci			value.x = *Pointer<Float>(buffer + 0);
2670cb93a386Sopenharmony_ci			value.y = *Pointer<Float>(buffer + 4);
2671cb93a386Sopenharmony_ci
2672cb93a386Sopenharmony_ci			buffer += pitchB;
2673cb93a386Sopenharmony_ci
2674cb93a386Sopenharmony_ci			// FIXME: movhps
2675cb93a386Sopenharmony_ci			value.z = *Pointer<Float>(buffer + 0);
2676cb93a386Sopenharmony_ci			value.w = *Pointer<Float>(buffer + 4);
2677cb93a386Sopenharmony_ci
2678cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2679cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2680cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(value));
2681cb93a386Sopenharmony_ci
2682cb93a386Sopenharmony_ci			// FIXME: movhps
2683cb93a386Sopenharmony_ci			*Pointer<Float>(buffer + 0) = color.x.z;
2684cb93a386Sopenharmony_ci			*Pointer<Float>(buffer + 4) = color.x.w;
2685cb93a386Sopenharmony_ci
2686cb93a386Sopenharmony_ci			buffer -= pitchB;
2687cb93a386Sopenharmony_ci
2688cb93a386Sopenharmony_ci			// FIXME: movlps
2689cb93a386Sopenharmony_ci			*Pointer<Float>(buffer + 0) = color.x.x;
2690cb93a386Sopenharmony_ci			*Pointer<Float>(buffer + 4) = color.x.y;
2691cb93a386Sopenharmony_ci		}
2692cb93a386Sopenharmony_ci		break;
2693cb93a386Sopenharmony_ci	case VK_FORMAT_R16_SFLOAT:
2694cb93a386Sopenharmony_ci		if(rgbaWriteMask & 0x00000001)
2695cb93a386Sopenharmony_ci		{
2696cb93a386Sopenharmony_ci			buffer += 2 * x;
2697cb93a386Sopenharmony_ci
2698cb93a386Sopenharmony_ci			value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
2699cb93a386Sopenharmony_ci			value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
2700cb93a386Sopenharmony_ci
2701cb93a386Sopenharmony_ci			buffer += pitchB;
2702cb93a386Sopenharmony_ci
2703cb93a386Sopenharmony_ci			value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
2704cb93a386Sopenharmony_ci			value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
2705cb93a386Sopenharmony_ci
2706cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2707cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2708cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(value));
2709cb93a386Sopenharmony_ci
2710cb93a386Sopenharmony_ci			*Pointer<Half>(buffer + 0) = Half(color.x.z);
2711cb93a386Sopenharmony_ci			*Pointer<Half>(buffer + 2) = Half(color.x.w);
2712cb93a386Sopenharmony_ci
2713cb93a386Sopenharmony_ci			buffer -= pitchB;
2714cb93a386Sopenharmony_ci
2715cb93a386Sopenharmony_ci			*Pointer<Half>(buffer + 0) = Half(color.x.x);
2716cb93a386Sopenharmony_ci			*Pointer<Half>(buffer + 2) = Half(color.x.y);
2717cb93a386Sopenharmony_ci		}
2718cb93a386Sopenharmony_ci		break;
2719cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UNORM:
2720cb93a386Sopenharmony_ci	case VK_FORMAT_R16_SINT:
2721cb93a386Sopenharmony_ci	case VK_FORMAT_R16_UINT:
2722cb93a386Sopenharmony_ci		if(rgbaWriteMask & 0x00000001)
2723cb93a386Sopenharmony_ci		{
2724cb93a386Sopenharmony_ci			buffer += 2 * x;
2725cb93a386Sopenharmony_ci
2726cb93a386Sopenharmony_ci			UShort4 xyzw;
2727cb93a386Sopenharmony_ci			xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2728cb93a386Sopenharmony_ci
2729cb93a386Sopenharmony_ci			buffer += pitchB;
2730cb93a386Sopenharmony_ci
2731cb93a386Sopenharmony_ci			xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2732cb93a386Sopenharmony_ci			value = As<Float4>(Int4(xyzw));
2733cb93a386Sopenharmony_ci
2734cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2735cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2736cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(value));
2737cb93a386Sopenharmony_ci
2738cb93a386Sopenharmony_ci			Float component = color.x.z;
2739cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2740cb93a386Sopenharmony_ci			component = color.x.w;
2741cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2742cb93a386Sopenharmony_ci
2743cb93a386Sopenharmony_ci			buffer -= pitchB;
2744cb93a386Sopenharmony_ci
2745cb93a386Sopenharmony_ci			component = color.x.x;
2746cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2747cb93a386Sopenharmony_ci			component = color.x.y;
2748cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2749cb93a386Sopenharmony_ci		}
2750cb93a386Sopenharmony_ci		break;
2751cb93a386Sopenharmony_ci	case VK_FORMAT_R8_SINT:
2752cb93a386Sopenharmony_ci	case VK_FORMAT_R8_UINT:
2753cb93a386Sopenharmony_ci		if(rgbaWriteMask & 0x00000001)
2754cb93a386Sopenharmony_ci		{
2755cb93a386Sopenharmony_ci			buffer += x;
2756cb93a386Sopenharmony_ci
2757cb93a386Sopenharmony_ci			UInt xyzw, packedCol;
2758cb93a386Sopenharmony_ci
2759cb93a386Sopenharmony_ci			xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
2760cb93a386Sopenharmony_ci			buffer += pitchB;
2761cb93a386Sopenharmony_ci			xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
2762cb93a386Sopenharmony_ci
2763cb93a386Sopenharmony_ci			Short4 tmpCol = Short4(As<Int4>(color.x));
2764cb93a386Sopenharmony_ci			if(format == VK_FORMAT_R8_SINT)
2765cb93a386Sopenharmony_ci			{
2766cb93a386Sopenharmony_ci				tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
2767cb93a386Sopenharmony_ci			}
2768cb93a386Sopenharmony_ci			else
2769cb93a386Sopenharmony_ci			{
2770cb93a386Sopenharmony_ci				tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
2771cb93a386Sopenharmony_ci			}
2772cb93a386Sopenharmony_ci			packedCol = Extract(As<Int2>(tmpCol), 0);
2773cb93a386Sopenharmony_ci
2774cb93a386Sopenharmony_ci			packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2775cb93a386Sopenharmony_ci			            (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2776cb93a386Sopenharmony_ci
2777cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2778cb93a386Sopenharmony_ci			buffer -= pitchB;
2779cb93a386Sopenharmony_ci			*Pointer<UShort>(buffer) = UShort(packedCol);
2780cb93a386Sopenharmony_ci		}
2781cb93a386Sopenharmony_ci		break;
2782cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SFLOAT:
2783cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_SINT:
2784cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32_UINT:
2785cb93a386Sopenharmony_ci		buffer += 8 * x;
2786cb93a386Sopenharmony_ci
2787cb93a386Sopenharmony_ci		value = *Pointer<Float4>(buffer);
2788cb93a386Sopenharmony_ci
2789cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x00000003)
2790cb93a386Sopenharmony_ci		{
2791cb93a386Sopenharmony_ci			Float4 masked = value;
2792cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
2793cb93a386Sopenharmony_ci			masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
2794cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(masked));
2795cb93a386Sopenharmony_ci		}
2796cb93a386Sopenharmony_ci
2797cb93a386Sopenharmony_ci		color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16, 16));
2798cb93a386Sopenharmony_ci		value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ01X) + xMask * 16, 16));
2799cb93a386Sopenharmony_ci		color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(value));
2800cb93a386Sopenharmony_ci		*Pointer<Float4>(buffer) = color.x;
2801cb93a386Sopenharmony_ci
2802cb93a386Sopenharmony_ci		buffer += pitchB;
2803cb93a386Sopenharmony_ci
2804cb93a386Sopenharmony_ci		value = *Pointer<Float4>(buffer);
2805cb93a386Sopenharmony_ci
2806cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x00000003)
2807cb93a386Sopenharmony_ci		{
2808cb93a386Sopenharmony_ci			Float4 masked;
2809cb93a386Sopenharmony_ci
2810cb93a386Sopenharmony_ci			masked = value;
2811cb93a386Sopenharmony_ci			color.y = As<Float4>(As<Int4>(color.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
2812cb93a386Sopenharmony_ci			masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
2813cb93a386Sopenharmony_ci			color.y = As<Float4>(As<Int4>(color.y) | As<Int4>(masked));
2814cb93a386Sopenharmony_ci		}
2815cb93a386Sopenharmony_ci
2816cb93a386Sopenharmony_ci		color.y = As<Float4>(As<Int4>(color.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16, 16));
2817cb93a386Sopenharmony_ci		value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ23X) + xMask * 16, 16));
2818cb93a386Sopenharmony_ci		color.y = As<Float4>(As<Int4>(color.y) | As<Int4>(value));
2819cb93a386Sopenharmony_ci		*Pointer<Float4>(buffer) = color.y;
2820cb93a386Sopenharmony_ci		break;
2821cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_SFLOAT:
2822cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x0)
2823cb93a386Sopenharmony_ci		{
2824cb93a386Sopenharmony_ci			buffer += 4 * x;
2825cb93a386Sopenharmony_ci
2826cb93a386Sopenharmony_ci			UInt2 rgbaMask;
2827cb93a386Sopenharmony_ci			UInt2 packedCol;
2828cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.x.y))) << 16) | UInt(As<UShort>(Half(color.x.x))), 0);
2829cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.x.w))) << 16) | UInt(As<UShort>(Half(color.x.z))), 1);
2830cb93a386Sopenharmony_ci
2831cb93a386Sopenharmony_ci			UShort4 value = *Pointer<UShort4>(buffer);
2832cb93a386Sopenharmony_ci			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2833cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
2834cb93a386Sopenharmony_ci			{
2835cb93a386Sopenharmony_ci				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2836cb93a386Sopenharmony_ci				rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2837cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
2838cb93a386Sopenharmony_ci			}
2839cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2840cb93a386Sopenharmony_ci
2841cb93a386Sopenharmony_ci			buffer += pitchB;
2842cb93a386Sopenharmony_ci
2843cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.y.y))) << 16) | UInt(As<UShort>(Half(color.y.x))), 0);
2844cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.y.w))) << 16) | UInt(As<UShort>(Half(color.y.z))), 1);
2845cb93a386Sopenharmony_ci			value = *Pointer<UShort4>(buffer);
2846cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2847cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
2848cb93a386Sopenharmony_ci			{
2849cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
2850cb93a386Sopenharmony_ci			}
2851cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2852cb93a386Sopenharmony_ci		}
2853cb93a386Sopenharmony_ci		break;
2854cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UNORM:
2855cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_SINT:
2856cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16_UINT:
2857cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x0)
2858cb93a386Sopenharmony_ci		{
2859cb93a386Sopenharmony_ci			buffer += 4 * x;
2860cb93a386Sopenharmony_ci
2861cb93a386Sopenharmony_ci			UInt2 rgbaMask;
2862cb93a386Sopenharmony_ci			UShort4 packedCol = UShort4(As<Int4>(color.x));
2863cb93a386Sopenharmony_ci			UShort4 value = *Pointer<UShort4>(buffer);
2864cb93a386Sopenharmony_ci			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2865cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
2866cb93a386Sopenharmony_ci			{
2867cb93a386Sopenharmony_ci				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2868cb93a386Sopenharmony_ci				rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2869cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
2870cb93a386Sopenharmony_ci			}
2871cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2872cb93a386Sopenharmony_ci
2873cb93a386Sopenharmony_ci			buffer += pitchB;
2874cb93a386Sopenharmony_ci
2875cb93a386Sopenharmony_ci			packedCol = UShort4(As<Int4>(color.y));
2876cb93a386Sopenharmony_ci			value = *Pointer<UShort4>(buffer);
2877cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2878cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
2879cb93a386Sopenharmony_ci			{
2880cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
2881cb93a386Sopenharmony_ci			}
2882cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2883cb93a386Sopenharmony_ci		}
2884cb93a386Sopenharmony_ci		break;
2885cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_SINT:
2886cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8_UINT:
2887cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x00000003) != 0x0)
2888cb93a386Sopenharmony_ci		{
2889cb93a386Sopenharmony_ci			buffer += 2 * x;
2890cb93a386Sopenharmony_ci
2891cb93a386Sopenharmony_ci			Int2 xyzw, packedCol;
2892cb93a386Sopenharmony_ci
2893cb93a386Sopenharmony_ci			xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2894cb93a386Sopenharmony_ci			buffer += pitchB;
2895cb93a386Sopenharmony_ci			xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2896cb93a386Sopenharmony_ci
2897cb93a386Sopenharmony_ci			if(format == VK_FORMAT_R8G8_SINT)
2898cb93a386Sopenharmony_ci			{
2899cb93a386Sopenharmony_ci				packedCol = As<Int2>(PackSigned(Short4(As<Int4>(color.x)), Short4(As<Int4>(color.y))));
2900cb93a386Sopenharmony_ci			}
2901cb93a386Sopenharmony_ci			else
2902cb93a386Sopenharmony_ci			{
2903cb93a386Sopenharmony_ci				packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(color.x)), Short4(As<Int4>(color.y))));
2904cb93a386Sopenharmony_ci			}
2905cb93a386Sopenharmony_ci
2906cb93a386Sopenharmony_ci			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2907cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x3) != 0x3)
2908cb93a386Sopenharmony_ci			{
2909cb93a386Sopenharmony_ci				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2910cb93a386Sopenharmony_ci				UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2911cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
2912cb93a386Sopenharmony_ci			}
2913cb93a386Sopenharmony_ci
2914cb93a386Sopenharmony_ci			packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2915cb93a386Sopenharmony_ci
2916cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2917cb93a386Sopenharmony_ci			buffer -= pitchB;
2918cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2919cb93a386Sopenharmony_ci		}
2920cb93a386Sopenharmony_ci		break;
2921cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SFLOAT:
2922cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_SINT:
2923cb93a386Sopenharmony_ci	case VK_FORMAT_R32G32B32A32_UINT:
2924cb93a386Sopenharmony_ci		buffer += 16 * x;
2925cb93a386Sopenharmony_ci
2926cb93a386Sopenharmony_ci		{
2927cb93a386Sopenharmony_ci			value = *Pointer<Float4>(buffer, 16);
2928cb93a386Sopenharmony_ci
2929cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
2930cb93a386Sopenharmony_ci			{
2931cb93a386Sopenharmony_ci				Float4 masked = value;
2932cb93a386Sopenharmony_ci				color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2933cb93a386Sopenharmony_ci				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2934cb93a386Sopenharmony_ci				color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(masked));
2935cb93a386Sopenharmony_ci			}
2936cb93a386Sopenharmony_ci
2937cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskX0X) + xMask * 16, 16));
2938cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX0X) + xMask * 16, 16));
2939cb93a386Sopenharmony_ci			color.x = As<Float4>(As<Int4>(color.x) | As<Int4>(value));
2940cb93a386Sopenharmony_ci			*Pointer<Float4>(buffer, 16) = color.x;
2941cb93a386Sopenharmony_ci		}
2942cb93a386Sopenharmony_ci
2943cb93a386Sopenharmony_ci		{
2944cb93a386Sopenharmony_ci			value = *Pointer<Float4>(buffer + 16, 16);
2945cb93a386Sopenharmony_ci
2946cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
2947cb93a386Sopenharmony_ci			{
2948cb93a386Sopenharmony_ci				Float4 masked = value;
2949cb93a386Sopenharmony_ci				color.y = As<Float4>(As<Int4>(color.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2950cb93a386Sopenharmony_ci				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2951cb93a386Sopenharmony_ci				color.y = As<Float4>(As<Int4>(color.y) | As<Int4>(masked));
2952cb93a386Sopenharmony_ci			}
2953cb93a386Sopenharmony_ci
2954cb93a386Sopenharmony_ci			color.y = As<Float4>(As<Int4>(color.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskX1X) + xMask * 16, 16));
2955cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX1X) + xMask * 16, 16));
2956cb93a386Sopenharmony_ci			color.y = As<Float4>(As<Int4>(color.y) | As<Int4>(value));
2957cb93a386Sopenharmony_ci			*Pointer<Float4>(buffer + 16, 16) = color.y;
2958cb93a386Sopenharmony_ci		}
2959cb93a386Sopenharmony_ci
2960cb93a386Sopenharmony_ci		buffer += pitchB;
2961cb93a386Sopenharmony_ci
2962cb93a386Sopenharmony_ci		{
2963cb93a386Sopenharmony_ci			value = *Pointer<Float4>(buffer, 16);
2964cb93a386Sopenharmony_ci
2965cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
2966cb93a386Sopenharmony_ci			{
2967cb93a386Sopenharmony_ci				Float4 masked = value;
2968cb93a386Sopenharmony_ci				color.z = As<Float4>(As<Int4>(color.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2969cb93a386Sopenharmony_ci				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2970cb93a386Sopenharmony_ci				color.z = As<Float4>(As<Int4>(color.z) | As<Int4>(masked));
2971cb93a386Sopenharmony_ci			}
2972cb93a386Sopenharmony_ci
2973cb93a386Sopenharmony_ci			color.z = As<Float4>(As<Int4>(color.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskX2X) + xMask * 16, 16));
2974cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX2X) + xMask * 16, 16));
2975cb93a386Sopenharmony_ci			color.z = As<Float4>(As<Int4>(color.z) | As<Int4>(value));
2976cb93a386Sopenharmony_ci			*Pointer<Float4>(buffer, 16) = color.z;
2977cb93a386Sopenharmony_ci		}
2978cb93a386Sopenharmony_ci
2979cb93a386Sopenharmony_ci		{
2980cb93a386Sopenharmony_ci			value = *Pointer<Float4>(buffer + 16, 16);
2981cb93a386Sopenharmony_ci
2982cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0x0000000F)
2983cb93a386Sopenharmony_ci			{
2984cb93a386Sopenharmony_ci				Float4 masked = value;
2985cb93a386Sopenharmony_ci				color.w = As<Float4>(As<Int4>(color.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2986cb93a386Sopenharmony_ci				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2987cb93a386Sopenharmony_ci				color.w = As<Float4>(As<Int4>(color.w) | As<Int4>(masked));
2988cb93a386Sopenharmony_ci			}
2989cb93a386Sopenharmony_ci
2990cb93a386Sopenharmony_ci			color.w = As<Float4>(As<Int4>(color.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskX3X) + xMask * 16, 16));
2991cb93a386Sopenharmony_ci			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX3X) + xMask * 16, 16));
2992cb93a386Sopenharmony_ci			color.w = As<Float4>(As<Int4>(color.w) | As<Int4>(value));
2993cb93a386Sopenharmony_ci			*Pointer<Float4>(buffer + 16, 16) = color.w;
2994cb93a386Sopenharmony_ci		}
2995cb93a386Sopenharmony_ci		break;
2996cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_SFLOAT:
2997cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x0000000F) != 0x0)
2998cb93a386Sopenharmony_ci		{
2999cb93a386Sopenharmony_ci			buffer += 8 * x;
3000cb93a386Sopenharmony_ci
3001cb93a386Sopenharmony_ci			UInt4 rgbaMask;
3002cb93a386Sopenharmony_ci			UInt4 value = *Pointer<UInt4>(buffer);
3003cb93a386Sopenharmony_ci			UInt4 packedCol;
3004cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.x.y))) << 16) | UInt(As<UShort>(Half(color.x.x))), 0);
3005cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.x.w))) << 16) | UInt(As<UShort>(Half(color.x.z))), 1);
3006cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.y.y))) << 16) | UInt(As<UShort>(Half(color.y.x))), 2);
3007cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.y.w))) << 16) | UInt(As<UShort>(Half(color.y.z))), 3);
3008cb93a386Sopenharmony_ci			UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
3009cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0xF) != 0xF)
3010cb93a386Sopenharmony_ci			{
3011cb93a386Sopenharmony_ci				UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
3012cb93a386Sopenharmony_ci				rgbaMask = UInt4(tmpMask, tmpMask);
3013cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
3014cb93a386Sopenharmony_ci			}
3015cb93a386Sopenharmony_ci			*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
3016cb93a386Sopenharmony_ci
3017cb93a386Sopenharmony_ci			buffer += pitchB;
3018cb93a386Sopenharmony_ci
3019cb93a386Sopenharmony_ci			value = *Pointer<UInt4>(buffer);
3020cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.z.y))) << 16) | UInt(As<UShort>(Half(color.z.x))), 0);
3021cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.z.w))) << 16) | UInt(As<UShort>(Half(color.z.z))), 1);
3022cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.w.y))) << 16) | UInt(As<UShort>(Half(color.w.x))), 2);
3023cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(color.w.w))) << 16) | UInt(As<UShort>(Half(color.w.z))), 3);
3024cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
3025cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0xF) != 0xF)
3026cb93a386Sopenharmony_ci			{
3027cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
3028cb93a386Sopenharmony_ci			}
3029cb93a386Sopenharmony_ci			*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
3030cb93a386Sopenharmony_ci		}
3031cb93a386Sopenharmony_ci		break;
3032cb93a386Sopenharmony_ci	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
3033cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x7) != 0x0)
3034cb93a386Sopenharmony_ci		{
3035cb93a386Sopenharmony_ci			buffer += 4 * x;
3036cb93a386Sopenharmony_ci
3037cb93a386Sopenharmony_ci			UInt4 packedCol;
3038cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, r11g11b10Pack(color.x), 0);
3039cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, r11g11b10Pack(color.y), 1);
3040cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, r11g11b10Pack(color.z), 2);
3041cb93a386Sopenharmony_ci			packedCol = Insert(packedCol, r11g11b10Pack(color.w), 3);
3042cb93a386Sopenharmony_ci
3043cb93a386Sopenharmony_ci			UInt4 value;
3044cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<UInt>(buffer + 0), 0);
3045cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<UInt>(buffer + 4), 1);
3046cb93a386Sopenharmony_ci			buffer += pitchB;
3047cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<UInt>(buffer + 0), 2);
3048cb93a386Sopenharmony_ci			value = Insert(value, *Pointer<UInt>(buffer + 4), 3);
3049cb93a386Sopenharmony_ci
3050cb93a386Sopenharmony_ci			UInt4 mask = *Pointer<UInt4>(constants + OFFSET(Constants, maskD4X[0][0]) + xMask * 16, 16);
3051cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0x7) != 0x7)
3052cb93a386Sopenharmony_ci			{
3053cb93a386Sopenharmony_ci				mask &= *Pointer<UInt4>(constants + OFFSET(Constants, mask11X[rgbaWriteMask & 0x7][0]), 16);
3054cb93a386Sopenharmony_ci			}
3055cb93a386Sopenharmony_ci			value = (packedCol & mask) | (value & ~mask);
3056cb93a386Sopenharmony_ci
3057cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer + 0) = value.z;
3058cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer + 4) = value.w;
3059cb93a386Sopenharmony_ci			buffer -= pitchB;
3060cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer + 0) = value.x;
3061cb93a386Sopenharmony_ci			*Pointer<UInt>(buffer + 4) = value.y;
3062cb93a386Sopenharmony_ci		}
3063cb93a386Sopenharmony_ci		break;
3064cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UNORM:
3065cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_SINT:
3066cb93a386Sopenharmony_ci	case VK_FORMAT_R16G16B16A16_UINT:
3067cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x0000000F) != 0x0)
3068cb93a386Sopenharmony_ci		{
3069cb93a386Sopenharmony_ci			buffer += 8 * x;
3070cb93a386Sopenharmony_ci
3071cb93a386Sopenharmony_ci			UInt4 rgbaMask;
3072cb93a386Sopenharmony_ci			UShort8 value = *Pointer<UShort8>(buffer);
3073cb93a386Sopenharmony_ci			UShort8 packedCol = UShort8(UShort4(As<Int4>(color.x)), UShort4(As<Int4>(color.y)));
3074cb93a386Sopenharmony_ci			UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
3075cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0xF) != 0xF)
3076cb93a386Sopenharmony_ci			{
3077cb93a386Sopenharmony_ci				UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
3078cb93a386Sopenharmony_ci				rgbaMask = UInt4(tmpMask, tmpMask);
3079cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
3080cb93a386Sopenharmony_ci			}
3081cb93a386Sopenharmony_ci			*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
3082cb93a386Sopenharmony_ci
3083cb93a386Sopenharmony_ci			buffer += pitchB;
3084cb93a386Sopenharmony_ci
3085cb93a386Sopenharmony_ci			value = *Pointer<UShort8>(buffer);
3086cb93a386Sopenharmony_ci			packedCol = UShort8(UShort4(As<Int4>(color.z)), UShort4(As<Int4>(color.w)));
3087cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
3088cb93a386Sopenharmony_ci			if((rgbaWriteMask & 0xF) != 0xF)
3089cb93a386Sopenharmony_ci			{
3090cb93a386Sopenharmony_ci				mergedMask &= rgbaMask;
3091cb93a386Sopenharmony_ci			}
3092cb93a386Sopenharmony_ci			*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
3093cb93a386Sopenharmony_ci		}
3094cb93a386Sopenharmony_ci		break;
3095cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_SINT:
3096cb93a386Sopenharmony_ci	case VK_FORMAT_R8G8B8A8_UINT:
3097cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
3098cb93a386Sopenharmony_ci	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
3099cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x0000000F) != 0x0)
3100cb93a386Sopenharmony_ci		{
3101cb93a386Sopenharmony_ci			UInt2 value, packedCol, mergedMask;
3102cb93a386Sopenharmony_ci
3103cb93a386Sopenharmony_ci			buffer += 4 * x;
3104cb93a386Sopenharmony_ci
3105cb93a386Sopenharmony_ci			bool isSigned = (format == VK_FORMAT_R8G8B8A8_SINT) || (format == VK_FORMAT_A8B8G8R8_SINT_PACK32);
3106cb93a386Sopenharmony_ci
3107cb93a386Sopenharmony_ci			if(isSigned)
3108cb93a386Sopenharmony_ci			{
3109cb93a386Sopenharmony_ci				packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(color.x)), Short4(As<Int4>(color.y))));
3110cb93a386Sopenharmony_ci			}
3111cb93a386Sopenharmony_ci			else
3112cb93a386Sopenharmony_ci			{
3113cb93a386Sopenharmony_ci				packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(color.x)), Short4(As<Int4>(color.y))));
3114cb93a386Sopenharmony_ci			}
3115cb93a386Sopenharmony_ci			value = *Pointer<UInt2>(buffer, 16);
3116cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
3117cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
3118cb93a386Sopenharmony_ci			{
3119cb93a386Sopenharmony_ci				mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
3120cb93a386Sopenharmony_ci			}
3121cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
3122cb93a386Sopenharmony_ci
3123cb93a386Sopenharmony_ci			buffer += pitchB;
3124cb93a386Sopenharmony_ci
3125cb93a386Sopenharmony_ci			if(isSigned)
3126cb93a386Sopenharmony_ci			{
3127cb93a386Sopenharmony_ci				packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(color.z)), Short4(As<Int4>(color.w))));
3128cb93a386Sopenharmony_ci			}
3129cb93a386Sopenharmony_ci			else
3130cb93a386Sopenharmony_ci			{
3131cb93a386Sopenharmony_ci				packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(color.z)), Short4(As<Int4>(color.w))));
3132cb93a386Sopenharmony_ci			}
3133cb93a386Sopenharmony_ci			value = *Pointer<UInt2>(buffer, 16);
3134cb93a386Sopenharmony_ci			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
3135cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
3136cb93a386Sopenharmony_ci			{
3137cb93a386Sopenharmony_ci				mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
3138cb93a386Sopenharmony_ci			}
3139cb93a386Sopenharmony_ci			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
3140cb93a386Sopenharmony_ci		}
3141cb93a386Sopenharmony_ci		break;
3142cb93a386Sopenharmony_ci	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3143cb93a386Sopenharmony_ci		if((rgbaWriteMask & 0x0000000F) != 0x0)
3144cb93a386Sopenharmony_ci		{
3145cb93a386Sopenharmony_ci			Int2 mergedMask, packedCol, value;
3146cb93a386Sopenharmony_ci			Int4 packed = ((As<Int4>(color.w) & Int4(0x3)) << 30) |
3147cb93a386Sopenharmony_ci			              ((As<Int4>(color.z) & Int4(0x3ff)) << 20) |
3148cb93a386Sopenharmony_ci			              ((As<Int4>(color.y) & Int4(0x3ff)) << 10) |
3149cb93a386Sopenharmony_ci			              ((As<Int4>(color.x) & Int4(0x3ff)));
3150cb93a386Sopenharmony_ci
3151cb93a386Sopenharmony_ci			buffer += 4 * x;
3152cb93a386Sopenharmony_ci			value = *Pointer<Int2>(buffer, 16);
3153cb93a386Sopenharmony_ci			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
3154cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
3155cb93a386Sopenharmony_ci			{
3156cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
3157cb93a386Sopenharmony_ci			}
3158cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
3159cb93a386Sopenharmony_ci
3160cb93a386Sopenharmony_ci			buffer += pitchB;
3161cb93a386Sopenharmony_ci
3162cb93a386Sopenharmony_ci			value = *Pointer<Int2>(buffer, 16);
3163cb93a386Sopenharmony_ci			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
3164cb93a386Sopenharmony_ci			if(rgbaWriteMask != 0xF)
3165cb93a386Sopenharmony_ci			{
3166cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
3167cb93a386Sopenharmony_ci			}
3168cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
3169cb93a386Sopenharmony_ci		}
3170cb93a386Sopenharmony_ci		break;
3171cb93a386Sopenharmony_ci	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3172cb93a386Sopenharmony_ci		if((bgraWriteMask & 0x0000000F) != 0x0)
3173cb93a386Sopenharmony_ci		{
3174cb93a386Sopenharmony_ci			Int2 mergedMask, packedCol, value;
3175cb93a386Sopenharmony_ci			Int4 packed = ((As<Int4>(color.w) & Int4(0x3)) << 30) |
3176cb93a386Sopenharmony_ci			              ((As<Int4>(color.x) & Int4(0x3ff)) << 20) |
3177cb93a386Sopenharmony_ci			              ((As<Int4>(color.y) & Int4(0x3ff)) << 10) |
3178cb93a386Sopenharmony_ci			              ((As<Int4>(color.z) & Int4(0x3ff)));
3179cb93a386Sopenharmony_ci
3180cb93a386Sopenharmony_ci			buffer += 4 * x;
3181cb93a386Sopenharmony_ci			value = *Pointer<Int2>(buffer, 16);
3182cb93a386Sopenharmony_ci			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
3183cb93a386Sopenharmony_ci			if(bgraWriteMask != 0xF)
3184cb93a386Sopenharmony_ci			{
3185cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[bgraWriteMask][0]));
3186cb93a386Sopenharmony_ci			}
3187cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
3188cb93a386Sopenharmony_ci
3189cb93a386Sopenharmony_ci			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
3190cb93a386Sopenharmony_ci
3191cb93a386Sopenharmony_ci			value = *Pointer<Int2>(buffer, 16);
3192cb93a386Sopenharmony_ci			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
3193cb93a386Sopenharmony_ci			if(bgraWriteMask != 0xF)
3194cb93a386Sopenharmony_ci			{
3195cb93a386Sopenharmony_ci				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[bgraWriteMask][0]));
3196cb93a386Sopenharmony_ci			}
3197cb93a386Sopenharmony_ci			*Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
3198cb93a386Sopenharmony_ci		}
3199cb93a386Sopenharmony_ci		break;
3200cb93a386Sopenharmony_ci	default:
3201cb93a386Sopenharmony_ci		UNSUPPORTED("VkFormat: %d", int(format));
3202cb93a386Sopenharmony_ci	}
3203cb93a386Sopenharmony_ci}
3204cb93a386Sopenharmony_ci
3205cb93a386Sopenharmony_ciUShort4 PixelRoutine::convertFixed16(const Float4 &cf, bool saturate)
3206cb93a386Sopenharmony_ci{
3207cb93a386Sopenharmony_ci	return UShort4(cf * Float4(0xFFFF), saturate);
3208cb93a386Sopenharmony_ci}
3209cb93a386Sopenharmony_ci
3210cb93a386Sopenharmony_ciFloat4 PixelRoutine::convertFloat32(const UShort4 &cf)
3211cb93a386Sopenharmony_ci{
3212cb93a386Sopenharmony_ci	return Float4(cf) * Float4(1.0f / 65535.0f);
3213cb93a386Sopenharmony_ci}
3214cb93a386Sopenharmony_ci
3215cb93a386Sopenharmony_civoid PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
3216cb93a386Sopenharmony_ci{
3217cb93a386Sopenharmony_ci	Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
3218cb93a386Sopenharmony_ci
3219cb93a386Sopenharmony_ci	c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
3220cb93a386Sopenharmony_ci	c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
3221cb93a386Sopenharmony_ci	c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
3222cb93a386Sopenharmony_ci
3223cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
3224cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
3225cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
3226cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
3227cb93a386Sopenharmony_ci
3228cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
3229cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
3230cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
3231cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
3232cb93a386Sopenharmony_ci
3233cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
3234cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
3235cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
3236cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
3237cb93a386Sopenharmony_ci}
3238cb93a386Sopenharmony_ci
3239cb93a386Sopenharmony_civoid PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
3240cb93a386Sopenharmony_ci{
3241cb93a386Sopenharmony_ci	c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
3242cb93a386Sopenharmony_ci	c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
3243cb93a386Sopenharmony_ci	c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
3244cb93a386Sopenharmony_ci
3245cb93a386Sopenharmony_ci	linearToSRGB12_16(c);
3246cb93a386Sopenharmony_ci}
3247cb93a386Sopenharmony_ci
3248cb93a386Sopenharmony_civoid PixelRoutine::linearToSRGB12_16(Vector4s &c)
3249cb93a386Sopenharmony_ci{
3250cb93a386Sopenharmony_ci	Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16);
3251cb93a386Sopenharmony_ci
3252cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
3253cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
3254cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
3255cb93a386Sopenharmony_ci	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
3256cb93a386Sopenharmony_ci
3257cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
3258cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
3259cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
3260cb93a386Sopenharmony_ci	c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
3261cb93a386Sopenharmony_ci
3262cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
3263cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
3264cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
3265cb93a386Sopenharmony_ci	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
3266cb93a386Sopenharmony_ci}
3267cb93a386Sopenharmony_ci
3268cb93a386Sopenharmony_ciFloat4 PixelRoutine::sRGBtoLinear(const Float4 &x)  // Approximates x^2.2
3269cb93a386Sopenharmony_ci{
3270cb93a386Sopenharmony_ci	Float4 linear = x * x;
3271cb93a386Sopenharmony_ci	linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
3272cb93a386Sopenharmony_ci
3273cb93a386Sopenharmony_ci	return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
3274cb93a386Sopenharmony_ci}
3275cb93a386Sopenharmony_ci
3276cb93a386Sopenharmony_ci}  // namespace sw
3277