1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "render_backend_gles.h"
16
17 #include <algorithm>
18
19 #include <base/containers/fixed_string.h>
20 #include <core/perf/intf_performance_data_manager.h>
21 #include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
22 #include <render/namespace.h>
23
24 #if (RENDER_PERF_ENABLED == 1)
25 #include "perf/gpu_query.h"
26 #include "perf/gpu_query_manager.h"
27 #endif
28 #include "device/gpu_resource_manager.h"
29 #include "gles/device_gles.h"
30 #include "gles/gl_functions.h"
31 #include "gles/gpu_buffer_gles.h"
32 #include "gles/gpu_image_gles.h"
33 #include "gles/gpu_program_gles.h"
34 #include "gles/gpu_query_gles.h"
35 #include "gles/gpu_sampler_gles.h"
36 #include "gles/gpu_semaphore_gles.h"
37 #include "gles/node_context_descriptor_set_manager_gles.h"
38 #include "gles/node_context_pool_manager_gles.h"
39 #include "gles/pipeline_state_object_gles.h"
40 #include "gles/render_frame_sync_gles.h"
41 #include "gles/swapchain_gles.h"
42 #include "nodecontext/render_command_list.h"
43 #include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
44 #include "util/log.h"
45 #include "util/render_frame_util.h"
46
47 #define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
48 #define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)
49
50 using namespace BASE_NS;
51
52 // NOTE: implement missing commands, add state caching, and clean up a bit more.
53 RENDER_BEGIN_NAMESPACE()
54 namespace Gles {
55 // Indices to colorBlendConstants
56 static constexpr uint32_t RED_INDEX = 0;
57 static constexpr uint32_t GREEN_INDEX = 1;
58 static constexpr uint32_t BLUE_INDEX = 2;
59 static constexpr uint32_t ALPHA_INDEX = 3;
60 static constexpr uint32_t CUBEMAP_LAYERS = 6;
61 struct Bind {
62 DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
63 struct BufferType {
64 uint32_t bufferId;
65 uint32_t offset;
66 uint32_t size;
67 };
68 struct ImageType {
69 GpuImageGLES* image;
70 uint32_t mode;
71 uint32_t mipLevel;
72 };
73 struct SamplerType {
74 uint32_t samplerId;
75 };
76 struct Resource {
77 union {
78 Bind::BufferType buffer { 0, 0, 0 };
79 Bind::ImageType image;
80 };
81 SamplerType sampler { 0 };
82 };
83 vector<Resource> resources;
84 };
85 } // namespace Gles
86 namespace {
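// Maps a descriptor type to the handle type (sampler, image or buffer) that backs it.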
87 constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
88 {
89 if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
90 return RenderHandleType::GPU_SAMPLER;
91 } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
92 (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
93 (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
94 return RenderHandleType::GPU_IMAGE;
95 } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
96 (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
97 return RenderHandleType::GPU_BUFFER;
98 }
99 return RenderHandleType::UNDEFINED;
100 }
101
102 GLenum getCubeMapTarget(GLenum type, uint32_t layer)
103 {
104 if (type == GL_TEXTURE_CUBE_MAP) {
105 constexpr GLenum layerId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
106 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
107 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
108 PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
109 return layerId[layer];
110 }
111 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
112 return GL_NONE;
113 }
114
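// Resolves the GL texture target from the image type, handling multisampled 2D and individual cube-map faces.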
115 GLenum getTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
116 {
117 if (type == GL_TEXTURE_2D) {
118 if (sampleCount > 1) {
119 return GL_TEXTURE_2D_MULTISAMPLE;
120 }
121 return GL_TEXTURE_2D;
122 }
123 if (type == GL_TEXTURE_CUBE_MAP) {
124 PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
125 return getCubeMapTarget(type, layer);
126 }
127 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
128 return GL_NONE;
129 }
130 struct BlitArgs {
131 uint32_t mipLevel {};
132 Size3D rect0 {};
133 Size3D rect1 {};
134 uint32_t height {};
135 };
136
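// Performs a color glBlitFramebuffer between the currently bound read and draw framebuffers, flipping Y for the bottom-left GL origin.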
137 void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
138 {
139 // Handle top-left / bottom-left origin conversion
140 GLint sy = static_cast<GLint>(src.rect0.height);
141 const GLint sh = static_cast<const GLint>(src.rect1.height);
142 const GLint sfh = static_cast<GLint>(src.height >> src.mipLevel);
143 sy = sfh - (sy + sh);
144 GLint dy = static_cast<GLint>(dst.rect0.height);
145 const GLint dh = static_cast<const GLint>(dst.rect1.height);
146 const GLint dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
147 dy = dfh - (dy + dh);
148 GLenum glfilter = GL_NEAREST;
149 if (filter == CORE_FILTER_NEAREST) {
150 glfilter = GL_NEAREST;
151 } else if (filter == CORE_FILTER_LINEAR) {
152 glfilter = GL_LINEAR;
153 } else {
154 PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
155 }
156 glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
157 static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
158 glfilter);
159 }
160
161 GLenum GetPrimFromTopology(PrimitiveTopology op)
162 {
163 switch (op) {
164 case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
165 return GL_POINTS;
166 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
167 return GL_LINES;
168 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
169 return GL_LINE_STRIP;
170 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
171 return GL_TRIANGLES;
172 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
173 return GL_TRIANGLE_STRIP;
174 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
175 return GL_TRIANGLE_FAN;
176 #if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
177 // The following are valid with GLES 3.2 / GL 3.2 and newer
178 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
179 return GL_LINES_ADJACENCY;
180 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
181 return GL_LINE_STRIP_ADJACENCY;
182 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
183 return GL_TRIANGLES_ADJACENCY;
184 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
185 return GL_TRIANGLE_STRIP_ADJACENCY;
186 case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
187 return GL_PATCHES;
188 #endif
189 default:
190 PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
191 break;
192 }
193 return GL_POINTS;
194 }
195
196 GLenum GetBlendOp(BlendOp func)
197 {
198 switch (func) {
199 case CORE_BLEND_OP_ADD:
200 return GL_FUNC_ADD;
201 case CORE_BLEND_OP_SUBTRACT:
202 return GL_FUNC_SUBTRACT;
203 case CORE_BLEND_OP_REVERSE_SUBTRACT:
204 return GL_FUNC_REVERSE_SUBTRACT;
205 case CORE_BLEND_OP_MIN:
206 return GL_MIN;
207 case CORE_BLEND_OP_MAX:
208 return GL_MAX;
209 default:
210 break;
211 }
212 return GL_FUNC_ADD;
213 }
214
215 GLenum GetBlendFactor(BlendFactor factor)
216 {
217 switch (factor) {
218 case CORE_BLEND_FACTOR_ZERO:
219 return GL_ZERO;
220 case CORE_BLEND_FACTOR_ONE:
221 return GL_ONE;
222 case CORE_BLEND_FACTOR_SRC_COLOR:
223 return GL_SRC_COLOR;
224 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
225 return GL_ONE_MINUS_SRC_COLOR;
226 case CORE_BLEND_FACTOR_DST_COLOR:
227 return GL_DST_COLOR;
228 case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
229 return GL_ONE_MINUS_DST_COLOR;
230 case CORE_BLEND_FACTOR_SRC_ALPHA:
231 return GL_SRC_ALPHA;
232 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
233 return GL_ONE_MINUS_SRC_ALPHA;
234 case CORE_BLEND_FACTOR_DST_ALPHA:
235 return GL_DST_ALPHA;
236 case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
237 return GL_ONE_MINUS_DST_ALPHA;
238 case CORE_BLEND_FACTOR_CONSTANT_COLOR:
239 return GL_CONSTANT_COLOR;
240 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
241 return GL_ONE_MINUS_CONSTANT_COLOR;
242 case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
243 return GL_CONSTANT_ALPHA;
244 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
245 return GL_ONE_MINUS_CONSTANT_ALPHA;
246 case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
247 return GL_SRC_ALPHA_SATURATE;
248 // NOTE: check the GLES3.2...
249 /* following requires EXT_blend_func_extended (dual source blending) */
250 case CORE_BLEND_FACTOR_SRC1_COLOR:
251 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
252 case CORE_BLEND_FACTOR_SRC1_ALPHA:
253 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
254 default:
255 break;
256 }
257 return GL_ONE;
258 }
259
260 GLenum GetCompareOp(CompareOp aOp)
261 {
262 switch (aOp) {
263 case CORE_COMPARE_OP_NEVER:
264 return GL_NEVER;
265 case CORE_COMPARE_OP_LESS:
266 return GL_LESS;
267 case CORE_COMPARE_OP_EQUAL:
268 return GL_EQUAL;
269 case CORE_COMPARE_OP_LESS_OR_EQUAL:
270 return GL_LEQUAL;
271 case CORE_COMPARE_OP_GREATER:
272 return GL_GREATER;
273 case CORE_COMPARE_OP_NOT_EQUAL:
274 return GL_NOTEQUAL;
275 case CORE_COMPARE_OP_GREATER_OR_EQUAL:
276 return GL_GEQUAL;
277 case CORE_COMPARE_OP_ALWAYS:
278 return GL_ALWAYS;
279 default:
280 break;
281 }
282 return GL_ALWAYS;
283 }
284
285 GLenum GetStencilOp(StencilOp aOp)
286 {
287 switch (aOp) {
288 case CORE_STENCIL_OP_KEEP:
289 return GL_KEEP;
290 case CORE_STENCIL_OP_ZERO:
291 return GL_ZERO;
292 case CORE_STENCIL_OP_REPLACE:
293 return GL_REPLACE;
294 case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
295 return GL_INCR;
296 case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
297 return GL_DECR;
298 case CORE_STENCIL_OP_INVERT:
299 return GL_INVERT;
300 case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
301 return GL_INCR_WRAP;
302 case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
303 return GL_DECR_WRAP;
304 default:
305 break;
306 }
307 return GL_KEEP;
308 }
309
310 void SetState(GLenum type, bool enabled)
311 {
312 if (enabled) {
313 glEnable(type);
314 } else {
315 glDisable(type);
316 }
317 }
318
319 void SetCullMode(const GraphicsState::RasterizationState& rs)
320 {
321 SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));
322
323 switch (rs.cullModeFlags) {
324 case CORE_CULL_MODE_FRONT_BIT:
325 glCullFace(GL_FRONT);
326 break;
327 case CORE_CULL_MODE_BACK_BIT:
328 glCullFace(GL_BACK);
329 break;
330 case CORE_CULL_MODE_FRONT_AND_BACK:
331 glCullFace(GL_FRONT_AND_BACK);
332 break;
333 case CORE_CULL_MODE_NONE:
334 default:
335 break;
336 }
337 }
338
339 void SetFrontFace(const GraphicsState::RasterizationState& rs)
340 {
341 switch (rs.frontFace) {
342 case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
343 glFrontFace(GL_CCW);
344 break;
345 case CORE_FRONT_FACE_CLOCKWISE:
346 glFrontFace(GL_CW);
347 break;
348 default:
349 break;
350 }
351 }
352
353 #if RENDER_HAS_GL_BACKEND
354 void SetPolygonMode(const GraphicsState::RasterizationState& rs)
355 {
356 GLenum mode;
357 switch (rs.polygonMode) {
358 default:
359 case CORE_POLYGON_MODE_FILL:
360 mode = GL_FILL;
361 break;
362 case CORE_POLYGON_MODE_LINE:
363 mode = GL_LINE;
364 break;
365 case CORE_POLYGON_MODE_POINT:
366 mode = GL_POINT;
367 break;
368 }
369 glPolygonMode(GL_FRONT_AND_BACK, mode);
370 }
371 #endif
372
373 void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
374 const LowlevelFramebufferGL& frameBuffer)
375 {
376 if (count > 0) {
377 if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
378 // Invalidate the whole buffer. (attachment sizes match render area)
379 glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
380 } else {
381 // invalidate only a part of the render target..
382 // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
383 const GLint X = static_cast<const GLint>(rpd.renderArea.offsetX);
384 const GLint Y = static_cast<const GLint>(rpd.renderArea.offsetY);
385 const GLsizei W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
386 const GLsizei H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
387 glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
388 }
389 }
390 }
391
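// Gathered parameters for a buffer-to-image copy uploaded with (Compressed)TexSubImage2D/3D.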
392 struct BlitData {
393 const GpuImagePlatformDataGL& iPlat;
394 const GpuImageDesc& imageDesc;
395 const BufferImageCopy& bufferImageCopy;
396 uintptr_t data { 0 };
397 uint64_t size { 0 };
398 uint64_t sizeOfData { 0 };
399 bool compressed { false };
400 };
401
402 void BlitArray(DeviceGLES& device_, const BlitData& bd)
403 {
404 const auto& iPlat = bd.iPlat;
405 const auto& bufferImageCopy = bd.bufferImageCopy;
406 const auto& imageSubresource = bufferImageCopy.imageSubresource;
407 const auto& imageDesc = bd.imageDesc;
408 const uint32_t mip = imageSubresource.mipLevel;
409 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
410 // NOTE: image offset depth is ignored
411 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
412 const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
413 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
414 Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
415 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
416 if (valid) {
417 uintptr_t data = bd.data;
418 const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
419 for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
420 const Math::UVec3 offset3D { offset.x, offset.y, layer };
421 if (bd.compressed) {
422 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
423 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
424 } else {
425 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
426 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
427 }
428 data += static_cast<ptrdiff_t>(bd.sizeOfData);
429 }
430 }
431 }
432
433 void Blit2D(DeviceGLES& device_, const BlitData& bd)
434 {
435 const auto& iPlat = bd.iPlat;
436 const auto& bufferImageCopy = bd.bufferImageCopy;
437 const auto& imageSubresource = bufferImageCopy.imageSubresource;
438 const auto& imageDesc = bd.imageDesc;
439 const uint32_t mip = imageSubresource.mipLevel;
440 const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
441 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
442 const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
443 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
444 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
445 "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
446 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
447 const uintptr_t data = bd.data;
448 if (valid && bd.compressed) {
449 device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
450 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
451 } else if (valid) {
452 device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
453 iPlat.dataType, reinterpret_cast<const void*>(data));
454 }
455 }
456
457 void Blit3D(DeviceGLES& device_, const BlitData& bd)
458 {
459 const auto& iPlat = bd.iPlat;
460 const auto& bufferImageCopy = bd.bufferImageCopy;
461 const auto& imageSubresource = bufferImageCopy.imageSubresource;
462 const auto& imageDesc = bd.imageDesc;
463 const uint32_t mip = imageSubresource.mipLevel;
464 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
465 const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
466 bufferImageCopy.imageOffset.depth };
467 Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
468 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
469 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
470 if (valid) {
471 uintptr_t data = bd.data;
472 for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
473 const Math::UVec3 offset3D { offset.x, offset.y, slice };
474 if (bd.compressed) {
475 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
476 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
477 } else {
478 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
479 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
480 }
481 // offsets one slice
482 data += static_cast<ptrdiff_t>(bd.sizeOfData);
483 }
484 }
485 }
486
487 void BlitCube(DeviceGLES& device_, const BlitData& bd)
488 {
489 const auto& iPlat = bd.iPlat;
490 const auto& bufferImageCopy = bd.bufferImageCopy;
491 const auto& imageSubresource = bufferImageCopy.imageSubresource;
492 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
493 const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
494 constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
495 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
496 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
497 PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
498 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
499 "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
500 uintptr_t data = bd.data;
501 const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
502 for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
503 const GLenum face = faceId[i]; // convert layer index to cube map face id.
504 if (face == 0) {
505 // reached the end of cubemap faces (see faceId)
506 // so must stop copying.
507 break;
508 }
509 if (bd.compressed) {
510 device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
511 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
512 } else {
513 device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
514 iPlat.dataType, reinterpret_cast<const void*>(data));
515 }
516 data += static_cast<ptrdiff_t>(bd.sizeOfData);
517 }
518 }
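// Prepares GL_UNPACK_* state and the source data pointer for a buffer-to-image copy; with usePixelUnpackBuffer the
// source is bound as GL_PIXEL_UNPACK_BUFFER, otherwise it is mapped.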
519 template<bool usePixelUnpackBuffer>
520
521 BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
522 const GpuImageGLES& dstGpuImage)
523 {
524 const auto& iPlat = dstGpuImage.GetPlatformData();
525 const auto& imageOffset = bufferImageCopy.imageOffset;
526 PLUGIN_UNUSED(imageOffset);
527 const auto& imageExtent = bufferImageCopy.imageExtent;
528 // size is calculated for single layer / slice
529 const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
530 static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
531 static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
532 uintptr_t data = bufferImageCopy.bufferOffset;
533 if constexpr (usePixelUnpackBuffer) {
534 const auto& plat = srcGpuBuffer.GetPlatformData();
535 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
536 } else {
537 // Use the mapped pointer for glTexSubImage2D; this is a workaround for GL_INVALID_OPERATION on the PVR GLES
538 // simulator and a crash with ETC2 textures on NVIDIA.
539 data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
540 }
541 uint64_t sizeOfData = size;
542 const auto& compinfo = iPlat.compression;
543 if (compinfo.compressed) {
544 // how many blocks in width
545 const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
546 // how many blocks in height
547 const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
548 // size in bytes..
549 sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
550
551 // Warn for partial copies. we do not handle those at the moment.
552 if (bufferImageCopy.bufferRowLength != 0) {
553 if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
554 PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
555 "Stride must match image width (with block align). "
556 "bufferImageCopy.bufferRowLength(%d) "
557 "imageExtent.width(%d) ",
558 bufferImageCopy.bufferRowLength, imageExtent.width);
559 }
560 }
561 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
562 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
563 } else {
564 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
565 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
566 }
567 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
568 return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
569 }
570
571 template<bool usePixelUnpackBuffer>
572 void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
573 {
574 if constexpr (usePixelUnpackBuffer) {
575 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
576 } else {
577 srcGpuBuffer.Unmap();
578 }
579 }
580
581 template<typename T, size_t N>
582 constexpr bool Compare(const T (&a)[N], const T (&b)[N])
583 {
584 for (size_t i = 0; i < N; i++) {
585 if (a[i] != b[i])
586 return false;
587 }
588 return true;
589 }
590
591 template<typename T, size_t N>
592
593 constexpr bool Set(T (&a)[N], const T (&b)[N])
594 {
595 for (size_t i = 0; i < N; i++) {
596 a[i] = b[i];
597 }
598 return true;
599 }
600
601 bool CompareBlendFactors(
602 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
603 {
604 return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
605 (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
606 }
607
608 void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
609 {
610 a.srcColorBlendFactor = b.srcColorBlendFactor;
611 a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
612 a.dstColorBlendFactor = b.dstColorBlendFactor;
613 a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
614 }
615
616 bool CompareBlendOps(
617 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
618 {
619 return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
620 }
621
622 void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
623 {
624 a.colorBlendOp = b.colorBlendOp;
625 a.alphaBlendOp = b.alphaBlendOp;
626 }
627
628 bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
629 {
630 return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
631 }
632
633 void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
634 {
635 a.failOp = b.failOp;
636 a.depthFailOp = b.depthFailOp;
637 a.passOp = b.passOp;
638 }
639
640 void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
641 {
642 a.compareOp = b.compareOp;
643 a.compareMask = b.compareMask;
644 a.reference = b.reference;
645 }
646
647 #if RENDER_VALIDATION_ENABLED
648 void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
649 {
650 if (mipLevel >= imageDesc.mipCount) {
651 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
652 }
653 if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
654 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
655 }
656 if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
657 ((offset.z + extent.depth) > imageDesc.depth)) {
658 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
659 }
660 }
661
662 void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
663 {
664 ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
665 ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
666 }
667 #endif
668
669 constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
670 {
671 if (srcOffset < 0) {
672 size += srcOffset;
673 dstOffset -= srcOffset;
674 srcOffset = 0;
675 }
676 }
677
678 constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
679 {
680 ClampOffset(srcOffset.x, dstOffset.x, size.width);
681 ClampOffset(srcOffset.y, dstOffset.y, size.height);
682 ClampOffset(srcOffset.z, dstOffset.z, size.depth);
683 }
684
685 constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
686 {
687 if (size > (maxSize - offset)) {
688 size = maxSize - offset;
689 }
690 }
691
692 constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
693 {
694 ClampSize(offset.x, desc.width, size.width);
695 ClampSize(offset.y, desc.height, size.height);
696 ClampSize(offset.z, desc.depth, size.depth);
697 }
698 } // namespace
699
700 RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
701 : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
702 {
703 #if (RENDER_PERF_ENABLED == 1)
704 validGpuQueries_ = false;
705 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
706 gpuQueryMgr_ = make_unique<GpuQueryManager>();
707 #if RENDER_HAS_GL_BACKEND
708 if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
709 validGpuQueries_ = true;
710 }
711 #endif
712 #if RENDER_HAS_GLES_BACKEND
713 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
714 // Check if GL_EXT_disjoint_timer_query is available.
715 validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
716 }
717 #endif
718 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
719 #endif // RENDER_PERF_ENABLED
720 #if RENDER_HAS_GLES_BACKEND
721 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
722 multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
723 }
724 #endif
725 PLUGIN_ASSERT(device_.IsActive());
726 PrimeCache(GraphicsState {}); // Initializes cache.
727 glGenFramebuffers(1, &blitImageSourceFbo_);
728 glGenFramebuffers(1, &blitImageDestinationFbo_);
729 #if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
730 PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
731 PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
732 #endif
733 #if !RENDER_HAS_GLES_BACKEND
734 glEnable(GL_PROGRAM_POINT_SIZE);
735 #endif
736 }
737
738 RenderBackendGLES::~RenderBackendGLES()
739 {
740 PLUGIN_ASSERT(device_.IsActive());
741 device_.DeleteFrameBuffer(blitImageSourceFbo_);
742 device_.DeleteFrameBuffer(blitImageDestinationFbo_);
743 }
744
745 void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
746 {
747 if (!backBufferConfig.swapchainData.empty()) {
748 if (device_.HasSwapchain()) {
749 #if (RENDER_PERF_ENABLED == 1)
750 commonCpuTimers_.present.Begin();
751 #endif
752 for (const auto& swapchainData : backBufferConfig.swapchainData) {
753 #if (RENDER_DEV_ENABLED == 1)
754 if (swapchainData.config.gpuSemaphoreHandle) {
755 // NOTE: not implemented
756 PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
757 }
758 #endif
759 const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
760 if (swp) {
761 #if RENDER_GL_FLIP_Y_SWAPCHAIN
762 // Blit and flip our swapchain frame to backbuffer..
763 const auto& sdesc = swp->GetDesc();
764 if (scissorEnabled_) {
765 glDisable(GL_SCISSOR_TEST);
766 scissorEnabled_ = false;
767 }
768 const auto& platSwapchain = swp->GetPlatformData();
769 device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
770 device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to current context..
771 glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
772 (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
773 device_.BindReadFrameBuffer(0);
774 #endif
775 device_.SwapBuffers(*swp);
776 }
777 }
778 #if (RENDER_PERF_ENABLED == 1)
779 commonCpuTimers_.present.End();
780 #endif
781 }
782 }
783 }
784
785 void RenderBackendGLES::ResetState()
786 {
787 boundProgram_ = {};
788 boundIndexBuffer_ = {};
789 vertexAttribBinds_ = 0;
790 renderingToDefaultFbo_ = false;
791 boundComputePipeline_ = nullptr;
792 boundGraphicsPipeline_ = nullptr;
793 currentPsoHandle_ = {};
794 renderArea_ = {};
795 activeRenderPass_ = {};
796 currentSubPass_ = 0;
797 currentFrameBuffer_ = nullptr;
798 scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
799 inRenderpass_ = 0;
800 }
801
802 void RenderBackendGLES::ResetBindings()
803 {
804 for (auto& b : boundObjects_) {
805 b.dirty = true;
806 }
807 boundComputePipeline_ = nullptr;
808 boundGraphicsPipeline_ = nullptr;
809 currentPsoHandle_ = {};
810 }
811
812 void RenderBackendGLES::Render(
813 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
814 {
815 // NOTE: all command lists are validated before entering here
816 PLUGIN_ASSERT(device_.IsActive());
817 #if (RENDER_PERF_ENABLED == 1)
818 commonCpuTimers_.full.Begin();
819 commonCpuTimers_.acquire.Begin();
820 #endif
821 presentationInfo_ = {};
822
823 if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
824 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
825 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
826 if (const SwapchainGLES* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
827 presentationInfo_.swapchainImageIndex = swp->GetNextImage();
828 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
829 if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
830 // remap image to backbuffer
831 const RenderHandle currentSwapchainHandle =
832 swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
833 // special swapchain remapping
834 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
835 }
836 }
837 }
838 }
839 #if (RENDER_PERF_ENABLED == 1)
840 commonCpuTimers_.acquire.End();
841
842 StartFrameTimers(renderCommandFrameData);
843 commonCpuTimers_.execute.Begin();
844 #endif
845 // Reset bindings.
846 ResetState();
847 for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
848 // Reset bindings between command lists..
849 ResetBindings();
850 RenderSingleCommandList(ref);
851 }
852 #if (RENDER_PERF_ENABLED == 1)
853 commonCpuTimers_.execute.End();
854 #endif
855 RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
856 #if (RENDER_PERF_ENABLED == 1)
857 commonCpuTimers_.full.End();
858 EndFrameTimers();
859 #endif
860 }
861
862 void RenderBackendGLES::RenderProcessEndCommandLists(
863 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
864 {
865 if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
866 frameSync->GetFrameFence();
867 }
868 // signal external GPU fences
869 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
870 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
871 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
872 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
873 if (externalSignals.size() == externalSemaphores.size()) {
874 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
875 // only signal semaphores that have not been signaled yet
876 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
877 if (const GpuSemaphoreGles* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
878 auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
879 // NOTE: currently could create only one GPU sync
880 GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
881 plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
882 externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
883 externalSignals[sigIdx].signaled = true;
884
885 // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
886 }
887 }
888 }
889 }
890 }
891 }
892
893 void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
894 {
895 PLUGIN_ASSERT_MSG(false, "non-valid render command");
896 }
897
898 void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
899 {
900 // these are validated in render graph
901 managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
902 renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };
903
904 managers_.poolMgr->BeginBackendFrame();
905 managers_.psoMgr->BeginBackendFrame();
906 #if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
907 const auto& debugName = renderCommandCtx.debugName;
908 #endif
909 #if (RENDER_PERF_ENABLED == 1)
910 perfCounters_ = {};
911 PLUGIN_ASSERT(timers_.count(debugName) == 1);
912 PerfDataSet& perfDataSet = timers_[debugName];
913 perfDataSet.cpuTimer.Begin();
914 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
915 if (validGpuQueries_) {
916 #ifdef GL_GPU_DISJOINT_EXT
917 /* Clear disjoint error */
918 GLint disjointOccurred = 0;
919 glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
920 #endif
921 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
922 PLUGIN_ASSERT(gpuQuery);
923
924 const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
925 PLUGIN_ASSERT(platData.queryObject);
926 glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
927 }
928 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
929 #endif // RENDER_PERF_ENABLED
930 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
931 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
932 #endif
933 commandListValid_ = true;
934 for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
935 PLUGIN_ASSERT(ref.rc);
936 if (commandListValid_) {
937 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
938 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
939 #endif
940 (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
941 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
942 glPopDebugGroup();
943 #endif
944 }
945 }
946 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
947 glPopDebugGroup();
948 #endif
949 #if (RENDER_PERF_ENABLED == 1)
950 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
951 if (validGpuQueries_) {
952 glEndQuery(GL_TIME_ELAPSED_EXT);
953 }
954 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
955 perfDataSet.cpuTimer.End();
956 CopyPerfTimeStamp(debugName, perfDataSet);
957 #endif // RENDER_PERF_ENABLED
958 }
959
960 void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
961 {
962 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
963 const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
964 boundProgram_ = {};
965 if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
966 PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
967 BindComputePipeline(renderCmd);
968 } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
969 BindGraphicsPipeline(renderCmd);
970 }
971 currentPsoHandle_ = renderCmd.psoHandle;
972 }
973
974 void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
975 {
976 const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
977 managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
978 if (pso) {
979 const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
980 // Setup descriptorset bind cache..
981 SetupCache(data.pipelineLayout);
982 }
983 boundComputePipeline_ = pso;
984 boundGraphicsPipeline_ = nullptr;
985 }
986
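// Rebuilds the descriptor set bind cache from the pipeline layout; sets whose size or descriptor types changed are marked dirty.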
987 void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
988 {
989 // based on pipeline layout. (note that compatible sets should "save state")
990 for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
991 // mark non-matching sets dirty (all for now)
992 // resize the cache structures.
993 const auto& s = pipelineLayout.descriptorSetLayouts[set];
994 if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
995 boundObjects_[set].dirty = true;
996 #if RENDER_HAS_GLES_BACKEND
997 boundObjects_[set].oesBinds.clear();
998 #endif
999 boundObjects_[set].resources.clear();
1000 continue;
1001 }
1002 PLUGIN_ASSERT(s.set == set);
1003
1004 uint32_t maxB = 0;
1005 // NOTE: compatibility optimizations?
1006 // NOTE: we expect bindings to be sorted.
1007 if (s.bindings.back().binding == s.bindings.size() - 1U) {
1008 // since the last binding matches the size, expect it to be continuous.
1009 maxB = static_cast<uint32_t>(s.bindings.size());
1010 } else {
1011 // Sparse binding.
1012 // NOTE: sparse sets will waste memory here. (see notes in
1013 // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
1014 for (const auto& bind : s.bindings) {
1015 maxB = Math::max(maxB, bind.binding);
1016 }
1017 maxB += 1U; // zero based bindings..
1018 }
1019 if (boundObjects_[set].resources.size() != maxB) {
1020 // resource count change.. (so it's dirty then)
1021 boundObjects_[set].dirty = true;
1022 #if RENDER_HAS_GLES_BACKEND
1023 boundObjects_[set].oesBinds.clear();
1024 #endif
1025 boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
1026 boundObjects_[set].resources.resize(maxB);
1027 }
1028
1029 for (const auto& b : s.bindings) {
1030 auto& o = boundObjects_[set].resources[b.binding];
1031 // ignore b.shaderStageFlags for now.
1032 if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
1033 // mark set dirty, since "not matching"
1034 o.resources.clear();
1035 o.resources.resize(b.descriptorCount);
1036 o.descriptorType = b.descriptorType;
1037 boundObjects_[set].dirty = true;
1038 #if RENDER_HAS_GLES_BACKEND
1039 boundObjects_[set].oesBinds.clear();
1040 #endif
1041 }
1042 }
1043 }
1044 }
1045
1046 void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
1047 {
1048 const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
1049 managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
1050 activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
1051 if (pso) {
1052 const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
1053 dynamicStateFlags_ = data.dynamicStateFlags;
1054 DoGraphicsState(data.graphicsState);
1055 // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
1056 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
1057 SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
1058 static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
1059 }
1060 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
1061 SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
1062 }
1063 // Setup descriptorset bind cache..
1064 SetupCache(data.pipelineLayout);
1065 }
1066 boundComputePipeline_ = nullptr;
1067 boundGraphicsPipeline_ = pso;
1068 }
1069
1070 void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
1071 {
1072 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
1073 const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
1074 if (!boundGraphicsPipeline_) {
1075 return;
1076 }
1077 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1078 BindResources();
1079 const auto type = GetPrimFromTopology(topology_);
1080 const GLsizei firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
1081 const GLsizei instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
1082 // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
1083 // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
1084 if (renderCmd.indexCount) {
1085 uintptr_t offsetp = boundIndexBuffer_.offset;
1086 GLenum indexType = GL_UNSIGNED_SHORT;
1087 switch (boundIndexBuffer_.type) {
1088 case CORE_INDEX_TYPE_UINT16:
1089 offsetp += renderCmd.firstIndex * sizeof(uint16_t);
1090 indexType = GL_UNSIGNED_SHORT;
1091 break;
1092 case CORE_INDEX_TYPE_UINT32:
1093 offsetp += renderCmd.firstIndex * sizeof(uint32_t);
1094 indexType = GL_UNSIGNED_INT;
1095 break;
1096 default:
1097 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1098 break;
1099 }
1100 const GLsizei indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
1101 const void* offset = reinterpret_cast<const void*>(offsetp);
1102 if (renderCmd.instanceCount > 1) {
1103 if (renderCmd.firstVertex) {
1104 glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, firstVertex);
1105 } else {
1106 glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
1107 }
1108 } else {
1109 if (renderCmd.vertexOffset) {
1110 glDrawElementsBaseVertex(
1111 type, indexCount, indexType, offset, static_cast<GLint>(renderCmd.vertexOffset));
1112 } else {
1113 glDrawElements(type, indexCount, indexType, offset);
1114 }
1115 }
1116 #if (RENDER_PERF_ENABLED == 1)
1117 ++perfCounters_.drawCount;
1118 perfCounters_.instanceCount += renderCmd.instanceCount;
1119 perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1120 #endif
1121 } else {
1122 const GLsizei vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
1123 if (renderCmd.instanceCount > 1) {
1124 glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
1125 } else {
1126 glDrawArrays(type, firstVertex, vertexCount);
1127 }
1128 #if (RENDER_PERF_ENABLED == 1)
1129 ++perfCounters_.drawCount;
1130 perfCounters_.instanceCount += renderCmd.instanceCount;
1131 perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
1132 #endif
1133 }
1134 }
1135
1136 void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
1137 {
1138 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
1139 const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
1140 if (!boundGraphicsPipeline_) {
1141 return;
1142 }
1143 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1144 BindResources();
1145 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1146 const auto& plat = gpuBuffer->GetPlatformData();
1147 device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
1148 const auto type = GetPrimFromTopology(topology_);
1149 auto offset = static_cast<GLintptr>(renderCmd.offset);
1150 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1151 GLenum indexType = GL_UNSIGNED_SHORT;
1152 switch (boundIndexBuffer_.type) {
1153 case CORE_INDEX_TYPE_UINT16:
1154 indexType = GL_UNSIGNED_SHORT;
1155 break;
1156 case CORE_INDEX_TYPE_UINT32:
1157 indexType = GL_UNSIGNED_INT;
1158 break;
1159 default:
1160 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1161 break;
1162 }
1163 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1164 glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
1165 offset += renderCmd.stride;
1166 }
1167 } else {
1168 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1169 glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
1170 offset += renderCmd.stride;
1171 }
1172 }
1173 #if (RENDER_PERF_ENABLED == 1)
1174 perfCounters_.drawIndirectCount += renderCmd.drawCount;
1175 #endif
1176 }
1177 }
1178
1179 void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
1180 {
1181 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
1182 const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
1183 if (!boundComputePipeline_) {
1184 return;
1185 }
1186 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1187 BindResources();
1188 glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
1189 #if (RENDER_PERF_ENABLED == 1)
1190 ++perfCounters_.dispatchCount;
1191 #endif
1192 }
1193
1194 void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
1195 {
1196 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
1197 const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
1198 if (!boundComputePipeline_) {
1199 return;
1200 }
1201 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1202 BindResources();
1203 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1204 const auto& plat = gpuBuffer->GetPlatformData();
1205 device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
1206 glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
1207 #if (RENDER_PERF_ENABLED == 1)
1208 ++perfCounters_.dispatchIndirectCount;
1209 #endif
1210 }
1211 }
1212
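// The ClearScissor* helpers temporarily constrain the scissor box to the render area for loadOp clears and restore
// the cached scissor state afterwards.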
1213 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1214 {
1215 resetScissor_ = false; // need to reset scissor state after clear?
1216 clearScissorSet_ = true; // need to setup clear scissors before clear?
1217 clearScissor_ = aArea; // area to be cleared
1218 if (scissorPrimed_) { // have scissors been set yet?
1219 if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissorbox.
1220 (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1221 (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1222 (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1223 // Current scissors match clearscissor area, so no need to set it again.
1224 clearScissorSet_ = false;
1225 }
1226 }
1227 }
1228
1229 void RenderBackendGLES::ClearScissorSet()
1230 {
1231 if (clearScissorSet_) { // do we need to set clear scissors.
1232 clearScissorSet_ = false; // clear scissors have been set now.
1233 resetScissor_ = true; // we are modifying scissors, so remember to reset them afterwards.
1234 glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1235 static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1236 }
1237 }
1238
1239 void RenderBackendGLES::ClearScissorReset()
1240 {
1241 if (resetScissor_) { // need to reset correct scissors?
1242 if (!scissorPrimed_) {
1243 // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1244 // setting)
1245 scissorPrimed_ = true;
1246 scissorBox_.offsetX = clearScissor_.offsetX;
1247 scissorBox_.offsetY = clearScissor_.offsetY;
1248 scissorBox_.extentHeight = clearScissor_.extentHeight;
1249 scissorBox_.extentWidth = clearScissor_.extentWidth;
1250 } else {
1251 // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1252 scissorBoxUpdated_ = true; // ie. request to update scissor state.
1253 }
1254 }
1255 }
1256
1257 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1258 {
1259 constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1260 CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1261 const auto& cBlend = cacheState_.colorBlendState;
1262 for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1263 if (colorAttachments[idx] == nullptr) {
1264 continue;
1265 }
1266 const auto& ref = *(colorAttachments[idx]);
1267 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1268 const auto& cBlendState = cBlend.colorAttachments[idx];
1269 if (clearAll != cBlendState.colorWriteMask) {
1270 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1271 }
1272 ClearScissorSet();
1273 // glClearBufferfv only for float formats?
1274 // glClearBufferiv & glClearbufferuv only for integer formats?
1275 glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1276 if (clearAll != cBlendState.colorWriteMask) {
1277 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1278 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1279 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1280 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1281 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1282 }
1283 }
1284 }
1285 }
1286
1287 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1288 {
1289 const GLuint allBits = 0xFFFFFFFFu;
1290 const auto& ref = depthAttachment;
1291 const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1292 const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1293 // Change state if needed.
1294 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1295 glDepthMask(GL_TRUE);
1296 }
1297 if (clearStencil) {
1298 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1299 glStencilMaskSeparate(GL_FRONT, allBits);
1300 }
1301 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1302 glStencilMaskSeparate(GL_BACK, allBits);
1303 }
1304 }
1305 if (clearDepth || clearStencil) {
1306 // Set the scissors for clear..
1307 ClearScissorSet();
1308 }
1309 // Do clears.
1310 if (clearDepth && clearStencil) {
1311 glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1312 static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1313 } else if (clearDepth) {
1314 glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1315 } else if (clearStencil) {
1316 glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1317 }
1318
1319 // Restore cached state, if we touched the state.
1320 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1321 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1322 glDepthMask(GL_FALSE);
1323 }
1324 if (clearStencil) {
1325 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1326 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1327 glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1328 }
1329 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1330 glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1331 }
1332 }
1333 }
1334
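// Binds the subpass framebuffer and performs pending loadOp clears for its color and depth/stencil attachments.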
1335 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1336 {
1337 if (currentFrameBuffer_ == nullptr) {
1338 // Completely invalid state in backend.
1339 return;
1340 }
1341 const auto& rpd = activeRenderPass_.renderPassDesc;
1342 const auto& sb = activeRenderPass_.subpasses[subPass];
1343
1344 // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
1345 if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1346 auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1347 device_.Activate(color);
1348 }
1349 device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1350 ClearScissorInit(renderArea_);
1351 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1352 SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1353 }
1354 {
1355 // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1356 // possible. (ie. all buffers at once)
1357 renderingToDefaultFbo_ = false;
1358 if (sb.colorAttachmentCount > 0) {
1359 // collect color attachment infos..
1360 const RenderPassDesc::AttachmentDesc*
1361 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1362 for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1363 uint32_t index = sb.colorAttachmentIndices[ci];
1364 if (resolveToBackbuffer_[index]) {
1365 // NOTE: this could fail with multiple color attachments....
1366 renderingToDefaultFbo_ = true;
1367 }
1368 if (!attachmentCleared_[index]) {
1369 attachmentCleared_[index] = true;
1370 colorAttachments[ci] = &rpd.attachments[index];
1371 } else {
1372 colorAttachments[ci] = nullptr;
1373 }
1374 }
1375 HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1376 }
1377 if (sb.depthAttachmentCount) {
1378 if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1379 attachmentCleared_[sb.depthAttachmentIndex] = true;
1380 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1381 }
1382 }
1383 }
1384 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1385 // NOTE: We might not need to restore here... (we would need to peek into the command list to find out)
1386 SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1387 }
1388 ClearScissorReset();
1389 }
1390
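// ScanPasses records, per attachment, the first and last subpass that touches it and marks which
// color attachments end up resolved into the default framebuffer (image == 0 && renderBuffer == 0).
// As a worked example (illustrative only): a pass with two subpasses, where attachment 0 is an MSAA
// color target drawn in both subpasses and resolved in subpass 1 into attachment 2 backed by the
// swapchain surface, yields firstUse[0] = 0, lastUse[0] = 1, firstUse[2] = lastUse[2] = 1 and
// resolveToBackbuffer_[0] = true, which later makes DoSubPass() set renderingToDefaultFbo_.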
1391 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1392 {
1393 for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1394 const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1395 for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1396 uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1397 if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1398 attachmentFirstUse_[resolveTo] = sub;
1399 }
1400 attachmentLastUse_[resolveTo] = sub;
1401 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1402 if ((p.image == 0) && (p.renderBuffer == 0)) {
1403 // mark the "resolveFrom" attachment (i.e. the color attachment) as "backbuffer-like", since we
1404 // resolve it to the backbuffer...
1405 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1406 resolveToBackbuffer_[resolveFrom] = true;
1407 }
1408 }
1409 for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1410 uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1411 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1412 attachmentFirstUse_[index] = sub;
1413 }
1414 attachmentLastUse_[index] = sub;
1415 }
1416 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1417 uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1418 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1419 attachmentFirstUse_[index] = sub;
1420 }
1421 attachmentLastUse_[index] = sub;
1422 if (attachmentImage_[index]) {
1423 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1424 if ((p.image == 0) && (p.renderBuffer == 0)) {
1425 resolveToBackbuffer_[index] = true;
1426 }
1427 }
1428 }
1429 if (currentSubPass.depthAttachmentCount > 0) {
1430 uint32_t index = currentSubPass.depthAttachmentIndex;
1431 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1432 attachmentFirstUse_[index] = sub;
1433 }
1434 attachmentLastUse_[index] = sub;
1435 }
1436 }
1437 }
1438
1439 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1440 {
1441 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1442 const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1443 switch (renderCmd.beginType) {
1444 case RenderPassBeginType::RENDER_PASS_BEGIN: {
1445 ++inRenderpass_;
1446 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass inRenderpass_ %u", inRenderpass_);
1447 activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1448
1449 const auto& rpd = activeRenderPass_.renderPassDesc;
1450 renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1451 auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1452 if (multisampledRenderToTexture_) {
1453 cpm.FilterRenderPass(activeRenderPass_);
1454 }
1455 currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1456 if (currentFrameBuffer_ == nullptr) {
1457 // Completely invalid state in backend.
1458 commandListValid_ = false;
1459 --inRenderpass_;
1460 return;
1461 }
1462 PLUGIN_ASSERT_MSG(
1463 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1464 currentSubPass_ = 0;
1465 // find first and last use, clear the clear flags. (this could be cached in the low-level classes)
1466 for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1467 attachmentCleared_[i] = false;
1468 attachmentFirstUse_[i] = 0xFFFFFFFF;
1469 attachmentLastUse_[i] = 0;
1470 resolveToBackbuffer_[i] = false;
1471 attachmentImage_[i] =
1472 static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1473 }
1474 ScanPasses(rpd);
1475 DoSubPass(0);
1476 #if (RENDER_PERF_ENABLED == 1)
1477 ++perfCounters_.renderPassCount;
1478 #endif
1479 } break;
1480
1481 case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1482 ++currentSubPass_;
1483 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1484 DoSubPass(activeRenderPass_.subpassStartIndex);
1485 } break;
1486
1487 default:
1488 break;
1489 }
1490 }
1491
1492 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1493 {
1494 PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1495 const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1496 PLUGIN_UNUSED(renderCmd);
1497 PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1498 ++currentSubPass_;
1499 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1500 DoSubPass(currentSubPass_);
1501 }
1502
1503 int32_t RenderBackendGLES::InvalidateDepthStencil(
1504 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1505 {
1506 int32_t depthCount = 0;
1507 if (currentSubPass.depthAttachmentCount > 0) {
1508 const uint32_t index = currentSubPass.depthAttachmentIndex;
1509 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1510 const auto& image = attachmentImage_[index];
1511 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1512 // NOTE: we expect the depth to be in the FBO in this case, even if the render pass has a depth target
1513 if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1514 bool depth = false;
1515 bool stencil = false;
1516 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1517 if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1518 depth = true;
1519 }
1520 }
1521 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1522 if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1523 stencil = true;
1524 }
1525 }
1526 if (depth && stencil) {
1527 invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1528 depthCount++;
1529 } else if (stencil) {
1530 invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1531 depthCount++;
1532 } else if (depth) {
1533 invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1534 depthCount++;
1535 }
1536 }
1537 }
1538 }
1539 return depthCount;
1540 }
1541
1542 int32_t RenderBackendGLES::InvalidateColor(
1543 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1544 {
1545 int32_t colorCount = 0;
1546 // see which parts of the fbo can be invalidated...
1547 // collect color attachment infos..
1548 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1549 const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1550 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1551 if (const auto* image = attachmentImage_[index]) {
1552 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1553 if (dplat.image || dplat.renderBuffer) {
1554 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1555 invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1556 colorCount++;
1557 }
1558 }
1559 }
1560 }
1561 }
1562 return colorCount;
1563 }
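// NOTE: the attachment lists produced by InvalidateColor / InvalidateDepthStencil are consumed at the
// end of the render pass; the Invalidate() helper is assumed to boil down to roughly:
//   glInvalidateFramebuffer(fbType, static_cast<GLsizei>(invalidateCount), invalidate);
// i.e. attachments whose storeOp / stencilStoreOp is DONT_CARE and whose last use is this subpass are
// dropped so that a tile-based GPU does not need to write them back to memory.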
1564
1565 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1566 {
1567 const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1568 ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1569 if (mask) {
1570 // Resolve MSAA buffers.
1571 // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1572 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1573 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1574 if (scissorEnabled_) {
1575 glDisable(GL_SCISSOR_TEST);
1576 scissorEnabled_ = false;
1577 }
1578 // FLIP_RESOLVE_DEFAULT_FBO is not needed, since we already render flipped whenever the end result
1579 // will be resolved to the default FBO.
1580 #if defined(FLIP_RESOLVE_DEFAULT_FBO) && FLIP_RESOLVE_DEFAULT_FBO
1581 if (currentFrameBuffer_->resolveFbo[currentSubPass_] == 0) {
1582 // flip if resolving to default fbo. (NOTE: sample count of destination must be zero or equal to source)
1583 // and on Mali devices the src and dst rects MUST be equal (which does not follow the spec),
1584 // i.e. we can't flip and resolve at the same time on Mali-based devices.
1585 // NEED A FIX HERE!
1586 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1587 static_cast<GLint>(currentFrameBuffer_->height), 0, static_cast<GLint>(currentFrameBuffer_->height),
1588 static_cast<GLint>(currentFrameBuffer_->width), 0, mask, GL_NEAREST);
1589 return GL_READ_FRAMEBUFFER;
1590 }
1591 #endif
1592 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1593 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1594 static_cast<GLint>(currentFrameBuffer_->height), mask,
1595 GL_NEAREST); // no flip
1596 return GL_READ_FRAMEBUFFER;
1597 }
1598 return GL_FRAMEBUFFER;
1599 }
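// NOTE: the blit above resolves with GL_NEAREST because the mask may include GL_DEPTH_BUFFER_BIT, and
// the GL spec requires GL_NEAREST whenever depth or stencil is blitted. On tile-based GPUs a cheaper
// alternative is an implicit resolve via GL_EXT_multisampled_render_to_texture, which this backend
// appears to prefer when multisampledRenderToTexture_ is set (see RenderCommandBeginRenderPass).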
1600
1601 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1602 {
1603 PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1604 const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1605 if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1606 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass inRenderpass_ %u", inRenderpass_);
1607 inRenderpass_--;
1608 }
1609 if (currentFrameBuffer_ == nullptr) {
1610 // Completely invalid state in backend.
1611 return;
1612 }
1613 const auto& rpd = activeRenderPass_.renderPassDesc;
1614 const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1615
1616 // Resolve MSAA
1617 const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1618
1619 // Finally invalidate color and depth..
1620 GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1621 int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1622 invalidateCount += InvalidateDepthStencil(
1623 array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1624
1625 // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1626 Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1627
1628 if (inRenderpass_ == 0) {
1629 currentFrameBuffer_ = nullptr;
1630 }
1631 }
1632
1633 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1634 {
1635 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1636 const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1637 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1638 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1639 if (!boundGraphicsPipeline_) {
1640 return;
1641 }
1642 vertexAttribBinds_ = renderCmd.vertexBufferCount;
1643 for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1644 const auto& currVb = renderCmd.vertexBuffers[i];
1645 if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1646 const auto& plat = gpuBuffer->GetPlatformData();
1647 uintptr_t offset = currVb.bufferOffset;
1648 offset += plat.currentByteOffset;
1649 vertexAttribBindSlots_[i].id = plat.buffer;
1650 vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1651 } else {
1652 vertexAttribBindSlots_[i].id = 0;
1653 vertexAttribBindSlots_[i].offset = 0;
1654 }
1655 }
1656 }
1657
1658 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1659 {
1660 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1661 const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1662 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1663 gpuBuffer) {
1664 const auto& plat = gpuBuffer->GetPlatformData();
1665 boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1666 boundIndexBuffer_.offset += plat.currentByteOffset;
1667 boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1668 boundIndexBuffer_.id = plat.buffer;
1669 }
1670 }
1671
1672 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1673 {
1674 PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1675 const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1676 const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1677 const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1678 if ((srcImage == nullptr) || (dstImage == nullptr)) {
1679 return;
1680 }
1681 const auto& srcDesc = srcImage->GetDesc();
1682 const auto& srcPlat = srcImage->GetPlatformData();
1683 const auto& dstDesc = dstImage->GetDesc();
1684 const auto& dstPlat = dstImage->GetPlatformData();
1685 const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1686 const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1687 const auto& src = renderCmd.imageBlit.srcSubresource;
1688 const auto& dst = renderCmd.imageBlit.dstSubresource;
1689 const GLint srcMipLevel = static_cast<GLint>(src.mipLevel);
1690 const GLint dstMipLevel = static_cast<GLint>(dst.mipLevel);
1691 const uint32_t srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1692 const uint32_t dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1693 PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1694 PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1695 glDisable(GL_SCISSOR_TEST);
1696 scissorEnabled_ = false;
1697 // NOTE: LAYERS! (texture arrays)
1698 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1699 device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1700 for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1701 const GLenum srcType = getTarget(srcPlat.type, layer, srcSampleCount);
1702 const GLenum dstType = getTarget(dstPlat.type, layer, dstSampleCount);
1703 // glFramebufferTextureLayer for array textures....
1704 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1705 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1706 DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1707 { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1708 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1709 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1710 }
1711 }
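// NOTE: as the comment above says, 2D array sources/destinations are not handled yet; the per-layer
// attach would use glFramebufferTextureLayer instead of glFramebufferTexture2D, roughly:
//   glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcPlat.image,
//       srcMipLevel, static_cast<GLint>(src.baseArrayLayer + layer));
// (a sketch only; the baseArrayLayer handling is an assumption, the current loop indexes layers from 0)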
1712
1713 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1714 {
1715 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1716 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1717 const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1718 const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1719 if (srcGpuBuffer && dstGpuBuffer) {
1720 const auto& srcData = srcGpuBuffer->GetPlatformData();
1721 const auto& dstData = dstGpuBuffer->GetPlatformData();
1722 const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1723 const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1724 device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1725 device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1726 glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1727 static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1728 static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1729 device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1730 device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1731 }
1732 }
1733
1734 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1735 {
1736 #if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
1737 // use the workaround only for gles backend on windows. (pvr simulator bug)
1738 constexpr const bool usePixelUnpackBuffer = false;
1739 #else
1740 // expect this to work, and the nvidia bug to be fixed.
1741 constexpr const bool usePixelUnpackBuffer = true;
1742 #endif
1743 auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1744 auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1745 if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1746 return;
1747 }
1748 const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1749 if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1750 BlitCube(device_, info);
1751 } else if (info.iPlat.type == GL_TEXTURE_2D) {
1752 Blit2D(device_, info);
1753 } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1754 BlitArray(device_, info);
1755 } else if (info.iPlat.type == GL_TEXTURE_3D) {
1756 Blit3D(device_, info);
1757 #if RENDER_HAS_GLES_BACKEND
1758 } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1759 PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1760 #endif
1761 } else {
1762 PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1763 }
1764 FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1765 }
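// NOTE: when usePixelUnpackBuffer is true, SetupBlit / Blit2D / FinishBlit are assumed to implement
// the classic pixel-unpack-buffer upload, roughly (names x, y, w, h, bufferPlat, imagePlat are
// illustrative here):
//   device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, bufferPlat.buffer);
//   glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
//   glTexSubImage2D(GL_TEXTURE_2D, static_cast<GLint>(bc.imageSubresource.mipLevel), x, y, w, h,
//       imagePlat.format, imagePlat.dataType, reinterpret_cast<const void*>(bc.bufferOffset));
//   device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// With the workaround path the source data is instead passed as a client-memory pointer, which
// sidesteps the simulator/driver bugs mentioned above.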
1766
1767 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1768 {
1769 const auto& bc = renderCmd.bufferImageCopy;
1770 const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1771 const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1772 PLUGIN_ASSERT(srcGpuImage);
1773 PLUGIN_ASSERT(dstGpuBuffer);
1774 if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1775 return;
1776 }
1777 const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1778 const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1779 if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1780 PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1781 return;
1782 }
1783 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1784 PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1785 GLenum type = GL_TEXTURE_2D;
1786 if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1787 type = getCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1788 }
1789 // glFramebufferTextureLayer for array textures....
1790 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1791 static_cast<GLint>(bc.imageSubresource.mipLevel));
1792 const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1793 const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1794 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1795 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1796 glPixelStorei(GL_PACK_ALIGNMENT, 1);
1797 uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1798 glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1799 static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1800 static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1801 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1802 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1803 }
1804
1805 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1806 {
1807 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1808 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1809 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1810 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1811 BufferToImageCopy(renderCmd);
1812 } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1813 ImageToBufferCopy(renderCmd);
1814 }
1815 }
1816
1817 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1818 {
1819 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1820 const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1821 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1822 const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1823 const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1824 if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1825 return;
1826 }
1827 const auto& srcDesc = srcGpuImage->GetDesc();
1828 const auto& dstDesc = dstGpuImage->GetDesc();
1829 #if RENDER_VALIDATION_ENABLED
1830 ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1831 #endif
1832 const auto srcMipLevel =
1833 static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1834 const auto dstMipLevel =
1835 static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1836
1837 auto sOffset = renderCmd.imageCopy.srcOffset;
1838 auto dOffset = renderCmd.imageCopy.dstOffset;
1839 auto size = renderCmd.imageCopy.extent;
1840
1841 // clamp negative offsets to zero and adjust extent and other offset accordingly
1842 ClampOffset(sOffset, dOffset, size);
1843 ClampOffset(dOffset, sOffset, size);
1844
1845 // clamp size to fit src and dst
1846 ClampSize(sOffset, srcDesc, size);
1847 ClampSize(dOffset, dstDesc, size);
1848
1849 const auto& srcPlatData = srcGpuImage->GetPlatformData();
1850 const auto& dstPlatData = dstGpuImage->GetPlatformData();
1851 glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1852 dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1853 static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1854 }
1855
1856 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1857 {
1858 PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1859 const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1860 const auto& rbList = *managers_.rbList;
1861 // NOTE: proper flagging of barriers.
1862 if (const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1863 rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1864 barrierPointBarriers) {
1865 const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1866 const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1867 GLbitfield barriers = 0;
1868 GLbitfield barriersByRegion = 0;
1869 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1870 if (nextBarrierList == nullptr) {
1871 // cannot be null, just a safety
1872 PLUGIN_ASSERT(false);
1873 return;
1874 }
1875 const auto& barrierListRef = *nextBarrierList;
1876 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1877 const uint32_t barrierCount = barrierListRef.count;
1878 // helper which covers barriers supported by Barrier and BarrierByRegion
1879 auto commonBarrierBits = [](AccessFlags accessFlags, RenderHandleType resourceType) -> GLbitfield {
1880 GLbitfield barriers = 0;
1881 if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
1882 barriers |= GL_UNIFORM_BARRIER_BIT;
1883 }
1884 if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
1885 // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
1886 if (resourceType == RenderHandleType::GPU_IMAGE) {
1887 barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1888 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1889 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1890 } else {
1891 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT |
1892 GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1893 }
1894 }
1895 if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
1896 if (resourceType == RenderHandleType::GPU_IMAGE) {
1897 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1898 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1899 barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
1900 } else {
1901 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1902 }
1903 }
1904 if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1905 CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
1906 barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
1907 }
1908 // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
1909 return barriers;
1910 };
1911 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1912 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1913
1914 // check if written by previous shader as an attachment or storage/ image buffer
1915 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1916 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1917 const auto resourceHandle = barrier.resourceHandle;
1918 const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1919
1920 // barrier by region is between fragment shaders and supports a subset of barriers.
1921 if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1922 (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1923 barriersByRegion |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1924 } else {
1925 // check the barriers shared with ByRegion
1926 barriers |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1927
1928 // the rest are invalid for ByRegion
1929 if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1930 barriers |= GL_COMMAND_BARRIER_BIT;
1931 }
1932 if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1933 barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1934 }
1935 if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1936 barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1937 }
1938 // Which access flags map to these bits?
1939 // GL_PIXEL_BUFFER_BARRIER_BIT:
1940 //  - buffer objects accessed via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings
1941 //    (glReadPixels, glTexSubImage*, etc.)
1942 // GL_TEXTURE_UPDATE_BARRIER_BIT:
1943 //  - texture writes via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*,
1944 //    and reads via glGetTexImage
1945 // GL_BUFFER_UPDATE_BARRIER_BIT:
1946 //  - glBufferSubData, glCopyBufferSubData, glGetBufferSubData, and buffer object memory
1947 //    mapped by glMapBuffer or glMapBufferRange
1948 // Should these two also cover CORE_ACCESS_MEMORY_READ_BIT and
1949 // CORE_ACCESS_MEMORY_WRITE_BIT?
1950 if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
1951 CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
1952 if (handleType == RenderHandleType::GPU_IMAGE) {
1953 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
1954 } else if (handleType == RenderHandleType::GPU_BUFFER) {
1955 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
1956 }
1957 }
1958 // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
1959 }
1960 }
1961 }
1962 }
1963 if (barriers) {
1964 glMemoryBarrier(barriers);
1965 }
1966 if (barriersByRegion) {
1967 // only for fragment-fragment
1968 glMemoryBarrierByRegion(barriersByRegion);
1969 }
1970 }
1971 }
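// NOTE: a worked example of the mapping above: a compute dispatch that writes an SSBO
// (src access CORE_ACCESS_SHADER_WRITE_BIT) followed by a draw that reads it as an index buffer
// (dst access CORE_ACCESS_INDEX_READ_BIT) accumulates GL_ELEMENT_ARRAY_BARRIER_BIT and is flushed
// with glMemoryBarrier(); a fragment-to-fragment dependency (e.g. writing a color attachment that the
// next subpass reads as an input attachment) instead accumulates into barriersByRegion and is flushed
// with glMemoryBarrierByRegion().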
1972
1973 Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
1974 {
1975 PLUGIN_ASSERT(binding.binding < resources.size());
1976 auto& obj = resources[binding.binding];
1977 PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
1978 PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
1979 return obj;
1980 }
1981
1982 void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
1983 {
1984 const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
1985 if (gpuSampler) {
1986 const auto& plat = gpuSampler->GetPlatformData();
1987 obj.resources[index].sampler.samplerId = plat.sampler;
1988 } else {
1989 obj.resources[index].sampler.samplerId = 0;
1990 }
1991 }
1992
1993 void RenderBackendGLES::BindImage(
1994 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
1995 {
1996 const AccessFlags accessFlags = resState.accessFlags;
1997 auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
1998 auto& ref = obj.resources[index];
1999 ref.image.image = gpuImage;
2000 const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
2001 const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
2002 if (read && write) {
2003 ref.image.mode = GL_READ_WRITE;
2004 } else if (read) {
2005 ref.image.mode = GL_READ_ONLY;
2006 } else if (write) {
2007 ref.image.mode = GL_WRITE_ONLY;
2008 } else {
2009 // no read and no write?
2010 ref.image.mode = GL_READ_WRITE;
2011 }
2012 ref.image.mipLevel = res.mip;
2013 }
2014
2015 void RenderBackendGLES::BindImageSampler(
2016 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2017 {
2018 BindImage(res, resState, obj, index);
2019 BindSampler(BindableSampler { res.samplerHandle }, obj, index);
2020 }
2021
2022 void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
2023 {
2024 const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
2025 if (gpuBuffer) {
2026 const auto& plat = gpuBuffer->GetPlatformData();
2027 const uint32_t baseOffset = res.byteOffset;
2028 obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
2029 obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
2030 obj.resources[index].buffer.bufferId = plat.buffer;
2031 } else {
2032 obj.resources[index].buffer.offset = 0;
2033 obj.resources[index].buffer.size = 0;
2034 obj.resources[index].buffer.bufferId = 0;
2035 }
2036 }
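// NOTE: the final offset mirrors Vulkan's dynamic descriptor behaviour: for example, with a
// ring-buffered uniform buffer where res.byteOffset == 0, plat.currentByteOffset == 65536 (this
// frame's slice) and dynamicOffset == 256, the range later bound with device_.BindBufferRange()
// starts at byte 65792, and its size is clamped to min(bindMemoryByteSize - byteOffset, byteSize).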
2037
2038 void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
2039 const DescriptorSetLayoutBindingResources& data, uint32_t set)
2040 {
2041 BindState& bind = boundObjects_[set];
2042 vector<Gles::Bind>& resources = bind.resources;
2043 #if RENDER_HAS_GLES_BACKEND
2044 bind.oesBinds.clear();
2045 #endif
2046 const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
2047 const auto& buffers = data.buffers;
2048 const auto& images = data.images;
2049 const auto& samplers = data.samplers;
2050 uint32_t currDynamic = 0U;
2051 for (const auto& res : data.bindings) {
2052 auto& obj = SetupBind(res.binding, resources);
2053 #if RENDER_HAS_GLES_BACKEND
2054 bool hasOes = false;
2055 #endif
2056 auto GetArrayOffset = [](const auto& data, const auto& res) {
2057 const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
2058 if (type == RenderHandleType::GPU_BUFFER) {
2059 return data.buffers[res.resourceIndex].arrayOffset;
2060 } else if (type == RenderHandleType::GPU_IMAGE) {
2061 return data.images[res.resourceIndex].arrayOffset;
2062 } else if (type == RenderHandleType::GPU_SAMPLER) {
2063 return data.samplers[res.resourceIndex].arrayOffset;
2064 }
2065 return 0u;
2066 };
2067 const bool hasArrOffset = (res.binding.descriptorCount > 1);
2068 const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
2069 for (uint8_t index = 0; index < res.binding.descriptorCount; index++) {
2070 const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
2071 GpuImageGLES* image = nullptr;
2072 switch (res.binding.descriptorType) {
2073 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2074 const auto& bRes = samplers[resIdx];
2075 BindSampler(bRes.resource, obj, index);
2076 break;
2077 }
2078 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2079 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2080 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2081 const auto& bRes = images[resIdx];
2082 BindImage(bRes.resource, bRes.state, obj, index);
2083 image = obj.resources[index].image.image;
2084 break;
2085 }
2086 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2087 const auto& bRes = images[resIdx];
2088 BindImageSampler(bRes.resource, bRes.state, obj, index);
2089 image = obj.resources[index].image.image;
2090 break;
2091 }
2092 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2093 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2094 const auto& bRes = buffers[resIdx];
2095 uint32_t dynamicOffset = 0;
2096 if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
2097 dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
2098 currDynamic++;
2099 }
2100 BindBuffer(bRes.resource, obj, dynamicOffset, index);
2101 break;
2102 }
2103 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2104 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
2105 const auto& bRes = buffers[resIdx];
2106 BindBuffer(bRes.resource, obj, 0, index);
2107 break;
2108 }
2109 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2110 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2111 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2112 default:
2113 PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
2114 break;
2115 }
2116 #if RENDER_HAS_GLES_BACKEND
2117 if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
2118 hasOes = true;
2119 }
2120 #endif
2121 }
2122 #if RENDER_HAS_GLES_BACKEND
2123 if (hasOes) {
2124 bind.oesBinds.push_back(OES_Bind { (uint8_t)set, (uint8_t)res.binding.binding });
2125 }
2126 #endif
2127 }
2128 }
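// NOTE: the resIdx arithmetic above packs descriptor arrays so that element 0 lives at
// res.resourceIndex and the remaining elements start at arrayOffset; e.g. with descriptorCount == 3,
// resourceIndex == R and arrayOffset == A, the elements resolve to R, A and A + 1 respectively.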
2129
2130 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2131 {
2132 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2133 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2134 return;
2135 }
2136 const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2137 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2138
2139 const auto& aNcdsm = *managers_.descriptorSetMgr;
2140 for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
2141 PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
2142 const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2143 PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
2144 const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
2145 boundObjects_[idx].dirty = true; // mark the set as "changed"
2146 ProcessBindings(renderCmd, data, idx);
2147 // (note, nothing actually gets bound yet.. just the bind cache is updated)
2148 }
2149 }
2150
2151 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2152 {
2153 const GLint location = static_cast<GLint>(pc.location);
2154 // the consts list has been filtered and cleared of unused uniforms.
2155 PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2156 GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2157 switch (pc.type) {
2158 case GL_UNSIGNED_INT: {
2159 glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2160 break;
2161 }
2162 case GL_FLOAT: {
2163 glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2164 break;
2165 }
2166 case GL_FLOAT_VEC2: {
2167 glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2168 break;
2169 }
2170 case GL_FLOAT_VEC4: {
2171 glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2172 break;
2173 }
2174 case GL_FLOAT_MAT4: {
2175 glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2176 break;
2177 }
2178 case GL_UNSIGNED_INT_VEC4: {
2179 glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2180 break;
2181 }
2182 default:
2183 PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2184 }
2185 }
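// NOTE: only the reflection types emitted by the current shaders are handled above; additional types
// would follow the same pattern, e.g. (a sketch only):
//   case GL_FLOAT_VEC3:
//       glProgramUniform3fv(program, location, count, static_cast<const GLfloat*>(data));
//       break;
//   case GL_UNSIGNED_INT_VEC2:
//       glProgramUniform2uiv(program, location, count, static_cast<const GLuint*>(data));
//       break;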
2186
2187 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2188 {
2189 if (boundProgram_.setPushConstants) {
2190 boundProgram_.setPushConstants = false;
2191 const auto& renderCmd = boundProgram_.pushConstants;
2192 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2193 PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2194 PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2195 if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2196 return;
2197 // ASSERT: expecting data is valid
2198 // NOTE: handle rest of the types
2199 for (const auto& pc : consts) {
2200 const size_t offs = pc.offset;
2201 if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2202 PLUGIN_LOG_E(
2203 "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2204 pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2205 continue;
2206 }
2207 /*
2208 NOTE: handle the strides....
2209 consts[i].array_stride;
2210 consts[i].matrix_stride; */
2211 SetPushConstant(program, pc, &renderCmd.data[offs]);
2212 }
2213 }
2214 }
2215
2216 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2217 {
2218 PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2219 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2220 return;
2221 }
2222 const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2223 if (renderCmd.pushConstant.byteSize > 0) {
2224 PLUGIN_ASSERT(renderCmd.data);
2225 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2226 boundProgram_.setPushConstants = true;
2227 boundProgram_.pushConstants = renderCmd;
2228 }
2229 }
2230
2231 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2232 {
2233 PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2234 #if RENDER_HAS_GLES_BACKEND
2235 #if (RENDER_VALIDATION_ENABLED == 1)
2236 PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2237 "Render command clear color image not support with GLES. One should implement higher level path for "
2238 "clearing.");
2239 #endif
2240 #else
2241 const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2242
2243 const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2244 if (imagePtr) {
2245 const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2246 // NOTE: mip levels and array layers should be handled separately
2247 for (const auto& subresRef : renderCmd.ranges) {
2248 glClearTexImage(platImage.image, // texture
2249 (int32_t)subresRef.baseMipLevel, // level
2250 platImage.format, // format
2251 platImage.dataType, // type
2252 &renderCmd.color); // data
2253 }
2254 }
2255 #endif
2256 }
2257
2258 // dynamic states
2259 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2260 {
2261 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2262 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2263 const ViewportDesc& vd = renderCmd.viewportDesc;
2264 SetViewport(renderArea_, vd);
2265 }
2266
2267 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2268 {
2269 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2270 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2271 const ScissorDesc& sd = renderCmd.scissorDesc;
2272 SetScissor(renderArea_, sd);
2273 }
2274
2275 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2276 {
2277 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2278 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2279 if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2280 cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2281 glLineWidth(renderCmd.lineWidth);
2282 }
2283 }
2284
2285 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2286 {
2287 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2288 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2289 }
2290
2291 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2292 {
2293 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2294 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2295 }
2296
2297 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2298 {
2299 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2300 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2301 }
2302
2303 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2304 const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2305 {
2306 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2307 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2308 const uint32_t FUNCMASK =
2309 (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2310 if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2311 cFront.writeMask = front.writeMask;
2312 glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2313 }
2314 if (frontFlags & FUNCMASK) {
2315 SetStencilCompareOp(cFront, front);
2316 glStencilFuncSeparate(
2317 GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2318 }
2319 if (frontFlags & StencilSetFlags::SETOP) {
2320 SetStencilOp(cFront, front);
2321 glStencilOpSeparate(
2322 GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2323 }
2324 if (backFlags & StencilSetFlags::SETWRITEMASK) {
2325 cBack.writeMask = back.writeMask;
2326 glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2327 }
2328 if (backFlags & FUNCMASK) {
2329 SetStencilCompareOp(cBack, back);
2330 glStencilFuncSeparate(
2331 GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2332 }
2333 if (backFlags & StencilSetFlags::SETOP) {
2334 SetStencilOp(cBack, back);
2335 glStencilOpSeparate(
2336 GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2337 }
2338 }
2339
2340 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2341 {
2342 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2343 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2344 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2345 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2346 uint32_t setFront = 0;
2347 uint32_t setBack = 0;
2348 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2349 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2350 if (renderCmd.mask != cFront.compareMask) {
2351 cFront.compareMask = renderCmd.mask;
2352 setFront |= StencilSetFlags::SETCOMPAREMASK;
2353 }
2354 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2355 if (renderCmd.mask != cFront.writeMask) {
2356 cFront.writeMask = renderCmd.mask;
2357 setFront |= StencilSetFlags::SETWRITEMASK;
2358 }
2359 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2360 if (renderCmd.mask != cFront.reference) {
2361 cFront.reference = renderCmd.mask;
2362 setFront |= StencilSetFlags::SETREFERENCE;
2363 }
2364 }
2365 }
2366 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2367 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2368 if (renderCmd.mask != cBack.compareMask) {
2369 cBack.compareMask = renderCmd.mask;
2370 setBack |= StencilSetFlags::SETCOMPAREMASK;
2371 }
2372 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2373 if (renderCmd.mask != cBack.writeMask) {
2374 cBack.writeMask = renderCmd.mask;
2375 setBack |= StencilSetFlags::SETWRITEMASK;
2376 }
2377 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2378 if (renderCmd.mask != cBack.reference) {
2379 cBack.reference = renderCmd.mask;
2380 setBack |= StencilSetFlags::SETREFERENCE;
2381 }
2382 }
2383 }
2384 SetStencilState(setFront, cFront, setBack, cBack);
2385 }
2386
2387 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2388 {
2389 #if (RENDER_VALIDATION_ENABLED == 1)
2390 PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2391 "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2392 #endif
2393 }
2394
2395 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2396 {
2397 PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2398 }
2399
2400 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2401 {
2402 PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2403 }
2404
2405 void RenderBackendGLES::BindVertexInputs(
2406 const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2407 {
2408 // update bindings for the VAO.
2409 // walk the attribute descriptions so that only the vertex buffers the shader actually needs get bound
2410 // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2411 // but we only bind the ones needed by the shader
2412 const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2413 for (uint32_t i = 0; i < minBinding; ++i) {
2414 const auto& attributeRef = decldata.attributeDescriptions[i];
2415 const uint32_t location = attributeRef.location;
2416 const uint32_t binding = attributeRef.binding;
2417 // NOTE: we need to bind all the buffers to the correct bindings.
2418 // shader optimized check (vertexInputs, some locations are not in use)
2419 if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2420 const auto& slot = vertexAttribBindSlots_[binding];
2421 const auto& bindingRef = decldata.bindingDescriptions[binding];
2422 PLUGIN_ASSERT(bindingRef.binding == binding);
2423 // buffer bound to slot, and it's used by the shader.
2424 device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2425 /*
2426 core/vulkan
2427 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_VERTEX (0) attribute index advances per vertex
2428 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_INSTANCE (1) attribute index advances per instance
2429
2430 gl/gles
2431 If divisor is 0, the attributes using the buffer bound to bindingindex advance once per vertex.
2432 If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2433 rendered.
2434
2435 so we can directly pass the inputRate as VertexBindingDivisor. (ie. advance once per instance)
2436 ie. enum happens to match and can simply cast.
2437 */
2438 static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2439 device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2440 }
2441 }
2442 }
2443
2444 void RenderBackendGLES::BindResources()
2445 {
2446 #if RENDER_HAS_GLES_BACKEND
2447 // scan all sets here to see if any of the sets has oes.
2448 // we don't actually need to rebuild this info every time.
2449 // should "emulate" the gpu descriptor sets better. (and store this information along with the other bind cache
2450 // data there)
2451 oesBinds_.clear();
2452 for (const auto& state : boundObjects_) {
2453 const auto& oes = state.oesBinds;
2454 if (!oes.empty()) {
2455 oesBinds_.insert(oesBinds_.end(), oes.begin(), oes.end());
2456 }
2457 }
2458 #endif
2459 const array_view<Binder>* resourceList = nullptr;
2460 const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2461 int32_t flipLocation = Gles::INVALID_LOCATION;
2462 uint32_t program = 0;
2463 // Push constants and the "fliplocation" uniform (i.e. uniform state) should only be updated when they change...
2464 if (currentFrameBuffer_) { // currentFrameBuffer_ is only set if a graphics pipeline is bound.
2465 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2466 PLUGIN_ASSERT(boundGraphicsPipeline_);
2467 if (!boundGraphicsPipeline_) {
2468 return;
2469 }
2470 array_view<const int32_t> vertexInputs;
2471 const auto& pipelineData =
2472 static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2473 const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2474 #if RENDER_HAS_GLES_BACKEND
2475 if (!oesBinds_.empty()) {
2476 // the oesBinds_ vector contains the set/binding pairs to which an OES texture is bound;
2477 // ask for a compatible program from the boundGraphicsPipeline_
2478 shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2479 }
2480 #endif
2481 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2482 program = sd.program;
2483 vertexInputs = { sd.inputs, countof(sd.inputs) };
2484 FlushViewportScissors();
2485 if (!scissorEnabled_) {
2486 scissorEnabled_ = true;
2487 glEnable(GL_SCISSOR_TEST); // Always enabled
2488 }
2489 #if (RENDER_PERF_ENABLED == 1)
2490 if (device_.BoundProgram() != program) {
2491 ++perfCounters_.bindProgram;
2492 }
2493 #endif
2494 device_.UseProgram(program);
2495 device_.BindVertexArray(pipelineData.vao);
2496 BindVertexInputs(pipelineData.vertexInputDeclaration, vertexInputs);
2497 device_.BindElementBuffer(boundIndexBuffer_.id);
2498 resourceList = &sd.resourceList;
2499 flipLocation = sd.flipLocation;
2500 pushConstants = &sd.pushConstants;
2501 } else {
2502 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2503 PLUGIN_ASSERT(boundComputePipeline_);
2504 if (!boundComputePipeline_) {
2505 return;
2506 }
2507 const auto& pipelineData =
2508 static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2509 if (pipelineData.computeShader) {
2510 const auto& sd =
2511 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2512 program = sd.program;
2513 #if (RENDER_PERF_ENABLED == 1)
2514 if (device_.BoundProgram() != program) {
2515 ++perfCounters_.bindProgram;
2516 }
2517 #endif
2518 device_.UseProgram(program);
2519 resourceList = &sd.resourceList;
2520 flipLocation = sd.flipLocation;
2521 pushConstants = &sd.pushConstants;
2522 }
2523 }
2524
2525 SetPushConstants(program, *pushConstants);
2526 if (flipLocation != Gles::INVALID_LOCATION) {
2527 const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2528 glProgramUniform1fv(program, flipLocation, 1, &flip);
2529 }
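// NOTE: the flip uniform compensates for GL's lower-left origin: when the end result goes to the
// default framebuffer the scene is rendered y-flipped, and the vertex shader is expected to apply it
// as something like "gl_Position.y *= flip" (the uniform/variable naming on the shader side is an
// assumption here; only flipLocation is known to the backend).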
2530
2531 for (const auto& r : *resourceList) {
2532 PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
2533 if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
2534 continue;
2535 }
2536 const auto& res = boundObjects_[r.set].resources[r.bind];
2537 PLUGIN_ASSERT(res.resources.size() == r.id.size());
2538 auto resType = res.descriptorType;
2539 if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2540 resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2541 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2542 resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2543 }
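// The *_DYNAMIC descriptor variants bind through the same GL targets as their non-dynamic counterparts;
// any dynamic offsets are assumed to have been folded into the cached buffer offset when the descriptor
// set was bound.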
2544
2545 // a few helpers for updating perf counters and binding the sampler/texture/buffer
2546 auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
2547 #if (RENDER_PERF_ENABLED == 1)
2548 if (device_.BoundSampler(textureUnit) != samplerId) {
2549 ++perfCounters_.bindSampler;
2550 }
2551 #endif
2552 device_.BindSampler(textureUnit, samplerId);
2553 };
2554 auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
2555 #if (RENDER_PERF_ENABLED == 1)
2556 if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
2557 ++perfCounters_.bindTexture;
2558 }
2559 #endif
2560 device_.BindTexture(textureUnit, dplat.type, dplat.image);
2561 };
2562 auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
2563 const GpuImagePlatformDataGL& dplat) {
2564 uint32_t level = (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
2565 device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
2566 };
2567 auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
2568 #if (RENDER_PERF_ENABLED == 1)
2569 if (device_.BoundBuffer(target) != buffer.bufferId) {
2570 ++perfCounters_.bindBuffer;
2571 }
2572 #endif
2573 device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
2574 };
2575 auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
2576 // either force the defined mip level or use defaults.
2577 glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
2578 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
2579 glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
2580 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 1000U));
2581 };
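// The helpers above route all GL bind calls through DeviceGLES (which presumably skips redundant binds);
// the perf-counter increments only track binds whose device-cached value actually differs.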
2582
2583 #if (RENDER_VALIDATION_ENABLED == 1)
2584 if (resType != r.type) {
2585 PLUGIN_LOG_ONCE_E(
2586 "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
2587 }
2588 #endif
2589
2590 for (uint32_t index = 0; index < res.resources.size(); index++) {
2591 const auto& obj = res.resources[index];
2592 for (const auto& id : r.id[index]) {
2593 const auto binding = index + id;
2594 if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
2595 bindSampler(binding, obj.sampler.samplerId);
2596 } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2597 (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2598 (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2599 if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2600 bindSampler(binding, obj.sampler.samplerId);
2601 } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2602 bindSampler(binding, 0U);
2603 }
2604 if (obj.image.image) {
2605 auto& dplat = obj.image.image->GetPlatformData();
2606 bindTexture(binding, dplat);
2607
2608 // NOTE: the last setting wins; different mip levels can not be bound simultaneously from a
2609 // single resource.
2610 // Check and update the forced mip level if needed.
2611 if (dplat.mipLevel != obj.image.mipLevel) {
2612 // NOTE: we are actually modifying the texture object bound above
2613 const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
2614 setMipLevel(dplat.type, dplat.mipLevel);
2615 }
2616 }
2617 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2618 if (obj.image.image) {
2619 auto& dplat = obj.image.image->GetPlatformData();
2620 bindTextureImage(binding, obj.image, dplat);
2621 }
2622 } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
2623 bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
2624 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
2625 bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
2626 }
2627 }
2628 }
2629 }
2630 // mark all bound.
2631 for (auto& b : boundObjects_) {
2632 b.dirty = false;
2633 }
2634 }
2635
2636 #if (RENDER_PERF_ENABLED == 1)
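// Creates a GPU timestamp query (when timestamp queries are enabled) and a CPU timer slot for every
// render command context that does not yet have one, keyed by the context debug name.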
2637 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2638 {
2639 for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2640 const string_view& debugName = renderCommandContext.debugName;
2641 if (timers_.count(debugName) == 0) { // new timers
2642 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2643 PerfDataSet& perfDataSet = timers_[debugName];
2644 constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2645 perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2646 perfDataSet.counter = 0u;
2647 #else
2648 timers_.insert({ debugName, {} });
2649 #endif
2650 }
2651 }
2652 }
2653
2654 void RenderBackendGLES::EndFrameTimers()
2655 {
2656 int64_t fullGpuTime = 0;
2657 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2658 // already in micros
2659 fullGpuTime = fullGpuCounter_;
2660 fullGpuCounter_ = 0;
2661 #endif
2662 if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2663 CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2664 globalPerfData) {
2665 CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2666 perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2667 perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2668 perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2669 perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2670 perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2671 perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2672 }
2673 }
2674
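// Reads back the GPU timestamp query for the given node (converting nanoseconds to microseconds and
// discarding the result if a disjoint event occurred), then pushes the CPU and GPU backend timings plus
// the per-frame counters to the performance data manager.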
2675 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2676 {
2677 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2678 int64_t gpuMicroSeconds = 0;
2679 if (validGpuQueries_) {
2680 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2681 PLUGIN_ASSERT(gpuQuery);
2682
2683 gpuQuery->NextQueryIndex();
2684
2685 const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2686 PLUGIN_ASSERT(platData.queryObject);
2687
2688 GLint disjointOccurred = 0;
2689 #ifdef GL_GPU_DISJOINT_EXT
2690 // Clear disjoint error
2691 glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2692 #endif
2693 if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2694 GLuint64 gpuNanoSeconds = 0U;
2695 #ifdef GL_GPU_DISJOINT_EXT
2696 glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2697 #else
2698 glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2699 #endif
2700 static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000;
2701 gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2702 if (gpuMicroSeconds > UINT32_MAX) {
2703 gpuMicroSeconds = 0;
2704 }
2705 fullGpuCounter_ += gpuMicroSeconds;
2706 } else if (disjointOccurred) {
2707 PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2708 }
2709 }
2710 #endif
2711 const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2712
2713 if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2714 CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2715 globalPerfData) {
2716 CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2717
2718 perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2719 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2720 perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2721 #endif
2722 perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
2723 perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
2724 perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
2725 perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
2726 perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
2727 perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
2728 perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
2729 perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
2730 perfData->UpdateData(name, "Backend_Count_BindSample", perfCounters_.bindSampler);
2731 perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
2732 perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
2733 }
2734 }
2735 #endif
2736
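// Primes the cached depth/stencil state: every relevant GL state is written once so that later
// UpdateDepthState/UpdateStencilState calls only need to diff against cacheState_.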
2737 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2738 {
2739 auto& cDepth = cacheState_.depthStencilState;
2740 cDepth = graphicsState.depthStencilState;
2741 // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2742 SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2743 SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2744 glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2745 glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2746 const uint32_t updateAllFlags =
2747 (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2748 StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2749 SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2750 }
2751
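// Primes the cached blend state for all color attachments (blend constants, write masks, factors and
// equations), limited to the smaller of the engine's MAX_COLOR_ATTACHMENT_COUNT and the GL
// implementation's GL_MAX_COLOR_ATTACHMENTS.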
2752 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2753 {
2754 auto& cBlend = cacheState_.colorBlendState;
2755 cBlend = graphicsState.colorBlendState;
2756 glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2757 cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2758 GLuint maxColorAttachments = 0U;
2759 glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, (GLint*)&maxColorAttachments);
2760 maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2761 for (GLuint i = 0; i < maxColorAttachments; i++) {
2762 const auto& cBlendState = cBlend.colorAttachments[i];
2763 glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2764 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2765 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2766 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2767 if (cBlendState.enableBlend) {
2768 glEnablei(GL_BLEND, i);
2769 } else {
2770 glDisablei(GL_BLEND, i);
2771 }
2772 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2773 GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2774 GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2775 glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2776 }
2777 // logicops are unsupported on GLES
2778 }
2779
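// Forces the complete graphics state (input assembly, rasterization, depth/stencil and blend) into GL
// and into cacheState_; guarded by cachePrimed_ so it only runs when the cache has not been primed yet.
// Subsequent DoGraphicsState calls then only apply deltas.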
2780 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the full graphics state into the cache.
2781 {
2782 if (cachePrimed_) {
2783 return;
2784 }
2785 cachePrimed_ = true;
2786 /// GRAPHICSSTATE inputAssembly
2787 const auto& ia = graphicsState.inputAssembly;
2788 auto& cia = cacheState_.inputAssembly;
2789 cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2790 SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2791 topology_ = ia.primitiveTopology;
2792 /// GRAPHICSSTATE rasterizationState
2793 const auto& rs = graphicsState.rasterizationState;
2794 auto& crs = cacheState_.rasterizationState;
2795 // save, since non-fill polygon modes need special handling (possibly shader assistance for lines...)
2796 polygonMode_ = rs.polygonMode;
2797 // GL_DEPTH_CLAMP,rs.enableDepthClamp NOT SUPPORTED CHECK GLES 3.2
2798 crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2799 SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2800 crs.enableDepthBias = rs.enableDepthBias;
2801 SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2802 crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2803 crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2804 glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2805 // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2806 // If cull mode Flags change...
2807 crs.cullModeFlags = rs.cullModeFlags;
2808 SetCullMode(crs);
2809 crs.frontFace = rs.frontFace;
2810 SetFrontFace(crs);
2811 crs.lineWidth = rs.lineWidth;
2812 glLineWidth(rs.lineWidth);
2813 PrimeDepthStencilState(graphicsState);
2814 PrimeBlendState(graphicsState);
2815 }
2816
2817 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2818 {
2819 const auto& depth = graphicsState.depthStencilState;
2820 auto& cDepth = cacheState_.depthStencilState;
2821 if (depth.enableDepthTest != cDepth.enableDepthTest) {
2822 cDepth.enableDepthTest = depth.enableDepthTest;
2823 SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2824 }
2825 if (depth.depthCompareOp != cDepth.depthCompareOp) {
2826 cDepth.depthCompareOp = depth.depthCompareOp;
2827 glDepthFunc(GetCompareOp(depth.depthCompareOp));
2828 }
2829 if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2830 cDepth.enableDepthWrite = depth.enableDepthWrite;
2831 glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
2832 }
2833 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
2834 // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2835 }
2836 }
2837
2838 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2839 {
2840 const auto& depth = graphicsState.depthStencilState;
2841 auto& cDepth = cacheState_.depthStencilState;
2842 if (depth.enableStencilTest != cDepth.enableStencilTest) {
2843 cDepth.enableStencilTest = depth.enableStencilTest;
2844 SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2845 }
2846 uint32_t setFront = 0;
2847 uint32_t setBack = 0;
2848 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2849 if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2850 setFront |= StencilSetFlags::SETREFERENCE;
2851 }
2852 if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2853 setBack |= StencilSetFlags::SETREFERENCE;
2854 }
2855 }
2856 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2857 if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2858 setFront |= StencilSetFlags::SETCOMPAREMASK;
2859 }
2860 if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2861 setBack |= StencilSetFlags::SETCOMPAREMASK;
2862 }
2863 }
2864 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2865 if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2866 setFront |= StencilSetFlags::SETWRITEMASK;
2867 }
2868 if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2869 setBack |= StencilSetFlags::SETWRITEMASK;
2870 }
2871 }
2872 if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2873 setFront |= StencilSetFlags::SETCOMPAREOP;
2874 }
2875 if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
2876 setBack |= StencilSetFlags::SETCOMPAREOP;
2877 }
2878 if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
2879 setFront |= StencilSetFlags::SETOP;
2880 }
2881 if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
2882 setBack |= StencilSetFlags::SETOP;
2883 }
2884 SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
2885 }
2886
2887 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
2888 {
2889 UpdateDepthState(graphicsState);
2890 UpdateStencilState(graphicsState);
2891 }
2892
2893 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
2894 {
2895 const auto& blend = graphicsState.colorBlendState;
2896 auto& cBlend = cacheState_.colorBlendState;
2897 for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
2898 const auto& blendState = blend.colorAttachments[i];
2899 auto& cBlendState = cBlend.colorAttachments[i];
2900 if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
2901 cBlendState.colorWriteMask = blendState.colorWriteMask;
2902 glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2903 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2904 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2905 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2906 }
2907
2908 // Check if blend state has changed
2909 bool factorsChanged = false;
2910 bool opsChanged = false;
2911
2912 if (blendState.enableBlend) {
2913 factorsChanged = !CompareBlendFactors(cBlendState, blendState);
2914 opsChanged = !CompareBlendOps(cBlendState, blendState);
2915 }
2916
2917 if (blendState.enableBlend != cBlendState.enableBlend || factorsChanged || opsChanged) {
2918 cBlendState.enableBlend = blendState.enableBlend;
2919 if (blendState.enableBlend) {
2920 glEnablei(GL_BLEND, i);
2921 if (factorsChanged) {
2922 SetBlendFactors(cBlendState, blendState);
2923 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2924 GetBlendFactor(cBlendState.dstColorBlendFactor),
2925 GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2926 GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2927 }
2928 if (opsChanged) {
2929 SetBlendOps(cBlendState, blendState);
2930 glBlendEquationSeparatei(
2931 i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2932 }
2933 } else {
2934 glDisablei(GL_BLEND, i);
2935 }
2936 }
2937 }
2938 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
2939 if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
2940 Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
2941 glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
2942 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
2943 }
2944 }
2945 // logicOps in blend not supported on GLES
2946 }
2947
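// Applies only the rasterization states that differ from cacheState_, skipping anything covered by a
// dynamic state flag (depth bias, line width) and flipping the front-face winding for non-default FBOs.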
2948 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
2949 {
2950 const auto& rs = graphicsState.rasterizationState;
2951 auto& crs = cacheState_.rasterizationState;
2952 // save, since non-fill polygon modes need special handling (possibly shader assistance for lines...)
2953 polygonMode_ = rs.polygonMode;
2954 #if RENDER_HAS_GL_BACKEND
2955 if (rs.polygonMode != crs.polygonMode) {
2956 crs.polygonMode = rs.polygonMode;
2957 SetPolygonMode(rs);
2958 }
2959 #endif
2960 if (rs.enableDepthClamp != crs.enableDepthClamp) {
2961 crs.enableDepthClamp = rs.enableDepthClamp;
2962 // NOT SUPPORTED (needs an extension)
2963 }
2964 if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
2965 crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2966 SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2967 }
2968 if (rs.enableDepthBias != crs.enableDepthBias) {
2969 crs.enableDepthBias = rs.enableDepthBias;
2970 SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2971 }
2972 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
2973 if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
2974 (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
2975 crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2976 crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2977 glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2978 }
2979 // depthBiasClamp NOT SUPPORTED (needs an extension)
2980 }
2981 // If cull mode Flags change...
2982 if (rs.cullModeFlags != crs.cullModeFlags) {
2983 crs.cullModeFlags = rs.cullModeFlags;
2984 SetCullMode(crs);
2985 }
2986 auto frontFace = rs.frontFace;
2987 if (!renderingToDefaultFbo_) {
2988 // Flip the winding when not rendering to the default FBO.
2989 if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
2990 frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
2991 } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
2992 frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
2993 }
2994 }
2995 if (frontFace != crs.frontFace) {
2996 crs.frontFace = frontFace;
2997 SetFrontFace(crs);
2998 }
2999 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3000 if (rs.lineWidth != crs.lineWidth) {
3001 crs.lineWidth = rs.lineWidth;
3002 glLineWidth(rs.lineWidth);
3003 }
3004 }
3005 }
3006
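// Diffs the incoming graphics state against cacheState_ and applies only the changed pieces; PrimeCache
// is expected to have populated the cache first so that the diffing is valid.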
3007 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3008 {
3009 /// GRAPHICSSTATE inputAssembly
3010 const auto& ia = graphicsState.inputAssembly;
3011 if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3012 auto& cia = cacheState_.inputAssembly;
3013 cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3014 SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3015 }
3016 topology_ = ia.primitiveTopology;
3017 UpdateRasterizationState(graphicsState);
3018 UpdateDepthStencilState(graphicsState);
3019 UpdateBlendState(graphicsState);
3020 }
3021
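// SetViewport / SetScissor only record the requested state and mark it dirty; the actual glViewport /
// glScissor / glDepthRangef calls are deferred to FlushViewportScissors, which also handles the
// top-left vs. bottom-left origin conversion.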
3022 void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
3023 {
3024 // NOTE: viewportdesc is in floats?!?
3025 bool forceV = false;
3026 bool forceD = false;
3027 if (!viewportPrimed_) {
3028 viewportPrimed_ = true;
3029 forceV = true;
3030 forceD = true;
3031 }
3032 if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
3033 (vd.height != viewport_.height)) {
3034 forceV = true;
3035 }
3036 if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
3037 forceD = true;
3038 }
3039
3040 if (forceV) {
3041 viewport_.x = vd.x;
3042 viewport_.y = vd.y;
3043 viewport_.width = vd.width;
3044 viewport_.height = vd.height;
3045 viewportUpdated_ = true;
3046 }
3047 if (forceD) {
3048 viewport_.minDepth = vd.minDepth;
3049 viewport_.maxDepth = vd.maxDepth;
3050 viewportDepthRangeUpdated_ = true;
3051 }
3052 }
3053
3054 void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
3055 {
3056 // NOTE: scissordesc is in floats?!?
3057 bool force = false;
3058 if (!scissorPrimed_) {
3059 scissorPrimed_ = true;
3060 force = true;
3061 }
3062 if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3063 (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
3064 force = true;
3065 }
3066 if (force) {
3067 scissorBox_ = sd;
3068 scissorBoxUpdated_ = true;
3069 }
3070 }
3071
3072 void RenderBackendGLES::FlushViewportScissors()
3073 {
3074 if (!currentFrameBuffer_) {
3075 return;
3076 }
3077 bool force = false;
3078 if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
3079 force = true;
3080 scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
3081 }
3082 if ((viewportUpdated_) || (force)) {
3083 viewportUpdated_ = false;
3084 // Handle top-left / bottom-left origin conversion
3085 PLUGIN_ASSERT(currentFrameBuffer_);
3086 GLint y = static_cast<GLint>(viewport_.y);
3087 const GLsizei h = static_cast<GLsizei>(viewport_.height);
3088 if (renderingToDefaultFbo_) {
3089 const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3090 y = fh - (y + h);
3091 }
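// e.g. with a 1080 px high default framebuffer, a top-left viewport of y = 100, h = 200 becomes
// y = 1080 - (100 + 200) = 780 in GL's bottom-left convention.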
3092 glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
3093 }
3094 if ((scissorBoxUpdated_) || (force)) {
3095 scissorBoxUpdated_ = false;
3096 // Handle top-left / bottom-left origin conversion
3097 GLint y = static_cast<GLint>(scissorBox_.offsetY);
3098 const GLsizei h = static_cast<GLsizei>(scissorBox_.extentHeight);
3099 if (renderingToDefaultFbo_) {
3100 const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3101 y = fh - (y + h);
3102 }
3103 glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
3104 }
3105 if (viewportDepthRangeUpdated_) {
3106 viewportDepthRangeUpdated_ = false;
3107 glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
3108 }
3109 }
3110 RENDER_END_NAMESPACE()
3111