/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "render_backend_gles.h"

#include <algorithm>

#include <base/containers/fixed_string.h>
#include <core/perf/intf_performance_data_manager.h>
#include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
#include <render/namespace.h>

#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#endif
#include "device/gpu_resource_manager.h"
#include "gles/device_gles.h"
#include "gles/gl_functions.h"
#include "gles/gpu_buffer_gles.h"
#include "gles/gpu_image_gles.h"
#include "gles/gpu_program_gles.h"
#include "gles/gpu_query_gles.h"
#include "gles/gpu_sampler_gles.h"
#include "gles/gpu_semaphore_gles.h"
#include "gles/node_context_descriptor_set_manager_gles.h"
#include "gles/node_context_pool_manager_gles.h"
#include "gles/pipeline_state_object_gles.h"
#include "gles/render_frame_sync_gles.h"
#include "gles/swapchain_gles.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
#include "util/log.h"
#include "util/render_frame_util.h"

#define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
#define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)
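// Example: IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT) evaluates to true only when every bit of
// CORE_DYNAMIC_STATE_VIEWPORT is set in dynamicStateFlags_.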

using namespace BASE_NS;

// NOTE: implement missing commands, add state caching and cleanup a bit more.
RENDER_BEGIN_NAMESPACE()
namespace Gles {
// Indices to colorBlendConstants
static constexpr uint32_t RED_INDEX = 0;
static constexpr uint32_t GREEN_INDEX = 1;
static constexpr uint32_t BLUE_INDEX = 2;
static constexpr uint32_t ALPHA_INDEX = 3;
static constexpr uint32_t CUBEMAP_LAYERS = 6;
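// Cached view of a single descriptor binding: the descriptor type plus the buffer / image / sampler
// resources bound to it (one Resource entry per array element).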
struct Bind {
    DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
    struct BufferType {
        uint32_t bufferId;
        uint32_t offset;
        uint32_t size;
    };
    struct ImageType {
        GpuImageGLES* image;
        uint32_t mode;
        uint32_t mipLevel;
    };
    struct SamplerType {
        uint32_t samplerId;
    };
    struct Resource {
        union {
            Bind::BufferType buffer { 0, 0, 0 };
            Bind::ImageType image;
        };
        SamplerType sampler { 0 };
    };
    vector<Resource> resources;
};
} // namespace Gles
namespace {
constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
{
    if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
        return RenderHandleType::GPU_SAMPLER;
    } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
                   (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
               (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
        return RenderHandleType::GPU_IMAGE;
    } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
               (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
        return RenderHandleType::GPU_BUFFER;
    }
    return RenderHandleType::UNDEFINED;
}

GLenum getCubeMapTarget(GLenum type, uint32_t layer)
{
    if (type == GL_TEXTURE_CUBE_MAP) {
        constexpr GLenum layerId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
            GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
            GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
        PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
        return layerId[layer];
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

GLenum getTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
{
    if (type == GL_TEXTURE_2D) {
        if (sampleCount > 1) {
            return GL_TEXTURE_2D_MULTISAMPLE;
        }
        return GL_TEXTURE_2D;
    }
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
        return getCubeMapTarget(type, layer);
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}
struct BlitArgs {
    uint32_t mipLevel {};
    Size3D rect0 {};
    Size3D rect1 {};
    uint32_t height {};
};

void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
{
    // Handle top-left / bottom-left origin conversion
    GLint sy = static_cast<GLint>(src.rect0.height);
    const GLint sh = static_cast<const GLint>(src.rect1.height);
    const GLint sfh = static_cast<GLint>(src.height >> src.mipLevel);
    sy = sfh - (sy + sh);
    GLint dy = static_cast<GLint>(dst.rect0.height);
    const GLint dh = static_cast<const GLint>(dst.rect1.height);
    const GLint dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
    dy = dfh - (dy + dh);
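    // Illustrative example: with a 256 px high destination level, a 32 px high rect starting at y = 16
    // maps to GL coordinates dy = 256 - (16 + 32) = 208.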
    GLenum glfilter = GL_NEAREST;
    if (filter == CORE_FILTER_NEAREST) {
        glfilter = GL_NEAREST;
    } else if (filter == CORE_FILTER_LINEAR) {
        glfilter = GL_LINEAR;
    } else {
        PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
    }
    glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
        static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
        glfilter);
}

GLenum GetPrimFromTopology(PrimitiveTopology op)
{
    switch (op) {
        case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
            return GL_POINTS;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
            return GL_LINES;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
            return GL_LINE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
            return GL_TRIANGLES;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
            return GL_TRIANGLE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
            return GL_TRIANGLE_FAN;
#if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
            // The following are valid after GLES 3.2
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
            return GL_LINES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
            return GL_LINE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
            return GL_TRIANGLES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
            return GL_TRIANGLE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
            return GL_PATCHES;
#endif
        default:
            PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
            break;
    }
    return GL_POINTS;
}

GLenum GetBlendOp(BlendOp func)
{
    switch (func) {
        case CORE_BLEND_OP_ADD:
            return GL_FUNC_ADD;
        case CORE_BLEND_OP_SUBTRACT:
            return GL_FUNC_SUBTRACT;
        case CORE_BLEND_OP_REVERSE_SUBTRACT:
            return GL_FUNC_REVERSE_SUBTRACT;
        case CORE_BLEND_OP_MIN:
            return GL_MIN;
        case CORE_BLEND_OP_MAX:
            return GL_MAX;
        default:
            break;
    }
    return GL_FUNC_ADD;
}

GLenum GetBlendFactor(BlendFactor factor)
{
    switch (factor) {
        case CORE_BLEND_FACTOR_ZERO:
            return GL_ZERO;
        case CORE_BLEND_FACTOR_ONE:
            return GL_ONE;
        case CORE_BLEND_FACTOR_SRC_COLOR:
            return GL_SRC_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
            return GL_ONE_MINUS_SRC_COLOR;
        case CORE_BLEND_FACTOR_DST_COLOR:
            return GL_DST_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
            return GL_ONE_MINUS_DST_COLOR;
        case CORE_BLEND_FACTOR_SRC_ALPHA:
            return GL_SRC_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
            return GL_ONE_MINUS_SRC_ALPHA;
        case CORE_BLEND_FACTOR_DST_ALPHA:
            return GL_DST_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
            return GL_ONE_MINUS_DST_ALPHA;
        case CORE_BLEND_FACTOR_CONSTANT_COLOR:
            return GL_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
            return GL_ONE_MINUS_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
            return GL_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
            return GL_ONE_MINUS_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
            return GL_SRC_ALPHA_SATURATE;
            // NOTE: check the GLES3.2...
            /* following requires EXT_blend_func_extended (dual source blending) */
        case CORE_BLEND_FACTOR_SRC1_COLOR:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
        case CORE_BLEND_FACTOR_SRC1_ALPHA:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
        default:
            break;
    }
    return GL_ONE;
}

GLenum GetCompareOp(CompareOp aOp)
{
    switch (aOp) {
        case CORE_COMPARE_OP_NEVER:
            return GL_NEVER;
        case CORE_COMPARE_OP_LESS:
            return GL_LESS;
        case CORE_COMPARE_OP_EQUAL:
            return GL_EQUAL;
        case CORE_COMPARE_OP_LESS_OR_EQUAL:
            return GL_LEQUAL;
        case CORE_COMPARE_OP_GREATER:
            return GL_GREATER;
        case CORE_COMPARE_OP_NOT_EQUAL:
            return GL_NOTEQUAL;
        case CORE_COMPARE_OP_GREATER_OR_EQUAL:
            return GL_GEQUAL;
        case CORE_COMPARE_OP_ALWAYS:
            return GL_ALWAYS;
        default:
            break;
    }
    return GL_ALWAYS;
}

GLenum GetStencilOp(StencilOp aOp)
{
    switch (aOp) {
        case CORE_STENCIL_OP_KEEP:
            return GL_KEEP;
        case CORE_STENCIL_OP_ZERO:
            return GL_ZERO;
        case CORE_STENCIL_OP_REPLACE:
            return GL_REPLACE;
        case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
            return GL_INCR;
        case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
            return GL_DECR;
        case CORE_STENCIL_OP_INVERT:
            return GL_INVERT;
        case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
            return GL_INCR_WRAP;
        case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
            return GL_DECR_WRAP;
        default:
            break;
    }
    return GL_KEEP;
}

void SetState(GLenum type, bool enabled)
{
    if (enabled) {
        glEnable(type);
    } else {
        glDisable(type);
    }
}

void SetCullMode(const GraphicsState::RasterizationState& rs)
{
    SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));

    switch (rs.cullModeFlags) {
        case CORE_CULL_MODE_FRONT_BIT:
            glCullFace(GL_FRONT);
            break;
        case CORE_CULL_MODE_BACK_BIT:
            glCullFace(GL_BACK);
            break;
        case CORE_CULL_MODE_FRONT_AND_BACK:
            glCullFace(GL_FRONT_AND_BACK);
            break;
        case CORE_CULL_MODE_NONE:
        default:
            break;
    }
}

void SetFrontFace(const GraphicsState::RasterizationState& rs)
{
    switch (rs.frontFace) {
        case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
            glFrontFace(GL_CCW);
            break;
        case CORE_FRONT_FACE_CLOCKWISE:
            glFrontFace(GL_CW);
            break;
        default:
            break;
    }
}

#if RENDER_HAS_GL_BACKEND
void SetPolygonMode(const GraphicsState::RasterizationState& rs)
{
    GLenum mode;
    switch (rs.polygonMode) {
        default:
        case CORE_POLYGON_MODE_FILL:
            mode = GL_FILL;
            break;
        case CORE_POLYGON_MODE_LINE:
            mode = GL_LINE;
            break;
        case CORE_POLYGON_MODE_POINT:
            mode = GL_POINT;
            break;
    }
    glPolygonMode(GL_FRONT_AND_BACK, mode);
}
#endif

void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
    const LowlevelFramebufferGL& frameBuffer)
{
    if (count > 0) {
        if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
            // Invalidate the whole buffer.  (attachment sizes match render area)
            glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
        } else {
            // invalidate only a part of the render target..
            // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
            const GLint X = static_cast<const GLint>(rpd.renderArea.offsetX);
            const GLint Y = static_cast<const GLint>(rpd.renderArea.offsetY);
            const GLsizei W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
            const GLsizei H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
            glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
        }
    }
}

struct BlitData {
    const GpuImagePlatformDataGL& iPlat;
    const GpuImageDesc& imageDesc;
    const BufferImageCopy& bufferImageCopy;
    uintptr_t data { 0 };
    uint64_t size { 0 };
    uint64_t sizeOfData { 0 };
    bool compressed { false };
};

void BlitArray(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
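    // Width and height are reduced per mip level; depth is the array layer count and is not mip-reduced.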
    // NOTE: image offset depth is ignored
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
        Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
        for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
            const Math::UVec3 offset3D { offset.x, offset.y, layer };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            data += static_cast<ptrdiff_t>(bd.sizeOfData);
        }
    }
}

void Blit2D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
        "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    const uintptr_t data = bd.data;
    if (valid && bd.compressed) {
        device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
            iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
    } else if (valid) {
        device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
            iPlat.dataType, reinterpret_cast<const void*>(data));
    }
}

void Blit3D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
    const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
        bufferImageCopy.imageOffset.depth };
    Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
            const Math::UVec3 offset3D { offset.x, offset.y, slice };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            // offsets one slice
            data += static_cast<ptrdiff_t>(bd.sizeOfData);
        }
    }
}

void BlitCube(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
    constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
        GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
        GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
    PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
        "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
    uintptr_t data = bd.data;
    const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
    for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
        const GLenum face = faceId[i]; // convert layer index to cube map face id.
        if (face == 0) {
            // reached the end of cubemap faces (see faceId)
            // so must stop copying.
            break;
        }
        if (bd.compressed) {
            device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
                iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
        } else {
            device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
                iPlat.dataType, reinterpret_cast<const void*>(data));
        }
        data += static_cast<ptrdiff_t>(bd.sizeOfData);
    }
}

template<bool usePixelUnpackBuffer>
BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
    const GpuImageGLES& dstGpuImage)
{
    const auto& iPlat = dstGpuImage.GetPlatformData();
    const auto& imageOffset = bufferImageCopy.imageOffset;
    PLUGIN_UNUSED(imageOffset);
    const auto& imageExtent = bufferImageCopy.imageExtent;
    // size is calculated for single layer / slice
    const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
                          static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
                          static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
    uintptr_t data = bufferImageCopy.bufferOffset;
    if constexpr (usePixelUnpackBuffer) {
        const auto& plat = srcGpuBuffer.GetPlatformData();
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
    } else {
        // Use the mapped pointer for glTexSubImage2D, this is a workaround on GL_INVALID_OPERATION on PVR GLES
        // simulator and crash with ETC2 textures on NVIDIA..
        data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
    }
    uint64_t sizeOfData = size;
    const auto& compinfo = iPlat.compression;
    if (compinfo.compressed) {
        // how many blocks in width
        const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
        // how many blocks in height
        const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
        // size in bytes..
        sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
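        // Illustrative example: a 68x36 region with 4x4 blocks and 8 bytes per block gives 17 * 9 * 8 = 1224 bytes.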

        // Warn for partial copies. We do not handle those at the moment.
        if (bufferImageCopy.bufferRowLength != 0) {
            if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
                PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
                             "Stride must match image width (with block align). "
                             "bufferImageCopy.bufferRowLength(%d) "
                             "imageExtent.width(%d) ",
                    bufferImageCopy.bufferRowLength, imageExtent.width);
            }
        }
        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
    } else {
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
    }
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
    return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
}

template<bool usePixelUnpackBuffer>
void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
{
    if constexpr (usePixelUnpackBuffer) {
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    } else {
        srcGpuBuffer.Unmap();
    }
}

template<typename T, size_t N>
constexpr bool Compare(const T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        if (a[i] != b[i]) {
            return false;
        }
    }
    return true;
}

template<typename T, size_t N>
constexpr bool Set(T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        a[i] = b[i];
    }
    return true;
}

bool CompareBlendFactors(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
           (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
}

void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.srcColorBlendFactor = b.srcColorBlendFactor;
    a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
    a.dstColorBlendFactor = b.dstColorBlendFactor;
    a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
}

bool CompareBlendOps(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
}

void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.colorBlendOp = b.colorBlendOp;
    a.alphaBlendOp = b.alphaBlendOp;
}

bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
}

void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.failOp = b.failOp;
    a.depthFailOp = b.depthFailOp;
    a.passOp = b.passOp;
}

void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.compareOp = b.compareOp;
    a.compareMask = b.compareMask;
    a.reference = b.reference;
}

#if RENDER_VALIDATION_ENABLED
void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
{
    if (mipLevel >= imageDesc.mipCount) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
    }
    if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
    }
    if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
        ((offset.z + extent.depth) > imageDesc.depth)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
    }
}

void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
{
    ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
    ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
}
#endif

constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
{
    if (srcOffset < 0) {
        size += srcOffset;
        dstOffset -= srcOffset;
        srcOffset = 0;
    }
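    // Illustrative example: srcOffset = -4, dstOffset = 10, size = 20 -> srcOffset = 0, dstOffset = 14, size = 16.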
}

constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
{
    ClampOffset(srcOffset.x, dstOffset.x, size.width);
    ClampOffset(srcOffset.y, dstOffset.y, size.height);
    ClampOffset(srcOffset.z, dstOffset.z, size.depth);
}

constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
{
    if (size > (maxSize - offset)) {
        size = maxSize - offset;
    }
}

constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
{
    ClampSize(offset.x, desc.width, size.width);
    ClampSize(offset.y, desc.height, size.height);
    ClampSize(offset.z, desc.depth, size.depth);
}
} // namespace

RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
    : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
{
#if (RENDER_PERF_ENABLED == 1)
    validGpuQueries_ = false;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();
#if RENDER_HAS_GL_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
        validGpuQueries_ = true;
    }
#endif
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        // Check if GL_EXT_disjoint_timer_query is available.
        validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
    }
#endif
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
    }
#endif
    PLUGIN_ASSERT(device_.IsActive());
    PrimeCache(GraphicsState {}); // Initializes cache.
    glGenFramebuffers(1, &blitImageSourceFbo_);
    glGenFramebuffers(1, &blitImageDestinationFbo_);
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
    PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
#endif
#if !RENDER_HAS_GLES_BACKEND
    glEnable(GL_PROGRAM_POINT_SIZE);
#endif
}

RenderBackendGLES::~RenderBackendGLES()
{
    PLUGIN_ASSERT(device_.IsActive());
    device_.DeleteFrameBuffer(blitImageSourceFbo_);
    device_.DeleteFrameBuffer(blitImageDestinationFbo_);
}

void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain()) {
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif
            for (const auto& swapchainData : backBufferConfig.swapchainData) {
#if (RENDER_DEV_ENABLED == 1)
                if (swapchainData.config.gpuSemaphoreHandle) {
                    // NOTE: not implemented
                    PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
                }
#endif
                const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
                if (swp) {
#if RENDER_GL_FLIP_Y_SWAPCHAIN
                    // Blit and flip our swapchain frame to backbuffer..
                    const auto& sdesc = swp->GetDesc();
                    if (scissorEnabled_) {
                        glDisable(GL_SCISSOR_TEST);
                        scissorEnabled_ = false;
                    }
                    const auto& platSwapchain = swp->GetPlatformData();
                    device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
                    device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to current context..
                    glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
                        (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
                    device_.BindReadFrameBuffer(0);
#endif
                    device_.SwapBuffers(*swp);
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        }
    }
}

void RenderBackendGLES::ResetState()
{
    boundProgram_ = {};
    boundIndexBuffer_ = {};
    vertexAttribBinds_ = 0;
    renderingToDefaultFbo_ = false;
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
    renderArea_ = {};
    activeRenderPass_ = {};
    currentSubPass_ = 0;
    currentFrameBuffer_ = nullptr;
    scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
    inRenderpass_ = 0;
}

void RenderBackendGLES::ResetBindings()
{
    for (auto& b : boundObjects_) {
        b.dirty = true;
    }
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
}

void RenderBackendGLES::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
    PLUGIN_ASSERT(device_.IsActive());
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif
    presentationInfo_ = {};

    if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
        for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
            const auto& swapData = backBufferConfig.swapchainData[swapIdx];
            if (const SwapchainGLES* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
                presentationInfo_.swapchainImageIndex = swp->GetNextImage();
                const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
                    // remap image to backbuffer
                    const RenderHandle currentSwapchainHandle =
                        swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
                    // special swapchain remapping
                    gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
                }
            }
        }
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif
    // Reset bindings.
    ResetState();
    for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
        // Reset bindings between command lists..
        ResetBindings();
        RenderSingleCommandList(ref);
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
#endif
    RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendGLES::RenderProcessEndCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
        frameSync->GetFrameFence();
    }
    // signal external GPU fences
    if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
        auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
        const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
        PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
        if (externalSignals.size() == externalSemaphores.size()) {
            for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                // needs to be false
                if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                    if (const GpuSemaphoreGles* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
                        auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
                        // NOTE: currently could create only one GPU sync
                        GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
                        plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
                        externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
                        externalSignals[sigIdx].signaled = true;

                        // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
                    }
                }
            }
        }
    }
}

void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
{
    PLUGIN_ASSERT_MSG(false, "non-valid render command");
}

void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
{
    // these are validated in render graph
    managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
        renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };

    managers_.poolMgr->BeginBackendFrame();
    managers_.psoMgr->BeginBackendFrame();
#if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
    const auto& debugName = renderCommandCtx.debugName;
#endif
#if (RENDER_PERF_ENABLED == 1)
    perfCounters_ = {};
    PLUGIN_ASSERT(timers_.count(debugName) == 1);
    PerfDataSet& perfDataSet = timers_[debugName];
    perfDataSet.cpuTimer.Begin();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
#ifdef GL_GPU_DISJOINT_EXT
        /* Clear disjoint error */
        GLint disjointOccurred = 0;
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);
        glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
#endif
    commandListValid_ = true;
    for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
        PLUGIN_ASSERT(ref.rc);
        if (commandListValid_) {
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
#endif
            (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPopDebugGroup();
#endif
        }
    }
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPopDebugGroup();
#endif
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
        glEndQuery(GL_TIME_ELAPSED_EXT);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
    perfDataSet.cpuTimer.End();
    CopyPerfTimeStamp(debugName, perfDataSet);
#endif // RENDER_PERF_ENABLED
}

void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
    boundProgram_ = {};
    if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
        PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
        BindComputePipeline(renderCmd);
    } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        BindGraphicsPipeline(renderCmd);
    }
    currentPsoHandle_ = renderCmd.psoHandle;
}

void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
        managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
    if (pso) {
        const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
        // Setup descriptorset bind cache..
        SetupCache(data.pipelineLayout);
    }
    boundComputePipeline_ = pso;
    boundGraphicsPipeline_ = nullptr;
}

void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
{
    // based on pipeline layout. (note that compatible sets should "save state")
    for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
        // mark unmatching sets dirty (all for now)
        // resize the cache stuffs.
        const auto& s = pipelineLayout.descriptorSetLayouts[set];
        if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
            boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
            boundObjects_[set].oesBinds.clear();
#endif
            boundObjects_[set].resources.clear();
            continue;
        }
        PLUGIN_ASSERT(s.set == set);

        uint32_t maxB = 0;
        // NOTE: compatibility optimizations?
        // NOTE: we expect bindings to be sorted.
        if (s.bindings.back().binding == s.bindings.size() - 1U) {
            // since the last binding matches the size, expect it to be continuous.
            maxB = static_cast<uint32_t>(s.bindings.size());
        } else {
            // Sparse binding.
            // NOTE: note sparse sets will waste memory here. (see notes in
            // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
            for (const auto& bind : s.bindings) {
                maxB = Math::max(maxB, bind.binding);
            }
            maxB += 1U; // zero based bindings..
        }
        if (boundObjects_[set].resources.size() != maxB) {
            // resource count change.. (so it's dirty then)
            boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
            boundObjects_[set].oesBinds.clear();
#endif
            boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
            boundObjects_[set].resources.resize(maxB);
        }

        for (const auto& b : s.bindings) {
            auto& o = boundObjects_[set].resources[b.binding];
            // ignore b.shaderStageFlags for now.
            if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
                // mark set dirty, since "not matching"
                o.resources.clear();
                o.resources.resize(b.descriptorCount);
                o.descriptorType = b.descriptorType;
                boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
                boundObjects_[set].oesBinds.clear();
#endif
            }
        }
    }
}

void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
        managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
            activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
    if (pso) {
        const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
        dynamicStateFlags_ = data.dynamicStateFlags;
        DoGraphicsState(data.graphicsState);
        // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
        if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
            SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
                                         static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
        }
        if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
            SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
        }
        // Setup descriptorset bind cache..
        SetupCache(data.pipelineLayout);
    }
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = pso;
}

void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
    const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    const auto type = GetPrimFromTopology(topology_);
    const GLsizei firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
    const GLsizei instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
    // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
    // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
    if (renderCmd.indexCount) {
        uintptr_t offsetp = boundIndexBuffer_.offset;
        GLenum indexType = GL_UNSIGNED_SHORT;
        switch (boundIndexBuffer_.type) {
            case CORE_INDEX_TYPE_UINT16:
                offsetp += renderCmd.firstIndex * sizeof(uint16_t);
                indexType = GL_UNSIGNED_SHORT;
                break;
            case CORE_INDEX_TYPE_UINT32:
                offsetp += renderCmd.firstIndex * sizeof(uint32_t);
                indexType = GL_UNSIGNED_INT;
                break;
            default:
                PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                break;
        }
        const GLsizei indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
        const void* offset = reinterpret_cast<const void*>(offsetp);
        if (renderCmd.instanceCount > 1) {
            if (renderCmd.firstVertex) {
                glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, firstVertex);
            } else {
                glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
            }
        } else {
            if (renderCmd.vertexOffset) {
                glDrawElementsBaseVertex(
                    type, indexCount, indexType, offset, static_cast<GLint>(renderCmd.vertexOffset));
            } else {
                glDrawElements(type, indexCount, indexType, offset);
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        const GLsizei vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
        if (renderCmd.instanceCount > 1) {
            glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
        } else {
            glDrawArrays(type, firstVertex, vertexCount);
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
#endif
    }
}

void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
        const auto type = GetPrimFromTopology(topology_);
        auto offset = static_cast<GLintptr>(renderCmd.offset);
        if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
            GLenum indexType = GL_UNSIGNED_SHORT;
            switch (boundIndexBuffer_.type) {
                case CORE_INDEX_TYPE_UINT16:
                    indexType = GL_UNSIGNED_SHORT;
                    break;
                case CORE_INDEX_TYPE_UINT32:
                    indexType = GL_UNSIGNED_INT;
                    break;
                default:
                    PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                    break;
            }
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        } else {
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        perfCounters_.drawIndirectCount += renderCmd.drawCount;
#endif
    }
}

void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
#if (RENDER_PERF_ENABLED == 1)
    ++perfCounters_.dispatchCount;
#endif
}

void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
        glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.dispatchIndirectCount;
#endif
    }
}
1212 
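// Clear-scissor helpers: glClearBuffer* honors the scissor test, so load-op clears are restricted
// to the render area. ClearScissorInit() records the area, ClearScissorSet() applies it lazily just
// before the first clear, and ClearScissorReset() afterwards primes or re-requests the cached
// scissor box.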
1213 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1214 {
1215     resetScissor_ = false;           // need to reset scissor state after clear?
1216     clearScissorSet_ = true;         // need to setup clear scissors before clear?
1217     clearScissor_ = aArea;           // area to be cleared
1218     if (scissorPrimed_) {            // have scissors been set yet?
1219         if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissorbox.
1220             (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1221             (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1222             (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1223             // Current scissors match clearscissor area, so no need to set it again.
1224             clearScissorSet_ = false;
1225         }
1226     }
1227 }
1228 
1229 void RenderBackendGLES::ClearScissorSet()
1230 {
1231     if (clearScissorSet_) {       // do we need to set clear scissors.
1232         clearScissorSet_ = false; // clear scissors have been set now.
1233         resetScissor_ = true;     // we are modifying scissors, so remember to reset them afterwards.
1234         glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1235             static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1236     }
1237 }
1238 
1239 void RenderBackendGLES::ClearScissorReset()
1240 {
1241     if (resetScissor_) { // need to reset correct scissors?
1242         if (!scissorPrimed_) {
1243             // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1244             // setting)
1245             scissorPrimed_ = true;
1246             scissorBox_.offsetX = clearScissor_.offsetX;
1247             scissorBox_.offsetY = clearScissor_.offsetY;
1248             scissorBox_.extentHeight = clearScissor_.extentHeight;
1249             scissorBox_.extentWidth = clearScissor_.extentWidth;
1250         } else {
1251             // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1252             scissorBoxUpdated_ = true; // ie. request to update scissor state.
1253         }
1254     }
1255 }
1256 
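// Performs LOAD_OP_CLEAR for the given color attachments with glClearBufferfv, temporarily forcing
// the full color write mask whenever the cached blend state masks out any channel.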
1257 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1258 {
1259     constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1260                                              CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1261     const auto& cBlend = cacheState_.colorBlendState;
1262     for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1263         if (colorAttachments[idx] == nullptr) {
1264             continue;
1265         }
1266         const auto& ref = *(colorAttachments[idx]);
1267         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1268             const auto& cBlendState = cBlend.colorAttachments[idx];
1269             if (clearAll != cBlendState.colorWriteMask) {
1270                 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1271             }
1272             ClearScissorSet();
1273             // glClearBufferfv only for float formats?
1274             // glClearBufferiv & glClearBufferuiv only for integer formats?
1275             glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1276             if (clearAll != cBlendState.colorWriteMask) {
1277                 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1278                 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1279                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1280                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1281                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1282             }
1283         }
1284     }
1285 }
1286 
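// Performs LOAD_OP_CLEAR for the depth/stencil attachment; depth writes and stencil write masks are
// temporarily forced on so the clear is not masked, and the cached state is restored afterwards.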
1287 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1288 {
1289     const GLuint allBits = 0xFFFFFFFFu;
1290     const auto& ref = depthAttachment;
1291     const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1292     const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1293     // Change state if needed.
1294     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1295         glDepthMask(GL_TRUE);
1296     }
1297     if (clearStencil) {
1298         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1299             glStencilMaskSeparate(GL_FRONT, allBits);
1300         }
1301         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1302             glStencilMaskSeparate(GL_BACK, allBits);
1303         }
1304     }
1305     if (clearDepth || clearStencil) {
1306         // Set the scissors for clear..
1307         ClearScissorSet();
1308     }
1309     // Do clears.
1310     if (clearDepth && clearStencil) {
1311         glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1312             static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1313     } else if (clearDepth) {
1314         glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1315     } else if (clearStencil) {
1316         glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1317     }
1318 
1319     // Restore cached state, if we touched the state.
1320     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1321         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1322         glDepthMask(GL_FALSE);
1323     }
1324     if (clearStencil) {
1325         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1326         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1327             glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1328         }
1329         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1330             glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1331         }
1332     }
1333 }
1334 
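// Binds the FBO of the given subpass, performs load-op clears for attachments on their first use,
// and notes whether this subpass effectively renders to the default (swapchain) framebuffer.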
1335 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1336 {
1337     if (currentFrameBuffer_ == nullptr) {
1338         // Completely invalid state in backend.
1339         return;
1340     }
1341     const auto& rpd = activeRenderPass_.renderPassDesc;
1342     const auto& sb = activeRenderPass_.subpasses[subPass];
1343 
1344     // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
1345     if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1346         auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1347         device_.Activate(color);
1348     }
1349     device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1350     ClearScissorInit(renderArea_);
1351     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1352         SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1353     }
1354     {
1355         // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1356         // possible. (ie. all buffers at once)
1357         renderingToDefaultFbo_ = false;
1358         if (sb.colorAttachmentCount > 0) {
1359             // collect color attachment infos..
1360             const RenderPassDesc::AttachmentDesc*
1361                 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1362             for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1363                 uint32_t index = sb.colorAttachmentIndices[ci];
1364                 if (resolveToBackbuffer_[index]) {
1365                     // NOTE: this could fail with multiple color attachments....
1366                     renderingToDefaultFbo_ = true;
1367                 }
1368                 if (!attachmentCleared_[index]) {
1369                     attachmentCleared_[index] = true;
1370                     colorAttachments[ci] = &rpd.attachments[index];
1371                 } else {
1372                     colorAttachments[ci] = nullptr;
1373                 }
1374             }
1375             HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1376         }
1377         if (sb.depthAttachmentCount) {
1378             if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1379                 attachmentCleared_[sb.depthAttachmentIndex] = true;
1380                 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1381             }
1382         }
1383     }
1384     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1385         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1386         SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1387     }
1388     ClearScissorReset();
1389 }
1390 
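// Records the first and last subpass in which each attachment is used. Last-use information feeds
// the end-of-pass Invalidate* calls, and attachments whose backing image is the default framebuffer
// (platform image and renderbuffer both 0) are flagged as "backbuffer-like" via resolveToBackbuffer_.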
1391 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1392 {
1393     for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1394         const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1395         for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1396             uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1397             if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1398                 attachmentFirstUse_[resolveTo] = sub;
1399             }
1400             attachmentLastUse_[resolveTo] = sub;
1401             const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1402             if ((p.image == 0) && (p.renderBuffer == 0)) {
1403                 // mark the "resolveFrom" (ie. the colorattachment) as "backbuffer-like", since we resolve to
1404                 // backbuffer...
1405                 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1406                 resolveToBackbuffer_[resolveFrom] = true;
1407             }
1408         }
1409         for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1410             uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1411             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1412                 attachmentFirstUse_[index] = sub;
1413             }
1414             attachmentLastUse_[index] = sub;
1415         }
1416         for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1417             uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1418             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1419                 attachmentFirstUse_[index] = sub;
1420             }
1421             attachmentLastUse_[index] = sub;
1422             if (attachmentImage_[index]) {
1423                 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1424                 if ((p.image == 0) && (p.renderBuffer == 0)) {
1425                     resolveToBackbuffer_[index] = true;
1426                 }
1427             }
1428         }
1429         if (currentSubPass.depthAttachmentCount > 0) {
1430             uint32_t index = currentSubPass.depthAttachmentIndex;
1431             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1432                 attachmentFirstUse_[index] = sub;
1433             }
1434             attachmentLastUse_[index] = sub;
1435         }
1436     }
1437 }
1438 
1439 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1440 {
1441     PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1442     const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1443     switch (renderCmd.beginType) {
1444         case RenderPassBeginType::RENDER_PASS_BEGIN: {
1445             ++inRenderpass_;
1446             PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass mInRenderpass %u", inRenderpass_);
1447             activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1448 
1449             const auto& rpd = activeRenderPass_.renderPassDesc;
1450             renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1451             auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1452             if (multisampledRenderToTexture_) {
1453                 cpm.FilterRenderPass(activeRenderPass_);
1454             }
1455             currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1456             if (currentFrameBuffer_ == nullptr) {
1457                 // Completely invalid state in backend.
1458                 commandListValid_ = false;
1459                 --inRenderpass_;
1460                 return;
1461             }
1462             PLUGIN_ASSERT_MSG(
1463                 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1464             currentSubPass_ = 0;
1465             // find first and last use, and reset the clear flags. (this could be cached in the low-level classes)
1466             for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1467                 attachmentCleared_[i] = false;
1468                 attachmentFirstUse_[i] = 0xFFFFFFFF;
1469                 attachmentLastUse_[i] = 0;
1470                 resolveToBackbuffer_[i] = false;
1471                 attachmentImage_[i] =
1472                     static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1473             }
1474             ScanPasses(rpd);
1475             DoSubPass(0);
1476 #if (RENDER_PERF_ENABLED == 1)
1477             ++perfCounters_.renderPassCount;
1478 #endif
1479         } break;
1480 
1481         case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1482             ++currentSubPass_;
1483             PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1484             DoSubPass(activeRenderPass_.subpassStartIndex);
1485         } break;
1486 
1487         default:
1488             break;
1489     }
1490 }
1491 
1492 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1493 {
1494     PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1495     const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1496     PLUGIN_UNUSED(renderCmd);
1497     PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1498     ++currentSubPass_;
1499     PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1500     DoSubPass(currentSubPass_);
1501 }
1502 
1503 int32_t RenderBackendGLES::InvalidateDepthStencil(
1504     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1505 {
1506     int32_t depthCount = 0;
1507     if (currentSubPass.depthAttachmentCount > 0) {
1508         const uint32_t index = currentSubPass.depthAttachmentIndex;
1509         if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1510             const auto& image = attachmentImage_[index];
1511             const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1512             // NOTE: we expect the depth to be in FBO in this case even if there would be a depth target in render pass
1513             if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1514                 bool depth = false;
1515                 bool stencil = false;
1516                 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1517                     if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1518                         depth = true;
1519                     }
1520                 }
1521                 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1522                     if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1523                         stencil = true;
1524                     }
1525                 }
1526                 if (depth && stencil) {
1527                     invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1528                     depthCount++;
1529                 } else if (stencil) {
1530                     invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1531                     depthCount++;
1532                 } else if (depth) {
1533                     invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1534                     depthCount++;
1535                 }
1536             }
1537         }
1538     }
1539     return depthCount;
1540 }
1541 
1542 int32_t RenderBackendGLES::InvalidateColor(
1543     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1544 {
1545     int32_t colorCount = 0;
1546     // see which parts of the fbo can be invalidated...
1547     // collect color attachment infos..
1548     for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1549         const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1550         if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1551             if (const auto* image = attachmentImage_[index]) {
1552                 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1553                 if (dplat.image || dplat.renderBuffer) {
1554                     if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1555                         invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1556                         colorCount++;
1557                     }
1558                 }
1559             }
1560         }
1561     }
1562     return colorCount;
1563 }
1564 
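// Resolves multisampled color/depth into the subpass resolve FBO with a full-size, non-flipping
// glBlitFramebuffer and returns which framebuffer target (GL_READ_FRAMEBUFFER or GL_FRAMEBUFFER)
// the subsequent invalidation should use.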
1565 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1566 {
1567     const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1568                             ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1569     if (mask) {
1570         // Resolve MSAA buffers.
1571         // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1572         device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1573         device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1574         if (scissorEnabled_) {
1575             glDisable(GL_SCISSOR_TEST);
1576             scissorEnabled_ = false;
1577         }
1578         // FLIP_RESOLVE_DEFAULT_FBO not needed, since we render flipped if end result will be resolved to fbo..
1579         // hopefully it works now.
1580 #if defined(FLIP_RESOLVE_DEFAULT_FBO) && FLIP_RESOLVE_DEFAULT_FBO
1581         if (currentFrameBuffer_->resolveFbo[currentSubPass_] == 0) {
1582             // flip if resolving to default fbo. (NOTE: sample count of destination must be zero or equal to source)
1583             // and in mali devices src and dst rects MUST be equal. (which is not according to spec)
1584             // IE. can't flip and resolve at the same time on MALI based devices.
1585             // NEED A FIX HERE!
1586             glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1587                 static_cast<GLint>(currentFrameBuffer_->height), 0, static_cast<GLint>(currentFrameBuffer_->height),
1588                 static_cast<GLint>(currentFrameBuffer_->width), 0, mask, GL_NEAREST);
1589             return GL_READ_FRAMEBUFFER;
1590         }
1591 #endif
1592         glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1593             static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1594             static_cast<GLint>(currentFrameBuffer_->height), mask,
1595             GL_NEAREST); // no flip
1596         return GL_READ_FRAMEBUFFER;
1597     }
1598     return GL_FRAMEBUFFER;
1599 }
1600 
1601 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1602 {
1603     PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1604     const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1605     if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1606         PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass mInRenderpass %u", inRenderpass_);
1607         inRenderpass_--;
1608     }
1609     if (currentFrameBuffer_ == nullptr) {
1610         // Completely invalid state in backend.
1611         return;
1612     }
1613     const auto& rpd = activeRenderPass_.renderPassDesc;
1614     const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1615 
1616     // Resolve MSAA
1617     const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1618 
1619     // Finally invalidate color and depth..
1620     GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1621     int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1622     invalidateCount += InvalidateDepthStencil(
1623         array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1624 
1625     // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1626     Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1627 
1628     if (inRenderpass_ == 0) {
1629         currentFrameBuffer_ = nullptr;
1630     }
1631 }
1632 
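// Only caches the vertex buffer bindings (buffer id + byte offset per slot) in vertexAttribBindSlots_;
// the actual GL vertex state is expected to be applied when the next draw is flushed.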
1633 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1634 {
1635     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1636     const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1637     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1638     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1639     if (!boundGraphicsPipeline_) {
1640         return;
1641     }
1642     vertexAttribBinds_ = renderCmd.vertexBufferCount;
1643     for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1644         const auto& currVb = renderCmd.vertexBuffers[i];
1645         if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1646             const auto& plat = gpuBuffer->GetPlatformData();
1647             uintptr_t offset = currVb.bufferOffset;
1648             offset += plat.currentByteOffset;
1649             vertexAttribBindSlots_[i].id = plat.buffer;
1650             vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1651         } else {
1652             vertexAttribBindSlots_[i].id = 0;
1653             vertexAttribBindSlots_[i].offset = 0;
1654         }
1655     }
1656 }
1657 
1658 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1659 {
1660     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1661     const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1662     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1663         gpuBuffer) {
1664         const auto& plat = gpuBuffer->GetPlatformData();
1665         boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1666         boundIndexBuffer_.offset += plat.currentByteOffset;
1667         boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1668         boundIndexBuffer_.id = plat.buffer;
1669     }
1670 }
1671 
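// Blits between two images by attaching each source/destination layer and mip to the temporary
// read/draw framebuffers and letting DoBlit() perform the framebuffer blit for the requested region.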
1672 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1673 {
1674     PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1675     const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1676     const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1677     const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1678     if ((srcImage == nullptr) || (dstImage == nullptr)) {
1679         return;
1680     }
1681     const auto& srcDesc = srcImage->GetDesc();
1682     const auto& srcPlat = srcImage->GetPlatformData();
1683     const auto& dstDesc = dstImage->GetDesc();
1684     const auto& dstPlat = dstImage->GetPlatformData();
1685     const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1686     const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1687     const auto& src = renderCmd.imageBlit.srcSubresource;
1688     const auto& dst = renderCmd.imageBlit.dstSubresource;
1689     const GLint srcMipLevel = static_cast<GLint>(src.mipLevel);
1690     const GLint dstMipLevel = static_cast<GLint>(dst.mipLevel);
1691     const uint32_t srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1692     const uint32_t dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1693     PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1694     PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1695     glDisable(GL_SCISSOR_TEST);
1696     scissorEnabled_ = false;
1697     // NOTE: LAYERS! (texture arrays)
1698     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1699     device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1700     for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1701         const GLenum srcType = getTarget(srcPlat.type, layer, srcSampleCount);
1702         const GLenum dstType = getTarget(dstPlat.type, layer, dstSampleCount);
1703         // glFramebufferTextureLayer for array textures....
1704         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1705         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1706         DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1707             { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1708         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1709         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1710     }
1711 }
1712 
1713 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1714 {
1715     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1716     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1717     const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1718     const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1719     if (srcGpuBuffer && dstGpuBuffer) {
1720         const auto& srcData = srcGpuBuffer->GetPlatformData();
1721         const auto& dstData = dstGpuBuffer->GetPlatformData();
1722         const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1723         const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1724         device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1725         device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1726         glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1727             static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1728             static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1729         device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1730         device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1731     }
1732 }
1733 
1734 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1735 {
1736 #if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
1737     // use the workaround only for gles backend on windows. (pvr simulator bug)
1738     constexpr const bool usePixelUnpackBuffer = false;
1739 #else
1740     // expect this to work, and the nvidia bug to be fixed.
1741     constexpr const bool usePixelUnpackBuffer = true;
1742 #endif
1743     auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1744     auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1745     if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1746         return;
1747     }
1748     const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1749     if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1750         BlitCube(device_, info);
1751     } else if (info.iPlat.type == GL_TEXTURE_2D) {
1752         Blit2D(device_, info);
1753     } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1754         BlitArray(device_, info);
1755     } else if (info.iPlat.type == GL_TEXTURE_3D) {
1756         Blit3D(device_, info);
1757 #if RENDER_HAS_GLES_BACKEND
1758     } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1759         PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1760 #endif
1761     } else {
1762         PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1763     }
1764     FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1765 }
1766 
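// Copies texels from an image into a buffer by attaching the source mip level to the read
// framebuffer and issuing glReadnPixels with the destination bound as GL_PIXEL_PACK_BUFFER.
// Only 2D and cube map sources are supported here.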
1767 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1768 {
1769     const auto& bc = renderCmd.bufferImageCopy;
1770     const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1771     const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1772     PLUGIN_ASSERT(srcGpuImage);
1773     PLUGIN_ASSERT(dstGpuBuffer);
1774     if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1775         return;
1776     }
1777     const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1778     const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1779     if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1780         PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1781         return;
1782     }
1783     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1784     PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1785     GLenum type = GL_TEXTURE_2D;
1786     if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1787         type = getCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1788     }
1789     // glFramebufferTextureLayer for array textures....
1790     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1791         static_cast<GLint>(bc.imageSubresource.mipLevel));
1792     const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1793     const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1794     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1795     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1796     glPixelStorei(GL_PACK_ALIGNMENT, 1);
1797     uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1798     glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1799         static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1800         static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1801     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1802     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1803 }
1804 
1805 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1806 {
1807     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1808     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1809     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1810     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1811         BufferToImageCopy(renderCmd);
1812     } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1813         ImageToBufferCopy(renderCmd);
1814     }
1815 }
1816 
1817 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1818 {
1819     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1820     const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1821     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1822     const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1823     const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1824     if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1825         return;
1826     }
1827     const auto& srcDesc = srcGpuImage->GetDesc();
1828     const auto& dstDesc = dstGpuImage->GetDesc();
1829 #if RENDER_VALIDATION_ENABLED
1830     ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1831 #endif
1832     const auto srcMipLevel =
1833         static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1834     const auto dstMipLevel =
1835         static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1836 
1837     auto sOffset = renderCmd.imageCopy.srcOffset;
1838     auto dOffset = renderCmd.imageCopy.dstOffset;
1839     auto size = renderCmd.imageCopy.extent;
1840 
1841     // clamp negative offsets to zero and adjust extent and other offset accordingly
1842     ClampOffset(sOffset, dOffset, size);
1843     ClampOffset(dOffset, sOffset, size);
1844 
1845     // clamp size to fit src and dst
1846     ClampSize(sOffset, srcDesc, size);
1847     ClampSize(dOffset, dstDesc, size);
1848 
1849     const auto& srcPlatData = srcGpuImage->GetPlatformData();
1850     const auto& dstPlatData = dstGpuImage->GetPlatformData();
1851     glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1852         dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1853         static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1854 }
1855 
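// Translates the recorded src/dst access masks into glMemoryBarrier / glMemoryBarrierByRegion
// bitfields; the ByRegion variant is used only for fragment-to-fragment dependencies.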
1856 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1857 {
1858     PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1859     const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1860     const auto& rbList = *managers_.rbList;
1861     // NOTE: proper flagging of barriers.
1862     if (const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1863             rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1864         barrierPointBarriers) {
1865         const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1866         const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1867         GLbitfield barriers = 0;
1868         GLbitfield barriersByRegion = 0;
1869         for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1870             if (nextBarrierList == nullptr) {
1871                 // cannot be null, just a safety
1872                 PLUGIN_ASSERT(false);
1873                 return;
1874             }
1875             const auto& barrierListRef = *nextBarrierList;
1876             nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1877             const uint32_t barrierCount = barrierListRef.count;
1878             // helper which covers barriers supported by Barrier and BarrierByRegion
1879             auto commonBarrierBits = [](AccessFlags accessFlags, RenderHandleType resourceType) -> GLbitfield {
1880                 GLbitfield barriers = 0;
1881                 if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
1882                     barriers |= GL_UNIFORM_BARRIER_BIT;
1883                 }
1884                 if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
1885                     // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
1886                     if (resourceType == RenderHandleType::GPU_IMAGE) {
1887                         barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1888                     } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1889                         barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1890                     } else {
1891                         barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT |
1892                                     GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1893                     }
1894                 }
1895                 if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
1896                     if (resourceType == RenderHandleType::GPU_IMAGE) {
1897                         barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
1898                     } else if (resourceType == RenderHandleType::GPU_BUFFER) {
1899                         barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
1900                     } else {
1901                         barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
1902                     }
1903                 }
1904                 if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1905                                       CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
1906                     barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
1907                 }
1908                 // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
1909                 return barriers;
1910             };
1911             for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1912                 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1913 
1914                 // check if written by previous shader as an attachment or storage/ image buffer
1915                 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1916                                                   CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1917                     const auto resourceHandle = barrier.resourceHandle;
1918                     const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1919 
1920                     // barrier by region is between fragment shaders and supports a subset of barriers.
1921                     if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1922                         (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1923                         barriersByRegion |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1924                     } else {
1925                         // check the barriers shared with ByRegion
1926                         barriers |= commonBarrierBits(barrier.dst.accessFlags, handleType);
1927 
1928                         // the rest are invalid for ByRegion
1929                         if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1930                             barriers |= GL_COMMAND_BARRIER_BIT;
1931                         }
1932                         if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1933                             barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1934                         }
1935                         if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1936                             barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1937                         }
1938                         // which are the correct accessFlags?
1939                         // GL_PIXEL_BUFFER_BARRIER_BIT:
1940                         // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings (via
1941                         // glReadPixels, glTexSubImage1D, etc.)
1942                         // GL_TEXTURE_UPDATE_BARRIER_BIT:
1943                         // - texture via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*, and
1944                         // reads via glGetTexImage
1945                         // GL_BUFFER_UPDATE_BARRIER_BIT:
1946                         // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory
1947                         // mapped by glMapBuffer or glMapBufferRange
1948                         // Do these two cover all memory access, CORE_ACCESS_MEMORY_READ_BIT,
1949                         // CORE_ACCESS_MEMORY_WRITE_BIT?
1950                         if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
1951                                                           CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
1952                             if (handleType == RenderHandleType::GPU_IMAGE) {
1953                                 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
1954                             } else if (handleType == RenderHandleType::GPU_BUFFER) {
1955                                 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
1956                             }
1957                         }
1958                         // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
1959                     }
1960                 }
1961             }
1962         }
1963         if (barriers) {
1964             glMemoryBarrier(barriers);
1965         }
1966         if (barriersByRegion) {
1967             // only for fragment-fragment
1968             glMemoryBarrierByRegion(barriersByRegion);
1969         }
1970     }
1971 }
1972 
1973 Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
1974 {
1975     PLUGIN_ASSERT(binding.binding < resources.size());
1976     auto& obj = resources[binding.binding];
1977     PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
1978     PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
1979     return obj;
1980 }
1981 
1982 void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
1983 {
1984     const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
1985     if (gpuSampler) {
1986         const auto& plat = gpuSampler->GetPlatformData();
1987         obj.resources[index].sampler.samplerId = plat.sampler;
1988     } else {
1989         obj.resources[index].sampler.samplerId = 0;
1990     }
1991 }
1992 
1993 void RenderBackendGLES::BindImage(
1994     const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
1995 {
1996     const AccessFlags accessFlags = resState.accessFlags;
1997     auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
1998     auto& ref = obj.resources[index];
1999     ref.image.image = gpuImage;
2000     const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
2001     const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
2002     if (read && write) {
2003         ref.image.mode = GL_READ_WRITE;
2004     } else if (read) {
2005         ref.image.mode = GL_READ_ONLY;
2006     } else if (write) {
2007         ref.image.mode = GL_WRITE_ONLY;
2008     } else {
2009         // no read and no write?
2010         ref.image.mode = GL_READ_WRITE;
2011     }
2012     ref.image.mipLevel = res.mip;
2013 }
2014 
2015 void RenderBackendGLES::BindImageSampler(
2016     const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2017 {
2018     BindImage(res, resState, obj, index);
2019     BindSampler(BindableSampler { res.samplerHandle }, obj, index);
2020 }
2021 
2022 void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
2023 {
2024     const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
2025     if (gpuBuffer) {
2026         const auto& plat = gpuBuffer->GetPlatformData();
2027         const uint32_t baseOffset = res.byteOffset;
2028         obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
2029         obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
2030         obj.resources[index].buffer.bufferId = plat.buffer;
2031     } else {
2032         obj.resources[index].buffer.offset = 0;
2033         obj.resources[index].buffer.size = 0;
2034         obj.resources[index].buffer.bufferId = 0;
2035     }
2036 }
2037 
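// Refreshes the CPU-side bind cache (boundObjects_[set]) for one descriptor set; nothing is bound
// to GL here, the cached resources are applied by BindResources() before the next draw/dispatch.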
2038 void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
2039     const DescriptorSetLayoutBindingResources& data, uint32_t set)
2040 {
2041     BindState& bind = boundObjects_[set];
2042     vector<Gles::Bind>& resources = bind.resources;
2043 #if RENDER_HAS_GLES_BACKEND
2044     bind.oesBinds.clear();
2045 #endif
2046     const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
2047     const auto& buffers = data.buffers;
2048     const auto& images = data.images;
2049     const auto& samplers = data.samplers;
2050     uint32_t currDynamic = 0U;
2051     for (const auto& res : data.bindings) {
2052         auto& obj = SetupBind(res.binding, resources);
2053 #if RENDER_HAS_GLES_BACKEND
2054         bool hasOes = false;
2055 #endif
2056         auto GetArrayOffset = [](const auto& data, const auto& res) {
2057             const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
2058             if (type == RenderHandleType::GPU_BUFFER) {
2059                 return data.buffers[res.resourceIndex].arrayOffset;
2060             } else if (type == RenderHandleType::GPU_IMAGE) {
2061                 return data.images[res.resourceIndex].arrayOffset;
2062             } else if (type == RenderHandleType::GPU_SAMPLER) {
2063                 return data.samplers[res.resourceIndex].arrayOffset;
2064             }
2065             return 0u;
2066         };
2067         const bool hasArrOffset = (res.binding.descriptorCount > 1);
2068         const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
2069         for (uint8_t index = 0; index < res.binding.descriptorCount; index++) {
2070             const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
2071             GpuImageGLES* image = nullptr;
2072             switch (res.binding.descriptorType) {
2073                 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2074                     const auto& bRes = samplers[resIdx];
2075                     BindSampler(bRes.resource, obj, index);
2076                     break;
2077                 }
2078                 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2079                 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2080                 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2081                     const auto& bRes = images[resIdx];
2082                     BindImage(bRes.resource, bRes.state, obj, index);
2083                     image = obj.resources[index].image.image;
2084                     break;
2085                 }
2086                 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2087                     const auto& bRes = images[resIdx];
2088                     BindImageSampler(bRes.resource, bRes.state, obj, index);
2089                     image = obj.resources[index].image.image;
2090                     break;
2091                 }
2092                 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2093                 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2094                     const auto& bRes = buffers[resIdx];
2095                     uint32_t dynamicOffset = 0;
2096                     if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
2097                         dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
2098                         currDynamic++;
2099                     }
2100                     BindBuffer(bRes.resource, obj, dynamicOffset, index);
2101                     break;
2102                 }
2103                 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2104                 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
2105                     const auto& bRes = buffers[resIdx];
2106                     BindBuffer(bRes.resource, obj, 0, index);
2107                     break;
2108                 }
2109                 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2110                 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2111                 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2112                 default:
2113                     PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
2114                     break;
2115             }
2116 #if RENDER_HAS_GLES_BACKEND
2117             if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
2118                 hasOes = true;
2119             }
2120 #endif
2121         }
2122 #if RENDER_HAS_GLES_BACKEND
2123         if (hasOes) {
2124             bind.oesBinds.push_back(OES_Bind { (uint8_t)set, (uint8_t)res.binding.binding });
2125         }
2126 #endif
2127     }
2128 }
2129 
2130 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2131 {
2132     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2133     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2134         return;
2135     }
2136     const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2137     PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2138 
2139     const auto& aNcdsm = *managers_.descriptorSetMgr;
2140     for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
2141         PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
2142         const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2143         PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
2144         const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
2145         boundObjects_[idx].dirty = true; // mark the set as "changed"
2146         ProcessBindings(renderCmd, data, idx);
2147         // (note, nothing actually gets bound yet.. just the bind cache is updated)
2148     }
2149 }
2150 
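// Push constants are emulated with regular uniforms: each reflected push constant member is
// uploaded with the matching glProgramUniform* call for the given program.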
2151 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2152 {
2153     const GLint location = static_cast<GLint>(pc.location);
2154     // the consts list has been filtered and cleared of unused uniforms.
2155     PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2156     GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2157     switch (pc.type) {
2158         case GL_UNSIGNED_INT: {
2159             glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2160             break;
2161         }
2162         case GL_FLOAT: {
2163             glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2164             break;
2165         }
2166         case GL_FLOAT_VEC2: {
2167             glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2168             break;
2169         }
2170         case GL_FLOAT_VEC4: {
2171             glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2172             break;
2173         }
2174         case GL_FLOAT_MAT4: {
2175             glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2176             break;
2177         }
2178         case GL_UNSIGNED_INT_VEC4: {
2179             glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2180             break;
2181         }
2182         default:
2183             PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2184     }
2185 }
2186 
2187 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2188 {
2189     if (boundProgram_.setPushConstants) {
2190         boundProgram_.setPushConstants = false;
2191         const auto& renderCmd = boundProgram_.pushConstants;
2192         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2193         PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2194         PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2195         if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2196             return;
2197         // ASSERT: expecting data is valid
2198         // NOTE: handle rest of the types
2199         for (const auto& pc : consts) {
2200             const size_t offs = pc.offset;
2201             if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2202                 PLUGIN_LOG_E(
2203                     "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2204                     pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2205                 continue;
2206             }
2207             /*
2208             NOTE: handle the strides....
2209             consts[i].array_stride;
2210             consts[i].matrix_stride; */
2211             SetPushConstant(program, pc, &renderCmd.data[offs]);
2212         }
2213     }
2214 }
2215 
2216 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2217 {
2218     PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2219     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2220         return;
2221     }
2222     const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2223     if (renderCmd.pushConstant.byteSize > 0) {
2224         PLUGIN_ASSERT(renderCmd.data);
2225         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2226         boundProgram_.setPushConstants = true;
2227         boundProgram_.pushConstants = renderCmd;
2228     }
2229 }
2230 
2231 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2232 {
2233     PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2234 #if RENDER_HAS_GLES_BACKEND
2235 #if (RENDER_VALIDATION_ENABLED == 1)
2236     PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2237         "Render command clear color image not supported with GLES. One should implement a higher level path for "
2238         "clearing.");
2239 #endif
2240 #else
2241     const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2242 
2243     const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2244     if (imagePtr) {
2245         const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2246         // NOTE: mip levels and array layers should be handled separately
2247         for (const auto& subresRef : renderCmd.ranges) {
2248             glClearTexImage(platImage.image,     // texture
2249                 (int32_t)subresRef.baseMipLevel, // level
2250                 platImage.format,                // format
2251                 platImage.dataType,              // type
2252                 &renderCmd.color);               // data
2253         }
2254     }
2255 #endif
2256 }
2257 
2258 // dynamic states
2259 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2260 {
2261     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2262     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2263     const ViewportDesc& vd = renderCmd.viewportDesc;
2264     SetViewport(renderArea_, vd);
2265 }
2266 
2267 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2268 {
2269     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2270     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2271     const ScissorDesc& sd = renderCmd.scissorDesc;
2272     SetScissor(renderArea_, sd);
2273 }
2274 
2275 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2276 {
2277     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2278     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2279     if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2280         cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2281         glLineWidth(renderCmd.lineWidth);
2282     }
2283 }
2284 
2285 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2286 {
2287     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2288     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2289 }
2290 
2291 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2292 {
2293     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2294     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2295 }
2296 
2297 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2298 {
2299     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2300     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2301 }
2302 
2303 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2304     const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2305 {
2306     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2307     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2308     const uint32_t FUNCMASK =
2309         (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
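    // glStencilFuncSeparate() sets the compare op, reference and compare mask together,
    // so a change in any one of them forces all three to be re-issued.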
2310     if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2311         cFront.writeMask = front.writeMask;
2312         glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2313     }
2314     if (frontFlags & FUNCMASK) {
2315         SetStencilCompareOp(cFront, front);
2316         glStencilFuncSeparate(
2317             GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2318     }
2319     if (frontFlags & StencilSetFlags::SETOP) {
2320         SetStencilOp(cFront, front);
2321         glStencilOpSeparate(
2322             GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2323     }
2324     if (backFlags & StencilSetFlags::SETWRITEMASK) {
2325         cBack.writeMask = back.writeMask;
2326         glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2327     }
2328     if (backFlags & FUNCMASK) {
2329         SetStencilCompareOp(cBack, back);
2330         glStencilFuncSeparate(
2331             GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2332     }
2333     if (backFlags & StencilSetFlags::SETOP) {
2334         SetStencilOp(cBack, back);
2335         glStencilOpSeparate(
2336             GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2337     }
2338 }
2339 
2340 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2341 {
2342     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2343     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2344     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2345     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2346     uint32_t setFront = 0;
2347     uint32_t setBack = 0;
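    // Update the cached front/back stencil state and record which pieces changed;
    // SetStencilState() below then issues only the required GL calls.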
2348     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2349         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2350             if (renderCmd.mask != cFront.compareMask) {
2351                 cFront.compareMask = renderCmd.mask;
2352                 setFront |= StencilSetFlags::SETCOMPAREMASK;
2353             }
2354         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2355             if (renderCmd.mask != cFront.writeMask) {
2356                 cFront.writeMask = renderCmd.mask;
2357                 setFront |= StencilSetFlags::SETWRITEMASK;
2358             }
2359         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2360             if (renderCmd.mask != cFront.reference) {
2361                 cFront.reference = renderCmd.mask;
2362                 setFront |= StencilSetFlags::SETREFERENCE;
2363             }
2364         }
2365     }
2366     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2367         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2368             if (renderCmd.mask != cBack.compareMask) {
2369                 cBack.compareMask = renderCmd.mask;
2370                 setBack |= StencilSetFlags::SETCOMPAREMASK;
2371             }
2372         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2373             if (renderCmd.mask != cBack.writeMask) {
2374                 cBack.writeMask = renderCmd.mask;
2375                 setBack |= StencilSetFlags::SETWRITEMASK;
2376             }
2377         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2378             if (renderCmd.mask != cBack.reference) {
2379                 cBack.reference = renderCmd.mask;
2380                 setBack |= StencilSetFlags::SETREFERENCE;
2381             }
2382         }
2383     }
2384     SetStencilState(setFront, cFront, setBack, cBack);
2385 }
2386 
2387 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2388 {
2389 #if (RENDER_VALIDATION_ENABLED == 1)
2390     PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2391         "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2392 #endif
2393 }
2394 
2395 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2396 {
2397     PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2398 }
2399 
2400 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2401 {
2402     PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2403 }
2404 
2405 void RenderBackendGLES::BindVertexInputs(
2406     const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2407 {
2408     // update bindings for the VAO.
2409     // process with attribute descriptions to only bind the needed vertex buffers
2410     // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2411     // but we only bind the ones the shader needs.
2412     const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2413     for (uint32_t i = 0; i < minBinding; ++i) {
2414         const auto& attributeRef = decldata.attributeDescriptions[i];
2415         const uint32_t location = attributeRef.location;
2416         const uint32_t binding = attributeRef.binding;
2417         // NOTE: we need to bind all the buffers to the correct bindings.
2418         // shader optimized check (vertexInputs, some locations are not in use)
2419         if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2420             const auto& slot = vertexAttribBindSlots_[binding];
2421             const auto& bindingRef = decldata.bindingDescriptions[binding];
2422             PLUGIN_ASSERT(bindingRef.binding == binding);
2423             // buffer bound to slot, and it's used by the shader.
2424             device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2425             /*
2426             core/vulkan
2427             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_VERTEX (0)  attribute index advances per vertex
2428             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_INSTANCE (1)  attribute index advances per instance
2429 
2430             gl/gles
2431             If divisor is  0, the attributes using the buffer bound to bindingindex advance once per vertex.
2432             If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2433             rendered.
2434 
2435             so we can pass the inputRate directly as the VertexBindingDivisor (i.e. advance once per instance);
2436             the enum values happen to match, so a simple cast suffices.
2437             */
2438             static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2439             device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2440         }
2441     }
2442 }
2443 
2444 void RenderBackendGLES::BindResources()
2445 {
2446 #if RENDER_HAS_GLES_BACKEND
2447     // scan all sets here to see if any of them has OES bindings.
2448     // we don't actually need to rebuild this info every time.
2449     // should "emulate" the gpu descriptor sets better. (and store this information along with the other bind cache
2450     // data there)
2451     oesBinds_.clear();
2452     for (const auto& state : boundObjects_) {
2453         const auto& oes = state.oesBinds;
2454         if (!oes.empty()) {
2455             oesBinds_.insert(oesBinds_.end(), oes.begin(), oes.end());
2456         }
2457     }
2458 #endif
2459     const array_view<Binder>* resourceList = nullptr;
2460     const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2461     int32_t flipLocation = Gles::INVALID_LOCATION;
2462     uint32_t program = 0;
2463     // Push constants and the "flipLocation" uniform (i.e. uniform state) should only be updated when changed...
2464     if (currentFrameBuffer_) { // currentFrameBuffer_ is only set if a graphics pipeline is bound.
2465         PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2466         PLUGIN_ASSERT(boundGraphicsPipeline_);
2467         if (!boundGraphicsPipeline_) {
2468             return;
2469         }
2470         array_view<const int32_t> vertexInputs;
2471         const auto& pipelineData =
2472             static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2473         const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2474 #if RENDER_HAS_GLES_BACKEND
2475         if (!oesBinds_.empty()) {
2476             // the oesBinds_ vector contains the set/binding pairs to which an OES texture is bound;
2477             // ask for a compatible program from the boundGraphicsPipeline_
2478             shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2479         }
2480 #endif
2481         const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2482         program = sd.program;
2483         vertexInputs = { sd.inputs, countof(sd.inputs) };
2484         FlushViewportScissors();
2485         if (!scissorEnabled_) {
2486             scissorEnabled_ = true;
2487             glEnable(GL_SCISSOR_TEST); // Always enabled
2488         }
2489 #if (RENDER_PERF_ENABLED == 1)
2490         if (device_.BoundProgram() != program) {
2491             ++perfCounters_.bindProgram;
2492         }
2493 #endif
2494         device_.UseProgram(program);
2495         device_.BindVertexArray(pipelineData.vao);
2496         BindVertexInputs(pipelineData.vertexInputDeclaration, vertexInputs);
2497         device_.BindElementBuffer(boundIndexBuffer_.id);
2498         resourceList = &sd.resourceList;
2499         flipLocation = sd.flipLocation;
2500         pushConstants = &sd.pushConstants;
2501     } else {
2502         PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2503         PLUGIN_ASSERT(boundComputePipeline_);
2504         if (!boundComputePipeline_) {
2505             return;
2506         }
2507         const auto& pipelineData =
2508             static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2509         if (pipelineData.computeShader) {
2510             const auto& sd =
2511                 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2512             program = sd.program;
2513 #if (RENDER_PERF_ENABLED == 1)
2514             if (device_.BoundProgram() != program) {
2515                 ++perfCounters_.bindProgram;
2516             }
2517 #endif
2518             device_.UseProgram(program);
2519             resourceList = &sd.resourceList;
2520             flipLocation = sd.flipLocation;
2521             pushConstants = &sd.pushConstants;
2522         }
2523     }
2524 
2525     SetPushConstants(program, *pushConstants);
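    // The flip uniform compensates for the y-inverted coordinate convention:
    // -1.0 when rendering to the default framebuffer, +1.0 for off-screen targets.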
2526     if (flipLocation != Gles::INVALID_LOCATION) {
2527         const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2528         glProgramUniform1fv(program, flipLocation, 1, &flip);
2529     }
2530 
2531     for (const auto& r : *resourceList) {
2532         PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
2533         if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
2534             continue;
2535         }
2536         const auto& res = boundObjects_[r.set].resources[r.bind];
2537         PLUGIN_ASSERT(res.resources.size() == r.id.size());
2538         auto resType = res.descriptorType;
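        // GL(ES) has no separate binding path for dynamic buffers; bind them as plain UBO/SSBO ranges.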
2539         if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2540             resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2541         } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2542             resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2543         }
2544 
2545         // a few helpers for updating perf counters and binding the sampler/texture/buffer
2546         auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
2547 #if (RENDER_PERF_ENABLED == 1)
2548             if (device_.BoundSampler(textureUnit) != samplerId) {
2549                 ++perfCounters_.bindSampler;
2550             }
2551 #endif
2552             device_.BindSampler(textureUnit, samplerId);
2553         };
2554         auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
2555 #if (RENDER_PERF_ENABLED == 1)
2556             if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
2557                 ++perfCounters_.bindTexture;
2558             }
2559 #endif
2560             device_.BindTexture(textureUnit, dplat.type, dplat.image);
2561         };
2562         auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
2563                                     const GpuImagePlatformDataGL& dplat) {
2564             uint32_t level = (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
2565             device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
2566         };
2567         auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
2568 #if (RENDER_PERF_ENABLED == 1)
2569             if (device_.BoundBuffer(target) != buffer.bufferId) {
2570                 ++perfCounters_.bindBuffer;
2571             }
2572 #endif
2573             device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
2574         };
2575         auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
2576             // either force the defined mip level or use defaults.
2577             glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
2578                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
2579             glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
2580                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 1000U));
2581         };
2582 
2583 #if (RENDER_VALIDATION_ENABLED == 1)
2584         if (resType != r.type) {
2585             PLUGIN_LOG_ONCE_E(
2586                 "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
2587         }
2588 #endif
2589 
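        // Map each descriptor binding to its GL binding points: samplers and sampled images go to
        // texture units, storage images to image units, and buffers to indexed UBO/SSBO bindings.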
2590         for (uint32_t index = 0; index < res.resources.size(); index++) {
2591             const auto& obj = res.resources[index];
2592             for (const auto& id : r.id[index]) {
2593                 const auto binding = index + id;
2594                 if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
2595                     bindSampler(binding, obj.sampler.samplerId);
2596                 } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2597                            (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2598                            (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2599                     if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2600                         bindSampler(binding, obj.sampler.samplerId);
2601                     } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2602                         bindSampler(binding, 0U);
2603                     }
2604                     if (obj.image.image) {
2605                         auto& dplat = obj.image.image->GetPlatformData();
2606                         bindTexture(binding, dplat);
2607 
2608                         // NOTE: the last setting wins; different mip levels cannot be bound from a single
2609                         // resource.
2610                         // Check and update (if needed) the forced miplevel.
2611                         if (dplat.mipLevel != obj.image.mipLevel) {
2612                             // NOTE: we are actually modifying the texture object bound above
2613                             const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
2614                             setMipLevel(dplat.type, dplat.mipLevel);
2615                         }
2616                     }
2617                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2618                     if (obj.image.image) {
2619                         auto& dplat = obj.image.image->GetPlatformData();
2620                         bindTextureImage(binding, obj.image, dplat);
2621                     }
2622                 } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
2623                     bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
2624                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
2625                     bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
2626                 }
2627             }
2628         }
2629     }
2630     // mark all bound.
2631     for (auto& b : boundObjects_) {
2632         b.dirty = false;
2633     }
2634 }
2635 
2636 #if (RENDER_PERF_ENABLED == 1)
2637 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2638 {
2639     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2640         const string_view& debugName = renderCommandContext.debugName;
2641         if (timers_.count(debugName) == 0) { // new timers
2642 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2643             PerfDataSet& perfDataSet = timers_[debugName];
2644             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2645             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2646             perfDataSet.counter = 0u;
2647 #else
2648             timers_.insert({ debugName, {} });
2649 #endif
2650         }
2651     }
2652 }
2653 
2654 void RenderBackendGLES::EndFrameTimers()
2655 {
2656     int64_t fullGpuTime = 0;
2657 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2658     // already in micros
2659     fullGpuTime = fullGpuCounter_;
2660     fullGpuCounter_ = 0;
2661 #endif
2662     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2663             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2664         globalPerfData) {
2665         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2666         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2667         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2668         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2669         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2670         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2671         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2672     }
2673 }
2674 
2675 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2676 {
2677 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2678     int64_t gpuMicroSeconds = 0;
2679     if (validGpuQueries_) {
2680         GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2681         PLUGIN_ASSERT(gpuQuery);
2682 
2683         gpuQuery->NextQueryIndex();
2684 
2685         const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2686         PLUGIN_ASSERT(platData.queryObject);
2687 
2688         GLint disjointOccurred = 0;
2689 #ifdef GL_GPU_DISJOINT_EXT
2690         // Clear disjoint error
2691         glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2692 #endif
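        // Read the query result back only after a full command-buffering cycle has passed,
        // so the readback below does not stall waiting for the GPU to finish.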
2693         if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2694             GLuint64 gpuNanoSeconds = 0U;
2695 #ifdef GL_GPU_DISJOINT_EXT
2696             glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2697 #else
2698             glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2699 #endif
2700             static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000;
2701             gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2702             if (gpuMicroSeconds > UINT32_MAX) {
2703                 gpuMicroSeconds = 0;
2704             }
2705             fullGpuCounter_ += gpuMicroSeconds;
2706         } else if (disjointOccurred) {
2707             PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2708         }
2709     }
2710 #endif
2711     const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2712 
2713     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2714             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2715         globalPerfData) {
2716         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2717 
2718         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2719 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2720         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2721 #endif
2722         perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
2723         perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
2724         perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
2725         perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
2726         perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
2727         perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
2728         perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
2729         perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
2730         perfData->UpdateData(name, "Backend_Count_BindSampler", perfCounters_.bindSampler);
2731         perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
2732         perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
2733     }
2734 }
2735 #endif
2736 
2737 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2738 {
2739     auto& cDepth = cacheState_.depthStencilState;
2740     cDepth = graphicsState.depthStencilState;
2741     // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2742     SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2743     SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2744     glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2745     glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2746     const uint32_t updateAllFlags =
2747         (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2748             StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2749     SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2750 }
2751 
2752 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2753 {
2754     auto& cBlend = cacheState_.colorBlendState;
2755     cBlend = graphicsState.colorBlendState;
2756     glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2757         cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2758     GLuint maxColorAttachments = 0U;
2759     glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, reinterpret_cast<GLint*>(&maxColorAttachments));
2760     maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2761     for (GLuint i = 0; i < maxColorAttachments; i++) {
2762         const auto& cBlendState = cBlend.colorAttachments[i];
2763         glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2764             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2765             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2766             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2767         if (cBlendState.enableBlend) {
2768             glEnablei(GL_BLEND, i);
2769         } else {
2770             glDisablei(GL_BLEND, i);
2771         }
2772         glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2773             GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2774             GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2775         glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2776     }
2777     // logicops are unsupported on GLES
2778 }
2779 
2780 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state..
2781 {
2782     if (cachePrimed_) {
2783         return;
2784     }
2785     cachePrimed_ = true;
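    // Apply the full graphics state once so cacheState_ mirrors the actual GL state;
    // later Update*State() calls can then apply only the deltas.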
2786     /// GRAPHICSSTATE     inputAssembly
2787     const auto& ia = graphicsState.inputAssembly;
2788     auto& cia = cacheState_.inputAssembly;
2789     cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2790     SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2791     topology_ = ia.primitiveTopology;
2792     /// GRAPHICSSTATE     rasterizationState
2793     const auto& rs = graphicsState.rasterizationState;
2794     auto& crs = cacheState_.rasterizationState;
2795     // save, since we need to handle non-fill polygon modes manually (lines may need shader help).
2796     polygonMode_ = rs.polygonMode;
2797     // GL_DEPTH_CLAMP,rs.enableDepthClamp NOT SUPPORTED    CHECK GLES 3.2
2798     crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2799     SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2800     crs.enableDepthBias = rs.enableDepthBias;
2801     SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2802     crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2803     crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2804     glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
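    // glPolygonOffset(factor, units): the slope factor maps to 'factor' and the constant factor to 'units'.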
2805     // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2806     // If cull mode Flags change...
2807     crs.cullModeFlags = rs.cullModeFlags;
2808     SetCullMode(crs);
2809     crs.frontFace = rs.frontFace;
2810     SetFrontFace(crs);
2811     crs.lineWidth = rs.lineWidth;
2812     glLineWidth(rs.lineWidth);
2813     PrimeDepthStencilState(graphicsState);
2814     PrimeBlendState(graphicsState);
2815 }
2816 
2817 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2818 {
2819     const auto& depth = graphicsState.depthStencilState;
2820     auto& cDepth = cacheState_.depthStencilState;
2821     if (depth.enableDepthTest != cDepth.enableDepthTest) {
2822         cDepth.enableDepthTest = depth.enableDepthTest;
2823         SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2824     }
2825     if (depth.depthCompareOp != cDepth.depthCompareOp) {
2826         cDepth.depthCompareOp = depth.depthCompareOp;
2827         glDepthFunc(GetCompareOp(depth.depthCompareOp));
2828     }
2829     if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2830         cDepth.enableDepthWrite = depth.enableDepthWrite;
2831         glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
2832     }
2833     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
2834         // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2835     }
2836 }
2837 
2838 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2839 {
2840     const auto& depth = graphicsState.depthStencilState;
2841     auto& cDepth = cacheState_.depthStencilState;
2842     if (depth.enableStencilTest != cDepth.enableStencilTest) {
2843         cDepth.enableStencilTest = depth.enableStencilTest;
2844         SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2845     }
2846     uint32_t setFront = 0;
2847     uint32_t setBack = 0;
2848     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2849         if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2850             setFront |= StencilSetFlags::SETREFERENCE;
2851         }
2852         if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2853             setBack |= StencilSetFlags::SETREFERENCE;
2854         }
2855     }
2856     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2857         if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2858             setFront |= StencilSetFlags::SETCOMPAREMASK;
2859         }
2860         if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2861             setBack |= StencilSetFlags::SETCOMPAREMASK;
2862         }
2863     }
2864     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2865         if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2866             setFront |= StencilSetFlags::SETWRITEMASK;
2867         }
2868         if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2869             setBack |= StencilSetFlags::SETWRITEMASK;
2870         }
2871     }
2872     if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2873         setFront |= StencilSetFlags::SETCOMPAREOP;
2874     }
2875     if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
2876         setBack |= StencilSetFlags::SETCOMPAREOP;
2877     }
2878     if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
2879         setFront |= StencilSetFlags::SETOP;
2880     }
2881     if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
2882         setBack |= StencilSetFlags::SETOP;
2883     }
2884     SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
2885 }
2886 
2887 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
2888 {
2889     UpdateDepthState(graphicsState);
2890     UpdateStencilState(graphicsState);
2891 }
2892 
2893 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
2894 {
2895     const auto& blend = graphicsState.colorBlendState;
2896     auto& cBlend = cacheState_.colorBlendState;
2897     for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
2898         const auto& blendState = blend.colorAttachments[i];
2899         auto& cBlendState = cBlend.colorAttachments[i];
2900         if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
2901             cBlendState.colorWriteMask = blendState.colorWriteMask;
2902             glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2903                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2904                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2905                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2906         }
2907 
2908         // Check if blend state has changed
2909         bool factorsChanged = false;
2910         bool opsChanged = false;
2911 
2912         if (blendState.enableBlend) {
2913             factorsChanged = !CompareBlendFactors(cBlendState, blendState);
2914             opsChanged = !CompareBlendOps(cBlendState, blendState);
2915         }
2916 
2917         if (blendState.enableBlend != cBlendState.enableBlend || factorsChanged || opsChanged) {
2918             cBlendState.enableBlend = blendState.enableBlend;
2919             if (blendState.enableBlend) {
2920                 glEnablei(GL_BLEND, i);
2921                 if (factorsChanged) {
2922                     SetBlendFactors(cBlendState, blendState);
2923                     glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2924                         GetBlendFactor(cBlendState.dstColorBlendFactor),
2925                         GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2926                         GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2927                 }
2928                 if (opsChanged) {
2929                     SetBlendOps(cBlendState, blendState);
2930                     glBlendEquationSeparatei(
2931                         i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2932                 }
2933             } else {
2934                 glDisablei(GL_BLEND, i);
2935             }
2936         }
2937     }
2938     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
2939         if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
2940             Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
2941             glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
2942                 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
2943         }
2944     }
2945     // logicOps in blend not supported on GLES
2946 }
2947 
2948 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
2949 {
2950     const auto& rs = graphicsState.rasterizationState;
2951     auto& crs = cacheState_.rasterizationState;
2952     // save, since we need to handle non-fill polygon modes manually (lines may need shader help).
2953     polygonMode_ = rs.polygonMode;
2954 #if RENDER_HAS_GL_BACKEND
2955     if (rs.polygonMode != crs.polygonMode) {
2956         crs.polygonMode = rs.polygonMode;
2957         SetPolygonMode(rs);
2958     }
2959 #endif
2960     if (rs.enableDepthClamp != crs.enableDepthClamp) {
2961         crs.enableDepthClamp = rs.enableDepthClamp;
2962         // NOT SUPPORTED    (needs an extension)
2963     }
2964     if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
2965         crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2966         SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2967     }
2968     if (rs.enableDepthBias != crs.enableDepthBias) {
2969         crs.enableDepthBias = rs.enableDepthBias;
2970         SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2971     }
2972     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
2973         if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
2974             (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
2975             crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2976             crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2977             glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2978         }
2979         // depthBiasClamp NOT SUPPORTED    (needs an extension)
2980     }
2981     // If cull mode Flags change...
2982     if (rs.cullModeFlags != crs.cullModeFlags) {
2983         crs.cullModeFlags = rs.cullModeFlags;
2984         SetCullMode(crs);
2985     }
2986     auto frontFace = rs.frontFace;
2987     if (!renderingToDefaultFbo_) {
2988         // Flip winding when not rendering to the default fbo.
2989         if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
2990             frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
2991         } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
2992             frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
2993         }
2994     }
2995     if (frontFace != crs.frontFace) {
2996         crs.frontFace = frontFace;
2997         SetFrontFace(crs);
2998     }
2999     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3000         if (rs.lineWidth != crs.lineWidth) {
3001             crs.lineWidth = rs.lineWidth;
3002             glLineWidth(rs.lineWidth);
3003         }
3004     }
3005 }
3006 
3007 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3008 {
3009     /// GRAPHICSSTATE     inputAssembly
3010     const auto& ia = graphicsState.inputAssembly;
3011     if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3012         auto& cia = cacheState_.inputAssembly;
3013         cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3014         SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3015     }
3016     topology_ = ia.primitiveTopology;
3017     UpdateRasterizationState(graphicsState);
3018     UpdateDepthStencilState(graphicsState);
3019     UpdateBlendState(graphicsState);
3020 }
3021 
3022 void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
3023 {
3024     // NOTE: ViewportDesc fields are floats.
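    // Only the cached viewport is updated here; FlushViewportScissors() applies it to GL before drawing.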
3025     bool forceV = false;
3026     bool forceD = false;
3027     if (!viewportPrimed_) {
3028         viewportPrimed_ = true;
3029         forceV = true;
3030         forceD = true;
3031     }
3032     if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
3033         (vd.height != viewport_.height)) {
3034         forceV = true;
3035     }
3036     if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
3037         forceD = true;
3038     }
3039 
3040     if (forceV) {
3041         viewport_.x = vd.x;
3042         viewport_.y = vd.y;
3043         viewport_.width = vd.width;
3044         viewport_.height = vd.height;
3045         viewportUpdated_ = true;
3046     }
3047     if (forceD) {
3048         viewport_.minDepth = vd.minDepth;
3049         viewport_.maxDepth = vd.maxDepth;
3050         viewportDepthRangeUpdated_ = true;
3051     }
3052 }
3053 
3054 void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
3055 {
3056     // NOTE: ScissorDesc fields are floats.
3057     bool force = false;
3058     if (!scissorPrimed_) {
3059         scissorPrimed_ = true;
3060         force = true;
3061     }
3062     if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3063         (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
3064         force = true;
3065     }
3066     if (force) {
3067         scissorBox_ = sd;
3068         scissorBoxUpdated_ = true;
3069     }
3070 }
3071 
3072 void RenderBackendGLES::FlushViewportScissors()
3073 {
3074     if (!currentFrameBuffer_) {
3075         return;
3076     }
3077     bool force = false;
3078     if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
3079         force = true;
3080         scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
3081     }
3082     if ((viewportUpdated_) || (force)) {
3083         viewportUpdated_ = false;
3084         // Handle top-left / bottom-left origin conversion
3085         PLUGIN_ASSERT(currentFrameBuffer_);
3086         GLint y = static_cast<GLint>(viewport_.y);
3087         const GLsizei h = static_cast<GLsizei>(viewport_.height);
3088         if (renderingToDefaultFbo_) {
3089             const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3090             y = fh - (y + h);
3091         }
3092         glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
3093     }
3094     if ((scissorBoxUpdated_) || (force)) {
3095         scissorBoxUpdated_ = false;
3096         // Handle top-left / bottom-left origin conversion
3097         GLint y = static_cast<GLint>(scissorBox_.offsetY);
3098         const GLsizei h = static_cast<GLsizei>(scissorBox_.extentHeight);
3099         if (renderingToDefaultFbo_) {
3100             const GLsizei fh = static_cast<GLsizei>(currentFrameBuffer_->height);
3101             y = fh - (y + h);
3102         }
3103         glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
3104     }
3105     if (viewportDepthRangeUpdated_) {
3106         viewportDepthRangeUpdated_ = false;
3107         glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
3108     }
3109 }
3110 RENDER_END_NAMESPACE()
3111