1 //
2 // Copyright 2019 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VulkanBarriersPerf:
7 //   Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
8 //
9 
10 #include <sstream>
11 
12 #include "ANGLEPerfTest.h"
13 #include "test_utils/gl_raii.h"
14 #include "util/shader_utils.h"
15 
16 using namespace angle;
17 
18 namespace
19 {
// Value assigned to RenderTestParams::iterationsPerStep by the params constructor; it bounds the
// loop in drawBenchmark().
constexpr unsigned int kIterationsPerStep = 10;
21 
22 struct VulkanBarriersPerfParams final : public RenderTestParams
23 {
VulkanBarriersPerfParams__anon20284::final24     VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)
25     {
26         iterationsPerStep = kIterationsPerStep;
27 
28         // Common default parameters
29         eglParameters = egl_platform::VULKAN();
30         majorVersion  = 3;
31         minorVersion  = 0;
32         windowWidth   = 256;
33         windowHeight  = 256;
34         trackGpuTime  = true;
35 
36         doBufferCopy          = bufferCopy;
37         doLargeTransfers      = largeTransfers;
38         doSlowFragmentShaders = slowFS;
39     }
40 
41     std::string story() const override;
42 
43     // Static parameters
44     static constexpr int kImageSizes[3] = {256, 512, 4096};
45     static constexpr int kBufferSize    = 4096 * 4096;
46 
47     bool doBufferCopy;
48     bool doLargeTransfers;
49     bool doSlowFragmentShaders;
50 };
51 
52 constexpr int VulkanBarriersPerfParams::kImageSizes[];
53 
operator <<(std::ostream &os, const VulkanBarriersPerfParams &params)54 std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
55 {
56     os << params.backendAndStory().substr(1);
57     return os;
58 }
59 
60 class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
61                                     public ::testing::WithParamInterface<VulkanBarriersPerfParams>
62 {
63   public:
64     VulkanBarriersPerfBenchmark();
65 
66     void initializeBenchmark() override;
67     void destroyBenchmark() override;
68     void drawBenchmark() override;
69 
70   private:
71     void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
72     void createUniformBuffer();
73     void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
74     void createResources();
75 
76     // Handle to the program object
77     GLProgram mProgram;
78 
79     // Attribute locations
80     GLint mPositionLoc;
81     GLint mTexCoordLoc;
82 
83     // Sampler location
84     GLint mSamplerLoc;
85 
86     // Texture handles
87     GLTexture mTextures[4];
88 
89     // Uniform buffer handles
90     GLBuffer mUniformBuffers[2];
91 
92     // Framebuffer handles
93     GLFramebuffer mFbos[2];
94 
95     // Buffer handle
96     GLBuffer mVertexBuffer;
97     GLBuffer mIndexBuffer;
98 
99     static constexpr size_t kSmallFboIndex = 0;
100     static constexpr size_t kLargeFboIndex = 1;
101 
102     static constexpr size_t kUniformBuffer1Index = 0;
103     static constexpr size_t kUniformBuffer2Index = 1;
104 
105     static constexpr size_t kSmallTextureIndex     = 0;
106     static constexpr size_t kLargeTextureIndex     = 1;
107     static constexpr size_t kTransferTexture1Index = 2;
108     static constexpr size_t kTransferTexture2Index = 3;
109 
110     static constexpr size_t kSmallSizeIndex = 0;
111     static constexpr size_t kLargeSizeIndex = 1;
112     static constexpr size_t kHugeSizeIndex  = 2;
113 };
114 
story() const115 std::string VulkanBarriersPerfParams::story() const
116 {
117     std::ostringstream sout;
118 
119     sout << RenderTestParams::story();
120 
121     if (doBufferCopy)
122     {
123         sout << "_buffer_copy";
124     }
125     if (doLargeTransfers)
126     {
127         sout << "_transfer";
128     }
129     if (doSlowFragmentShaders)
130     {
131         sout << "_slowfs";
132     }
133 
134     return sout.str();
135 }
136 
VulkanBarriersPerfBenchmark()137 VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
138     : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
139       mPositionLoc(-1),
140       mTexCoordLoc(-1),
141       mSamplerLoc(-1)
142 {
143     // Fails on Windows7 NVIDIA Vulkan, presumably due to old drivers. http://crbug.com/1096510
144     if (IsNVIDIA() && IsWindows7())
145     {
146         mSkipTest = true;
147     }
148 }
149 
// Vertex shader: forwards the position and texture coordinate attributes unchanged.
constexpr char kVS[] = R"(attribute vec4 a_position;
attribute vec2 a_texCoord;
varying vec2 v_texCoord;
void main()
{
    gl_Position = a_position;
    v_texCoord  = a_texCoord;
})";

// Cheap fragment shader: a single texture lookup per fragment.
constexpr char kShortFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    gl_FragColor = texture2D(s_texture, v_texCoord);
})";

// Expensive fragment shader: fragments whose x texture coordinate is below 0.2 accumulate 100
// texture lookups; all other fragments output vec4(0).
constexpr char kSlowFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    vec4 outColor = vec4(0);
    if (v_texCoord.x < 0.2)
    {
        for (int i = 0; i < 100; ++i)
        {
            outColor += texture2D(s_texture, v_texCoord);
        }
    }
    gl_FragColor = outColor;
})";
182 
createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed)183 void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
184                                                 uint32_t sizeIndex,
185                                                 bool compressed)
186 {
187     const auto &params = GetParam();
188 
189     // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
190     // backend. http://anglebug.com/2999
191 
192     glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
193     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
194                  params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
195 
196     // Disable mipmapping
197     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
198     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
199 }
200 
createUniformBuffer()201 void VulkanBarriersPerfBenchmark::createUniformBuffer()
202 {
203     const auto &params = GetParam();
204 
205     glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);
206     glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
207     glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);
208     glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
209     glBindBuffer(GL_UNIFORM_BUFFER, 0);
210 }
211 
createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex)212 void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
213                                                     uint32_t textureIndex,
214                                                     uint32_t sizeIndex)
215 {
216     createTexture(textureIndex, sizeIndex, false);
217 
218     glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
219     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
220                            mTextures[textureIndex], 0);
221 }
222 
createResources()223 void VulkanBarriersPerfBenchmark::createResources()
224 {
225     const auto &params = GetParam();
226 
227     mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
228     ASSERT_TRUE(mProgram.valid());
229 
230     // Get the attribute locations
231     mPositionLoc = glGetAttribLocation(mProgram, "a_position");
232     mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
233 
234     // Get the sampler location
235     mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
236 
237     // Build the vertex buffer
238     GLfloat vertices[] = {
239         -0.5f, 0.5f,  0.0f,  // Position 0
240         0.0f,  0.0f,         // TexCoord 0
241         -0.5f, -0.5f, 0.0f,  // Position 1
242         0.0f,  1.0f,         // TexCoord 1
243         0.5f,  -0.5f, 0.0f,  // Position 2
244         1.0f,  1.0f,         // TexCoord 2
245         0.5f,  0.5f,  0.0f,  // Position 3
246         1.0f,  0.0f          // TexCoord 3
247     };
248 
249     glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
250     glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
251 
252     GLushort indices[] = {0, 1, 2, 0, 2, 3};
253     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
254     glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
255 
256     // Use tightly packed data
257     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
258 
259     // Create four textures.  Two of them are going to be framebuffers, and two are used for large
260     // transfers.
261     createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
262     createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
263     createUniformBuffer();
264 
265     if (params.doLargeTransfers)
266     {
267         createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
268         createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
269     }
270 }
271 
initializeBenchmark()272 void VulkanBarriersPerfBenchmark::initializeBenchmark()
273 {
274     createResources();
275 
276     glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
277 
278     ASSERT_GL_NO_ERROR();
279 }
280 
destroyBenchmark()281 void VulkanBarriersPerfBenchmark::destroyBenchmark() {}
282 
drawBenchmark()283 void VulkanBarriersPerfBenchmark::drawBenchmark()
284 {
285     const auto &params = GetParam();
286 
287     glUseProgram(mProgram);
288 
289     // Bind the buffers
290     glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
291     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
292 
293     // Load the vertex position
294     glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
295     // Load the texture coordinate
296     glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
297                           reinterpret_cast<void *>(3 * sizeof(GLfloat)));
298 
299     glEnableVertexAttribArray(mPositionLoc);
300     glEnableVertexAttribArray(mTexCoordLoc);
301 
302     // Set the texture sampler to texture unit to 0
303     glUniform1i(mSamplerLoc, 0);
304 
305     /*
306      * The perf benchmark does the following:
307      *
308      * - Alternately clear and draw from fbo 1 into fbo 2 and back.  This would use the color
309      * attachment and shader read-only layouts in the fragment shader and color attachment stages.
310      *
311      * - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts
312      * in the transfer stage.
313      *
314      * Once compressed texture copies are supported, alternately copy large chunks of data from
315      * texture 1 into texture 2 and back.  This would use the transfer layouts in the transfer
316      * stage.
317      *
318      * Once compute shader support is added, another independent set of operations could be a few
319      * dispatches.  This would use the general and shader read-only layouts in the compute stage.
320      *
321      * The idea is to create independent pipelines of operations that would run in parallel on the
322      * GPU.  Regressions or inefficiencies in the barrier implementation could result in
323      * serialization of these jobs, resulting in a hit in performance.
324      *
325      * The above operations for example should ideally run on the GPU threads in parallel:
326      *
327      * + |---draw---||---draw---||---draw---||---draw---||---draw---|
328      * + |----buffer copy----||----buffer copy----||----buffer copy----|
329      * + |-----------texture copy------------||-----------texture copy------------|
330      * + |-----dispatch------||------dispatch------||------dispatch------|
331      *
332      * If barriers are too restrictive, situations like this could happen (draw is blocking
333      * copy):
334      *
335      * + |---draw---||---draw---||---draw---||---draw---||---draw---|
336      * +             |------------copy------------||-----------copy------------|
337      *
338      * Or like this (copy is blocking draw):
339      *
340      * + |---draw---|                     |---draw---|                     |---draw---|
341      * + |--------------copy-------------||-------------copy--------------|
342      *
343      * Or like this (draw and copy blocking each other):
344      *
345      * + |---draw---|                                 |---draw---|
346      * +             |------------copy---------------|            |------------copy------------|
347      *
348      * The idea of doing slow FS calls is to make the second case above slower (by making the draw
349      * slower than the transfer):
350      *
351      * + |------------------draw------------------|                                 |-...draw...-|
352      * + |--------------copy----------------|       |-------------copy-------------|
353      */
354 
355     startGpuTimer();
356     for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
357     {
358         bool altEven = iteration % 2 == 0;
359 
360         const int fboDestIndex            = altEven ? kLargeFboIndex : kSmallFboIndex;
361         const int fboTexSrcIndex          = altEven ? kSmallTextureIndex : kLargeTextureIndex;
362         const int fboDestSizeIndex        = altEven ? kLargeSizeIndex : kSmallSizeIndex;
363         const int uniformBufferReadIndex  = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;
364         const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;
365 
366         if (params.doBufferCopy)
367         {
368             // Transfer data between the 2 Uniform buffers
369             glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);
370             glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);
371             glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
372                                 params.kBufferSize);
373         }
374 
375         // Bind the framebuffer
376         glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);
377 
378         // Set the viewport
379         glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],
380                    params.kImageSizes[fboDestSizeIndex]);
381 
382         // Clear the color buffer
383         glClear(GL_COLOR_BUFFER_BIT);
384 
385         // Bind the texture
386         glActiveTexture(GL_TEXTURE0);
387         glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);
388 
389         ASSERT_GL_NO_ERROR();
390 
391         glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
392     }
393     stopGpuTimer();
394 
395     ASSERT_GL_NO_ERROR();
396 }
397 
398 }  // namespace
399 
// Executes the benchmark once per instantiated parameter set.
TEST_P(VulkanBarriersPerfBenchmark, Run)
{
    run();
}
404 
405 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);
406 ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
407                        VulkanBarriersPerfParams(false, false, false),
408                        VulkanBarriersPerfParams(true, false, false),
409                        VulkanBarriersPerfParams(false, true, false),
410                        VulkanBarriersPerfParams(false, true, true));
411