//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// VulkanBarriersPerf:
//   Performance tests for ANGLE's Vulkan backend w.r.t. barrier efficiency.
//

#include <sstream>

#include "ANGLEPerfTest.h"
#include "test_utils/gl_raii.h"
#include "util/shader_utils.h"

using namespace angle;

namespace
{
constexpr unsigned int kIterationsPerStep = 10;

struct VulkanBarriersPerfParams final : public RenderTestParams
{
    VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)
    {
        iterationsPerStep = kIterationsPerStep;

        // Common default parameters
        eglParameters = egl_platform::VULKAN();
        majorVersion  = 3;
        minorVersion  = 0;
        windowWidth   = 256;
        windowHeight  = 256;
        trackGpuTime  = true;

        doBufferCopy          = bufferCopy;
        doLargeTransfers      = largeTransfers;
        doSlowFragmentShaders = slowFS;
    }

    std::string story() const override;

    // Static parameters: texture dimensions used for the small, large and huge size indices, and
    // the uniform buffer size in bytes (16 MB).
    static constexpr int kImageSizes[3] = {256, 512, 4096};
    static constexpr int kBufferSize    = 4096 * 4096;

    bool doBufferCopy;
    bool doLargeTransfers;
    bool doSlowFragmentShaders;
};

// Out-of-line definition required because kImageSizes is ODR-used (no longer needed as of C++17).
constexpr int VulkanBarriersPerfParams::kImageSizes[];

std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
{
    os << params.backendAndStory().substr(1);
    return os;
}

class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
                                    public ::testing::WithParamInterface<VulkanBarriersPerfParams>
{
  public:
    VulkanBarriersPerfBenchmark();

    void initializeBenchmark() override;
    void destroyBenchmark() override;
    void drawBenchmark() override;

  private:
    void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
    void createUniformBuffer();
    void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
    void createResources();

    // Handle to the program object
    GLProgram mProgram;

    // Attribute locations
    GLint mPositionLoc;
    GLint mTexCoordLoc;

    // Sampler location
    GLint mSamplerLoc;

    // Texture handles
    GLTexture mTextures[4];

    // Uniform buffer handles
    GLBuffer mUniformBuffers[2];

    // Framebuffer handles
    GLFramebuffer mFbos[2];

    // Vertex and index buffer handles
    GLBuffer mVertexBuffer;
    GLBuffer mIndexBuffer;

    static constexpr size_t kSmallFboIndex = 0;
    static constexpr size_t kLargeFboIndex = 1;

    static constexpr size_t kUniformBuffer1Index = 0;
    static constexpr size_t kUniformBuffer2Index = 1;

    static constexpr size_t kSmallTextureIndex     = 0;
    static constexpr size_t kLargeTextureIndex     = 1;
    static constexpr size_t kTransferTexture1Index = 2;
    static constexpr size_t kTransferTexture2Index = 3;

    static constexpr size_t kSmallSizeIndex = 0;
    static constexpr size_t kLargeSizeIndex = 1;
    static constexpr size_t kHugeSizeIndex  = 2;
};

std::string VulkanBarriersPerfParams::story() const
{
    std::ostringstream sout;

    sout << RenderTestParams::story();

    if (doBufferCopy)
    {
        sout << "_buffer_copy";
    }
    if (doLargeTransfers)
    {
        sout << "_transfer";
    }
    if (doSlowFragmentShaders)
    {
        sout << "_slowfs";
    }

    return sout.str();
}

VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
    : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
      mPositionLoc(-1),
      mTexCoordLoc(-1),
      mSamplerLoc(-1)
{
    // Fails on Windows7 NVIDIA Vulkan, presumably due to old drivers. http://crbug.com/1096510
    if (IsNVIDIA() && IsWindows7())
    {
        mSkipTest = true;
    }
}

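// Minimal pass-through vertex shader shared by both fragment shaders below.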
constexpr char kVS[] = R"(attribute vec4 a_position;
attribute vec2 a_texCoord;
varying vec2 v_texCoord;
void main()
{
    gl_Position = a_position;
    v_texCoord = a_texCoord;
})";

constexpr char kShortFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    gl_FragColor = texture2D(s_texture, v_texCoord);
})";

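// Artificially expensive fragment shader: fragments with v_texCoord.x < 0.2 loop over 100 texture
// fetches, making draws take longer relative to the concurrent transfer work (see the discussion
// in drawBenchmark()).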
constexpr char kSlowFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    vec4 outColor = vec4(0);
    if (v_texCoord.x < 0.2)
    {
        for (int i = 0; i < 100; ++i)
        {
            outColor += texture2D(s_texture, v_texCoord);
        }
    }
    gl_FragColor = outColor;
})";

void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
                                                uint32_t sizeIndex,
                                                bool compressed)
{
    const auto &params = GetParam();

    // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
    // backend. http://anglebug.com/2999

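    // Until that is supported, the |compressed| parameter is effectively unused and every texture
    // is allocated as an uncompressed RGBA texture.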
    glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
                 params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

    // Disable mipmapping
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}

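// Allocates the two large uniform buffers that drawBenchmark() ping-pongs data between when
// doBufferCopy is enabled.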
void VulkanBarriersPerfBenchmark::createUniformBuffer()
{
    const auto &params = GetParam();

    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);
    glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);
    glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
    glBindBuffer(GL_UNIFORM_BUFFER, 0);
}

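// Creates a framebuffer of the given size, backed by a newly allocated color texture.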
void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
                                                    uint32_t textureIndex,
                                                    uint32_t sizeIndex)
{
    createTexture(textureIndex, sizeIndex, false);

    glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           mTextures[textureIndex], 0);
}

void VulkanBarriersPerfBenchmark::createResources()
{
    const auto &params = GetParam();

    mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
    ASSERT_TRUE(mProgram.valid());

    // Get the attribute locations
    mPositionLoc = glGetAttribLocation(mProgram, "a_position");
    mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");

    // Get the sampler location
    mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");

    // Build the vertex buffer
    GLfloat vertices[] = {
        -0.5f, 0.5f,  0.0f,  // Position 0
        0.0f,  0.0f,         // TexCoord 0
        -0.5f, -0.5f, 0.0f,  // Position 1
        0.0f,  1.0f,         // TexCoord 1
        0.5f,  -0.5f, 0.0f,  // Position 2
        1.0f,  1.0f,         // TexCoord 2
        0.5f,  0.5f,  0.0f,  // Position 3
        1.0f,  0.0f          // TexCoord 3
    };

    glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);

    GLushort indices[] = {0, 1, 2, 0, 2, 3};
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);

    // Use tightly packed data
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

    // Create the textures.  Two of them back the framebuffers, and two more are used for large
    // transfers when doLargeTransfers is enabled.
    createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
    createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
    createUniformBuffer();

    if (params.doLargeTransfers)
    {
        createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
        createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
    }
}

void VulkanBarriersPerfBenchmark::initializeBenchmark()
{
    createResources();

    glClearColor(0.0f, 0.0f, 0.0f, 0.0f);

    ASSERT_GL_NO_ERROR();
}

void VulkanBarriersPerfBenchmark::destroyBenchmark() {}

void VulkanBarriersPerfBenchmark::drawBenchmark()
{
    const auto &params = GetParam();

    glUseProgram(mProgram);

    // Bind the buffers
    glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);

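    // The vertex data is interleaved: each vertex is 3 position floats followed by 2 texture
    // coordinate floats, hence the 5 * sizeof(GLfloat) stride below.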
    // Load the vertex position
    glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
    // Load the texture coordinate
    glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
                          reinterpret_cast<void *>(3 * sizeof(GLfloat)));

    glEnableVertexAttribArray(mPositionLoc);
    glEnableVertexAttribArray(mTexCoordLoc);

    // Set the sampler to use texture unit 0
    glUniform1i(mSamplerLoc, 0);

    /*
     * The perf benchmark does the following:
     *
     * - Alternately clear and draw from fbo 1 into fbo 2 and back.  This would use the color
     *   attachment and shader read-only layouts in the fragment shader and color attachment
     *   stages.
     *
     * - Alternately copy data between the 2 uniform buffers.  This would use the transfer
     *   layouts in the transfer stage.
     *
     * Once compressed texture copies are supported, alternately copy large chunks of data from
     * texture 1 into texture 2 and back.  This would use the transfer layouts in the transfer
     * stage.
     *
     * Once compute shader support is added, another independent set of operations could be a few
     * dispatches.  This would use the general and shader read-only layouts in the compute stage.
     *
     * The idea is to create independent pipelines of operations that can run in parallel on the
     * GPU.  Regressions or inefficiencies in the barrier implementation could serialize these
     * jobs, resulting in a performance hit.
     *
     * For example, the operations above should ideally run in parallel on the GPU:
     *
     * + |---draw---||---draw---||---draw---||---draw---||---draw---|
     * + |----buffer copy----||----buffer copy----||----buffer copy----|
     * + |-----------texture copy------------||-----------texture copy------------|
     * + |-----dispatch------||------dispatch------||------dispatch------|
     *
     * If barriers are too restrictive, situations like this could happen (draw is blocking
     * copy):
     *
     * + |---draw---||---draw---||---draw---||---draw---||---draw---|
     * + |------------copy------------||-----------copy------------|
     *
     * Or like this (copy is blocking draw):
     *
     * + |---draw---| |---draw---| |---draw---|
     * + |--------------copy-------------||-------------copy--------------|
     *
     * Or like this (draw and copy blocking each other):
     *
     * + |---draw---| |---draw---|
     * + |------------copy---------------| |------------copy------------|
     *
     * The idea behind the slow fragment shader is to make the second case above slower (by
     * making the draw slower than the transfer):
     *
     * + |------------------draw------------------| |-...draw...-|
     * + |--------------copy----------------| |-------------copy-------------|
     */

    startGpuTimer();
    for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
    {
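        // Ping-pong between the two framebuffers: on even iterations, sample the small texture
        // while rendering into the large framebuffer; on odd iterations, the reverse.  The
        // direction of the uniform buffer copy alternates the same way.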
        bool altEven = iteration % 2 == 0;

        const int fboDestIndex            = altEven ? kLargeFboIndex : kSmallFboIndex;
        const int fboTexSrcIndex          = altEven ? kSmallTextureIndex : kLargeTextureIndex;
        const int fboDestSizeIndex        = altEven ? kLargeSizeIndex : kSmallSizeIndex;
        const int uniformBufferReadIndex  = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;
        const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;

        if (params.doBufferCopy)
        {
            // Transfer data between the 2 uniform buffers
            glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);
            glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);
            glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
                                params.kBufferSize);
        }

        // Bind the framebuffer
        glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);

        // Set the viewport
        glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],
                   params.kImageSizes[fboDestSizeIndex]);

        // Clear the color buffer
        glClear(GL_COLOR_BUFFER_BIT);

        // Bind the texture
        glActiveTexture(GL_TEXTURE0);
        glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);

        ASSERT_GL_NO_ERROR();

        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
    }
    stopGpuTimer();

    ASSERT_GL_NO_ERROR();
}

}  // namespace

TEST_P(VulkanBarriersPerfBenchmark, Run)
{
    run();
}

GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);
ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
                       VulkanBarriersPerfParams(false, false, false),
                       VulkanBarriersPerfParams(true, false, false),
                       VulkanBarriersPerfParams(false, true, false),
                       VulkanBarriersPerfParams(false, true, true));