1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL (ES) Module
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Shader performance measurer; handles calibration and measurement
22 *//*--------------------------------------------------------------------*/
23
24#include "glsShaderPerformanceMeasurer.hpp"
25#include "gluDefs.hpp"
26#include "tcuTestLog.hpp"
27#include "tcuRenderTarget.hpp"
28#include "deStringUtil.hpp"
29#include "deMath.h"
30#include "deClock.h"
31
32#include "glwFunctions.hpp"
33#include "glwEnums.hpp"
34
35#include <algorithm>
36
37using tcu::Vec4;
38using std::string;
39using std::vector;
40using tcu::TestLog;
41using namespace glw; // GL types
42
43namespace deqp
44{
45namespace gls
46{
47
/*--------------------------------------------------------------------*//*!
 * \brief Linearly interpolate a value over a triangle.
 *
 * v0 is the value at (x, y) = (0, 0); v2 is reached when x = 1 and v1
 * when y = 1.
 *//*--------------------------------------------------------------------*/
static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
{
	const float alongX = (v2 - v0) * x;
	const float alongY = (v1 - v0) * y;

	return v0 + alongX + alongY;
}
52
53static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
54{
55	// \note Top left fill rule.
56	if (x + y < 1.0f)
57		return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
58	else
59		return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
60}
61
/*--------------------------------------------------------------------*//*!
 * \brief Number of vertices in a grid of gridSizeX x gridSizeY quads.
 *//*--------------------------------------------------------------------*/
static inline int getNumVertices (int gridSizeX, int gridSizeY)
{
	const int verticesPerRow	= gridSizeX + 1;
	const int numRows			= gridSizeY + 1;

	return verticesPerRow * numRows;
}
66
/*--------------------------------------------------------------------*//*!
 * \brief Number of indices in a grid of gridSizeX x gridSizeY quads,
 *        at six indices per quad.
 *//*--------------------------------------------------------------------*/
static inline int getNumIndices (int gridSizeX, int gridSizeY)
{
	const int numQuads			= gridSizeX * gridSizeY;
	const int indicesPerQuad	= 6;

	return numQuads * indicesPerQuad;
}
71
72static inline deUint16 getVtxIndex (int x, int y, int gridSizeX)
73{
74	return (deUint16)(y*(gridSizeX+1) + x);
75}
76
77static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const AttribSpec& spec)
78{
79	const int numComponents = 4;
80
81	DE_ASSERT((gridSizeX + 1)*(gridSizeY + 1) <= (1<<16)); // Must fit into 16-bit indices.
82	DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
83	dst.resize((gridSizeX + 1) * (gridSizeY + 1) * 4);
84
85	for (int y = 0; y <= gridSizeY; y++)
86	{
87		for (int x = 0; x <= gridSizeX; x++)
88		{
89			float	xf	= (float)x / (float)gridSizeX;
90			float	yf	= (float)y / (float)gridSizeY;
91
92			for (int compNdx = 0; compNdx < numComponents; compNdx++)
93				dst[getVtxIndex(x, y, gridSizeX)*numComponents + compNdx] = triQuadInterpolate(xf, yf, tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]));
94		}
95	}
96}
97
98static void generateIndices (std::vector<deUint16>& dst, int gridSizeX, int gridSizeY)
99{
100	const int	numIndicesPerQuad	= 6;
101	int			numIndices			= gridSizeX * gridSizeY * numIndicesPerQuad;
102	dst.resize(numIndices);
103
104	for (int y = 0; y < gridSizeY; y++)
105	{
106		for (int x = 0; x < gridSizeX; x++)
107		{
108			int quadNdx = y*gridSizeX + x;
109
110			dst[quadNdx*numIndicesPerQuad + 0] = getVtxIndex(x+0, y+0, gridSizeX);
111			dst[quadNdx*numIndicesPerQuad + 1] = getVtxIndex(x+1, y+0, gridSizeX);
112			dst[quadNdx*numIndicesPerQuad + 2] = getVtxIndex(x+0, y+1, gridSizeX);
113
114			dst[quadNdx*numIndicesPerQuad + 3] = getVtxIndex(x+0, y+1, gridSizeX);
115			dst[quadNdx*numIndicesPerQuad + 4] = getVtxIndex(x+1, y+0, gridSizeX);
116			dst[quadNdx*numIndicesPerQuad + 5] = getVtxIndex(x+1, y+1, gridSizeX);
117		}
118	}
119}
120
121ShaderPerformanceMeasurer::ShaderPerformanceMeasurer (const glu::RenderContext& renderCtx, PerfCaseType measureType)
122	: m_renderCtx			(renderCtx)
123	, m_gridSizeX			(measureType == CASETYPE_FRAGMENT	? 1		: 255)
124	, m_gridSizeY			(measureType == CASETYPE_FRAGMENT	? 1		: 255)
125	, m_viewportWidth		(measureType == CASETYPE_VERTEX		? 32	: renderCtx.getRenderTarget().getWidth())
126	, m_viewportHeight		(measureType == CASETYPE_VERTEX		? 32	: renderCtx.getRenderTarget().getHeight())
127	, m_state(STATE_UNINITIALIZED)
128	, m_isFirstIteration	(false)
129	, m_prevRenderStartTime	(0)
130	, m_result				(-1.0f, -1.0f)
131	, m_indexBuffer			(0)
132	, m_vao					(0)
133{
134}
135
136void ShaderPerformanceMeasurer::logParameters (TestLog& log) const
137{
138	log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage
139		<< TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
140}
141
142void ShaderPerformanceMeasurer::init (deUint32 program, const vector<AttribSpec>& attributes, int calibratorInitialNumCalls)
143{
144	DE_ASSERT(m_state == STATE_UNINITIALIZED);
145
146	const glw::Functions&	gl		= m_renderCtx.getFunctions();
147	const bool				useVAO	= glu::isContextTypeGLCore(m_renderCtx.getType());
148
149	if (useVAO)
150	{
151		DE_ASSERT(!m_vao);
152		gl.genVertexArrays(1, &m_vao);
153		gl.bindVertexArray(m_vao);
154		GLU_EXPECT_NO_ERROR(gl.getError(), "Create VAO");
155	}
156
157	// Validate that we have sane grid and viewport setup.
158
159	DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
160
161	{
162		bool widthTooSmall		= m_renderCtx.getRenderTarget().getWidth() < m_viewportWidth;
163		bool heightTooSmall		= m_renderCtx.getRenderTarget().getHeight() < m_viewportHeight;
164
165		if (widthTooSmall || heightTooSmall)
166			throw tcu::NotSupportedError("Render target too small (" +
167											 (widthTooSmall  ?									   "width must be at least "  + de::toString(m_viewportWidth)  : "") +
168											 (heightTooSmall ? string(widthTooSmall ? ", " : "") + "height must be at least " + de::toString(m_viewportHeight) : "") +
169											 ")");
170	}
171
172	TCU_CHECK_INTERNAL(de::inRange(m_viewportWidth,		1, m_renderCtx.getRenderTarget().getWidth()) &&
173					   de::inRange(m_viewportHeight,	1, m_renderCtx.getRenderTarget().getHeight()));
174
175	// Insert a_position to attributes.
176	m_attributes = attributes;
177	m_attributes.push_back(AttribSpec("a_position",
178									  Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
179									  Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
180									  Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
181									  Vec4( 1.0f,  1.0f, 0.0f, 1.0f)));
182
183	// Generate indices.
184	{
185		std::vector<deUint16> indices;
186		generateIndices(indices, m_gridSizeX, m_gridSizeY);
187
188		gl.genBuffers(1, &m_indexBuffer);
189		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
190		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (GLsizeiptr)(indices.size()*sizeof(deUint16)), &indices[0], GL_STATIC_DRAW);
191
192		GLU_EXPECT_NO_ERROR(gl.getError(), "Upload index data");
193	}
194
195	// Generate vertices.
196	m_attribBuffers.resize(m_attributes.size(), 0);
197	gl.genBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
198
199	for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
200	{
201		std::vector<float> vertices;
202		generateVertices(vertices, m_gridSizeX, m_gridSizeY, m_attributes[attribNdx]);
203
204		gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
205		gl.bufferData(GL_ARRAY_BUFFER, (GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
206	}
207
208	GLU_EXPECT_NO_ERROR(gl.getError(), "Upload vertex data");
209
210	// Setup attribute bindings.
211	for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
212	{
213		int location = gl.getAttribLocation(program, m_attributes[attribNdx].name.c_str());
214
215		if (location >= 0)
216		{
217			gl.enableVertexAttribArray(location);
218			gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
219			gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
220		}
221
222		GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex attribute state");
223	}
224
225	gl.useProgram(program);
226	GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram()");
227
228	m_state = STATE_MEASURING;
229	m_isFirstIteration = true;
230
231	m_calibrator.clear(CalibratorParameters(calibratorInitialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
232											1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
233}
234
235void ShaderPerformanceMeasurer::deinit (void)
236{
237	const glw::Functions& gl = m_renderCtx.getFunctions();
238
239	if (m_indexBuffer)
240	{
241		gl.deleteBuffers(1, &m_indexBuffer);
242		m_indexBuffer = 0;
243	}
244
245	if (m_vao)
246	{
247		gl.deleteVertexArrays(1, &m_vao);
248		m_vao = 0;
249	}
250
251	if (!m_attribBuffers.empty())
252	{
253		gl.deleteBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
254		m_attribBuffers.clear();
255	}
256
257	m_state = STATE_UNINITIALIZED;
258}
259
260void ShaderPerformanceMeasurer::render (int numDrawCalls)
261{
262	const glw::Functions&	gl			= m_renderCtx.getFunctions();
263	GLsizei					numIndices	= (GLsizei)getNumIndices(m_gridSizeX, m_gridSizeY);
264
265	gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
266
267	for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
268		gl.drawElements(GL_TRIANGLES, numIndices, GL_UNSIGNED_SHORT, DE_NULL);
269}
270
271void ShaderPerformanceMeasurer::iterate (void)
272{
273	DE_ASSERT(m_state == STATE_MEASURING);
274
275	deUint64 renderStartTime = deGetMicroseconds();
276	render(m_calibrator.getCallCount()); // Always render. This gives more stable performance behavior.
277
278	TheilSenCalibrator::State calibratorState = m_calibrator.getState();
279
280	if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
281	{
282		m_calibrator.recomputeParameters();
283
284		m_isFirstIteration = true;
285		m_prevRenderStartTime = renderStartTime;
286	}
287	else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
288	{
289		if (!m_isFirstIteration)
290			m_calibrator.recordIteration(renderStartTime - m_prevRenderStartTime);
291
292		m_isFirstIteration = false;
293		m_prevRenderStartTime = renderStartTime;
294	}
295	else
296	{
297		DE_ASSERT(calibratorState == TheilSenCalibrator::STATE_FINISHED);
298
299		GLU_EXPECT_NO_ERROR(m_renderCtx.getFunctions().getError(), "End of rendering");
300
301		const MeasureState& measureState = m_calibrator.getMeasureState();
302
303		// Compute result.
304		deUint64	totalTime			= measureState.getTotalTime();
305		int			numFrames			= (int)measureState.frameTimes.size();
306		deInt64		numQuadGrids		= measureState.numDrawCalls * numFrames;
307		deInt64		numPixels			= (deInt64)m_viewportWidth * (deInt64)m_viewportHeight * numQuadGrids;
308		deInt64		numVertices			= (deInt64)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
309		double		mfragPerSecond		= (double)numPixels / (double)totalTime;
310		double		mvertPerSecond		= (double)numVertices / (double)totalTime;
311
312		m_result = Result((float)mvertPerSecond, (float)mfragPerSecond);
313		m_state = STATE_FINISHED;
314	}
315}
316
317void ShaderPerformanceMeasurer::logMeasurementInfo (TestLog& log) const
318{
319	DE_ASSERT(m_state == STATE_FINISHED);
320
321	const MeasureState& measureState(m_calibrator.getMeasureState());
322
323	// Compute totals.
324	deUint64	totalTime			= measureState.getTotalTime();
325	int			numFrames			= (int)measureState.frameTimes.size();
326	deInt64		numQuadGrids		= measureState.numDrawCalls * numFrames;
327	deInt64		numPixels			= (deInt64)m_viewportWidth * (deInt64)m_viewportHeight * numQuadGrids;
328	deInt64		numVertices			= (deInt64)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
329	double		mfragPerSecond		= (double)numPixels / (double)totalTime;
330	double		mvertPerSecond		= (double)numVertices / (double)totalTime;
331	double		framesPerSecond		= (double)numFrames / ((double)totalTime / 1000000.0);
332
333	logCalibrationInfo(log, m_calibrator);
334
335	log << TestLog::Float("FramesPerSecond",		"Frames per second in measurement",	"Frames/s",				QP_KEY_TAG_PERFORMANCE,	(float)framesPerSecond)
336		<< TestLog::Float("FragmentsPerVertices",	"Vertex-fragment ratio",			"Fragments/Vertices",	QP_KEY_TAG_NONE,		(float)numPixels / (float)numVertices)
337		<< TestLog::Float("FragmentPerf",			"Fragment performance",				"MPix/s",				QP_KEY_TAG_PERFORMANCE, (float)mfragPerSecond)
338		<< TestLog::Float("VertexPerf",				"Vertex performance",				"MVert/s",				QP_KEY_TAG_PERFORMANCE, (float)mvertPerSecond);
339}
340
341void ShaderPerformanceMeasurer::setGridSize (int gridW, int gridH)
342{
343	DE_ASSERT(m_state == STATE_UNINITIALIZED);
344	DE_ASSERT(de::inBounds(gridW, 1, 256) && de::inBounds(gridH, 1, 256));
345	m_gridSizeX		= gridW;
346	m_gridSizeY		= gridH;
347}
348
349void ShaderPerformanceMeasurer::setViewportSize (int width, int height)
350{
351	DE_ASSERT(m_state == STATE_UNINITIALIZED);
352	DE_ASSERT(de::inRange(width,	1, m_renderCtx.getRenderTarget().getWidth()) &&
353			  de::inRange(height,	1, m_renderCtx.getRenderTarget().getHeight()));
354	m_viewportWidth		= width;
355	m_viewportHeight	= height;
356}
357
358} // gls
359} // deqp
360