1/* 2 * Copyright 2019 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7#include "bench/Benchmark.h" 8#include "bench/ResultsWriter.h" 9#include "bench/SkSLBench.h" 10#include "include/core/SkCanvas.h" 11#include "src/gpu/GrCaps.h" 12#include "src/gpu/GrRecordingContextPriv.h" 13#include "src/gpu/mock/GrMockCaps.h" 14#include "src/sksl/SkSLCompiler.h" 15#include "src/sksl/SkSLDSLParser.h" 16 17class SkSLCompilerStartupBench : public Benchmark { 18protected: 19 const char* onGetName() override { 20 return "sksl_compiler_startup"; 21 } 22 23 bool isSuitableFor(Backend backend) override { 24 return backend == kNonRendering_Backend; 25 } 26 27 void onDraw(int loops, SkCanvas*) override { 28 GrShaderCaps caps; 29 for (int i = 0; i < loops; i++) { 30 SkSL::Compiler compiler(&caps); 31 } 32 } 33}; 34 35DEF_BENCH(return new SkSLCompilerStartupBench();) 36 37enum class Output { 38 kNone, 39 kGLSL, 40 kMetal, 41 kSPIRV 42}; 43 44class SkSLCompileBench : public Benchmark { 45public: 46 static const char* output_string(Output output) { 47 switch (output) { 48 case Output::kNone: return ""; 49 case Output::kGLSL: return "glsl_"; 50 case Output::kMetal: return "metal_"; 51 case Output::kSPIRV: return "spirv_"; 52 } 53 SkUNREACHABLE; 54 } 55 56 SkSLCompileBench(SkSL::String name, const char* src, bool optimize, Output output) 57 : fName(SkSL::String("sksl_") + (optimize ? "" : "unoptimized_") + output_string(output) + 58 name) 59 , fSrc(src) 60 , fCaps(GrContextOptions(), GrMockOptions()) 61 , fCompiler(fCaps.shaderCaps()) 62 , fOutput(output) { 63 fSettings.fOptimize = optimize; 64 fSettings.fDSLMangling = false; 65 // The test programs we compile don't follow Vulkan rules and thus produce invalid 66 // SPIR-V. This is harmless, so long as we don't try to validate them. 
67 fSettings.fValidateSPIRV = false; 68 } 69 70protected: 71 const char* onGetName() override { 72 return fName.c_str(); 73 } 74 75 bool isSuitableFor(Backend backend) override { 76 return backend == kNonRendering_Backend; 77 } 78 79 void onDraw(int loops, SkCanvas* canvas) override { 80 for (int i = 0; i < loops; i++) { 81 std::unique_ptr<SkSL::Program> program = SkSL::DSLParser(&fCompiler, 82 fSettings, 83 SkSL::ProgramKind::kFragment, 84 fSrc).program(); 85 if (fCompiler.errorCount()) { 86 SK_ABORT("shader compilation failed: %s\n", fCompiler.errorText().c_str()); 87 } 88 SkSL::String result; 89 switch (fOutput) { 90 case Output::kNone: break; 91 case Output::kGLSL: SkAssertResult(fCompiler.toGLSL(*program, &result)); break; 92 case Output::kMetal: SkAssertResult(fCompiler.toMetal(*program, &result)); break; 93 case Output::kSPIRV: SkAssertResult(fCompiler.toSPIRV(*program, &result)); break; 94 } 95 } 96 } 97 98private: 99 SkSL::String fName; 100 SkSL::String fSrc; 101 GrMockCaps fCaps; 102 SkSL::Compiler fCompiler; 103 SkSL::Program::Settings fSettings; 104 Output fOutput; 105 106 using INHERITED = Benchmark; 107}; 108 109/////////////////////////////////////////////////////////////////////////////// 110 111#define COMPILER_BENCH(name, text) \ 112static constexpr char name ## _SRC[] = text; \ 113DEF_BENCH(return new SkSLCompileBench(#name, name ## _SRC, /*optimize=*/false, Output::kNone);) \ 114DEF_BENCH(return new SkSLCompileBench(#name, name ## _SRC, /*optimize=*/true, Output::kNone);) \ 115DEF_BENCH(return new SkSLCompileBench(#name, name ## _SRC, /*optimize=*/true, Output::kGLSL);) \ 116DEF_BENCH(return new SkSLCompileBench(#name, name ## _SRC, /*optimize=*/true, Output::kMetal);) \ 117DEF_BENCH(return new SkSLCompileBench(#name, name ## _SRC, /*optimize=*/true, Output::kSPIRV);) 118 119// This fragment shader is from the third tile on the top row of GM_gradients_2pt_conical_outside. 
// Each COMPILER_BENCH invocation below keeps its shader text in a static array and registers five
// SkSLCompileBench variants for it (see the COMPILER_BENCH macro). The raw-string contents are
// the benchmark input and must not be edited.
COMPILER_BENCH(large, R"(
layout(set=0, binding=0) uniform half urange_Stage1_c0;
layout(set=0, binding=0) uniform half4 uleftBorderColor_Stage1_c0_c0_c0;
layout(set=0, binding=0) uniform half4 urightBorderColor_Stage1_c0_c0_c0;
layout(set=0, binding=0) uniform float3x3 umatrix_Stage1_c0_c0_c0_c0;
layout(set=0, binding=0) uniform half2 ufocalParams_Stage1_c0_c0_c0_c0_c0;
layout(set=0, binding=0) uniform float4 uscale0_1_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 uscale2_3_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 uscale4_5_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 uscale6_7_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 ubias0_1_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 ubias2_3_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 ubias4_5_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform float4 ubias6_7_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform half4 uthresholds1_7_Stage1_c0_c0_c0_c1;
layout(set=0, binding=0) uniform half4 uthresholds9_13_Stage1_c0_c0_c0_c1;
flat in half4 vcolor_Stage0;
noperspective in float2 vTransformedCoords_0_Stage0;
out half4 sk_FragColor;
half4 TwoPointConicalGradientLayout_Stage1_c0_c0_c0_c0_c0(half4 _input)
{
    float t = -1.0;
    half v = 1.0;
    @switch (2)
    {
        case 1:
        {
            half r0_2 = ufocalParams_Stage1_c0_c0_c0_c0_c0.y;
            t = float(r0_2) - vTransformedCoords_0_Stage0.y * vTransformedCoords_0_Stage0.y;
            if (t >= 0.0)
            {
                t = vTransformedCoords_0_Stage0.x + sqrt(t);
            }
            else
            {
                v = -1.0;
            }
        }
        break;
        case 0:
        {
            half r0 = ufocalParams_Stage1_c0_c0_c0_c0_c0.x;
            @if (true)
            {
                t = length(vTransformedCoords_0_Stage0) - float(r0);
            }
            else
            {
                t = -length(vTransformedCoords_0_Stage0) - float(r0);
            }
        }
        break;
        case 2:
        {
            half invR1 = ufocalParams_Stage1_c0_c0_c0_c0_c0.x;
            half fx = ufocalParams_Stage1_c0_c0_c0_c0_c0.y;
            float x_t = -1.0;
            @if (false)
            {
                x_t = dot(vTransformedCoords_0_Stage0, vTransformedCoords_0_Stage0) / vTransformedCoords_0_Stage0.x;
            }
            else if (false)
            {
                x_t = length(vTransformedCoords_0_Stage0) - vTransformedCoords_0_Stage0.x * float(invR1);
            }
            else
            {
                float temp = vTransformedCoords_0_Stage0.x * vTransformedCoords_0_Stage0.x - vTransformedCoords_0_Stage0.y * vTransformedCoords_0_Stage0.y;
                if (temp >= 0.0)
                {
                    @if (false || !true)
                    {
                        x_t = -sqrt(temp) - vTransformedCoords_0_Stage0.x * float(invR1);
                    }
                    else
                    {
                        x_t = sqrt(temp) - vTransformedCoords_0_Stage0.x * float(invR1);
                    }
                }
            }
            @if (!false)
            {
                if (x_t <= 0.0)
                {
                    v = -1.0;
                }
            }
            @if (true)
            {
                @if (false)
                {
                    t = x_t;
                }
                else
                {
                    t = x_t + float(fx);
                }
            }
            else
            {
                @if (false)
                {
                    t = -x_t;
                }
                else
                {
                    t = -x_t + float(fx);
                }
            }
            @if (false)
            {
                t = 1.0 - t;
            }
        }
        break;
    }
    return half4(half(t), v, 0.0, 0.0);
}
half4 MatrixEffect_Stage1_c0_c0_c0_c0(half4 _input)
{
    return TwoPointConicalGradientLayout_Stage1_c0_c0_c0_c0_c0(_input);
}
half4 UnrolledBinaryGradientColorizer_Stage1_c0_c0_c0_c1(half4 _input, float2 _coords)
{
    half t = half(_coords.x);
    float4 scale;
    float4 bias;
    if (4 <= 4 || t < uthresholds1_7_Stage1_c0_c0_c0_c1.w)
    {
        if (4 <= 2 || t < uthresholds1_7_Stage1_c0_c0_c0_c1.y)
        {
            if (4 <= 1 || t < uthresholds1_7_Stage1_c0_c0_c0_c1.x)
            {
                scale = uscale0_1_Stage1_c0_c0_c0_c1;
                bias = ubias0_1_Stage1_c0_c0_c0_c1;
            }
            else
            {
                scale = uscale2_3_Stage1_c0_c0_c0_c1;
                bias = ubias2_3_Stage1_c0_c0_c0_c1;
            }
        }
        else
        {
            if (4 <= 3 || t < uthresholds1_7_Stage1_c0_c0_c0_c1.z)
            {
                scale = uscale4_5_Stage1_c0_c0_c0_c1;
                bias = ubias4_5_Stage1_c0_c0_c0_c1;
            }
            else
            {
                scale = uscale6_7_Stage1_c0_c0_c0_c1;
                bias = ubias6_7_Stage1_c0_c0_c0_c1;
            }
        }
    }
    else
    {
        if (4 <= 6 || t < uthresholds9_13_Stage1_c0_c0_c0_c1.y)
        {
            if (4 <= 5 || t < uthresholds9_13_Stage1_c0_c0_c0_c1.x)
            {
                scale = float4(0);
                bias = float4(0);
            }
            else
            {
                scale = float4(0);
                bias = float4(0);
            }
        }
        else
        {
            if (4 <= 7 || t < uthresholds9_13_Stage1_c0_c0_c0_c1.z)
            {
                scale = float4(0);
                bias = float4(0);
            }
            else
            {
                scale = float4(0);
                bias = float4(0);
            }
        }
    }
    return half4(float(t) * scale + bias);
}
half4 ClampedGradientEffect_Stage1_c0_c0_c0(half4 _input)
{
    half4 t = MatrixEffect_Stage1_c0_c0_c0_c0(_input);
    half4 outColor;
    if (!false && t.y < 0.0)
    {
        outColor = half4(0.0);
    }
    else if (t.x < 0.0)
    {
        outColor = uleftBorderColor_Stage1_c0_c0_c0;
    }
    else if (t.x > 1.0)
    {
        outColor = urightBorderColor_Stage1_c0_c0_c0;
    }
    else
    {
        outColor = UnrolledBinaryGradientColorizer_Stage1_c0_c0_c0_c1(_input, float2(half2(t.x, 0.0)));
    }
    @if (false)
    {
        outColor.xyz *= outColor.w;
    }
    return outColor;
}
half4 OverrideInputFragmentProcessor_Stage1_c0_c0(half4 _input)
{
    return ClampedGradientEffect_Stage1_c0_c0_c0(false ? half4(0) : half4(1.000000, 1.000000, 1.000000, 1.000000));
}
half4 DitherEffect_Stage1_c0(half4 _input)
{
    half4 color = OverrideInputFragmentProcessor_Stage1_c0_c0(_input);
    half value;
    @if (sk_Caps.integerSupport)
    {
        uint x = uint(sk_FragCoord.x);
        uint y = uint(sk_FragCoord.y) ^ x;
        uint m = (((((y & 1) << 5 | (x & 1) << 4) | (y & 2) << 2) | (x & 2) << 1) | (y & 4) >> 1) | (x & 4) >> 2;
        value = half(m) / 64.0 - 0.4921875;
    }
    else
    {
        half4 bits = mod(half4(sk_FragCoord.yxyx), half4(2.0, 2.0, 4.0, 4.0));
        bits.zw = step(2.0, bits.zw);
        bits.xz = abs(bits.xz - bits.yw);
        value = dot(bits, half4(0.5, 0.25, 0.125, 0.0625)) - 0.46875;
    }
    return half4(clamp(color.xyz + value * urange_Stage1_c0, 0.0, color.w), color.w);
}
void main()
{
    // Stage 0, QuadPerEdgeAAGeometryProcessor
    half4 outputColor_Stage0;
    outputColor_Stage0 = vcolor_Stage0;
    const half4 outputCoverage_Stage0 = half4(1);
    half4 output_Stage1;
    output_Stage1 = DitherEffect_Stage1_c0(outputColor_Stage0);
    {
        // Xfer Processor: Porter Duff
        sk_FragColor = output_Stage1 * outputCoverage_Stage0;
    }
}
)");

// This fragment shader is taken from GM_BlurDrawImage.
COMPILER_BENCH(medium, R"(
layout(set=0, binding=0) uniform float3x3 umatrix_Stage1_c0_c0_c0;
layout(set=0, binding=0) uniform half4 urectH_Stage2_c1;
layout(set=0, binding=0) uniform float3x3 umatrix_Stage2_c1_c0;
layout(set=0, binding=0) uniform sampler2D uTextureSampler_0_Stage1;
layout(set=0, binding=0) uniform sampler2D uTextureSampler_0_Stage2;
flat in half4 vcolor_Stage0;
noperspective in float2 vTransformedCoords_0_Stage0;
out half4 sk_FragColor;
half4 TextureEffect_Stage1_c0_c0_c0_c0(half4 _input)
{
    return sample(uTextureSampler_0_Stage1, vTransformedCoords_0_Stage0);
}
half4 MatrixEffect_Stage1_c0_c0_c0(half4 _input)
{
    return TextureEffect_Stage1_c0_c0_c0_c0(_input);
}
half4 Blend_Stage1_c0_c0(half4 _input)
{
    // Blend mode: SrcIn (Compose-One behavior)
    return blend_src_in(MatrixEffect_Stage1_c0_c0_c0(half4(1)), _input);
}
half4 OverrideInputFragmentProcessor_Stage1_c0(half4 _input)
{
    return Blend_Stage1_c0_c0(false ? half4(0) : half4(1.000000, 1.000000, 1.000000, 1.000000));
}
half4 TextureEffect_Stage2_c1_c0_c0(half4 _input, float2 _coords)
{
    return sample(uTextureSampler_0_Stage2, _coords).000r;
}
half4 MatrixEffect_Stage2_c1_c0(half4 _input, float2 _coords)
{
    return TextureEffect_Stage2_c1_c0_c0(_input, ((umatrix_Stage2_c1_c0) * _coords.xy1).xy);
}
half4 RectBlurEffect_Stage2_c1(half4 _input)
{
    /* key */ const bool highPrecision = false;
    half xCoverage;
    half yCoverage;
    float2 pos = sk_FragCoord.xy;
    @if (false)
    {
        pos = (float3x3(1) * float3(pos, 1.0)).xy;
    }
    @if (true)
    {
        half2 xy;
        @if (highPrecision)
        {
            xy = max(half2(float4(0).xy - pos), half2(pos - float4(0).zw));
        }
        else
        {
            xy = max(half2(float2(urectH_Stage2_c1.xy) - pos), half2(pos - float2(urectH_Stage2_c1.zw)));
        }
        xCoverage = MatrixEffect_Stage2_c1_c0(_input, float2(half2(xy.x, 0.5))).w;
        yCoverage = MatrixEffect_Stage2_c1_c0(_input, float2(half2(xy.y, 0.5))).w;
    }
    else
    {
        half4 rect;
        @if (highPrecision)
        {
            rect.xy = half2(float4(0).xy - pos);
            rect.zw = half2(pos - float4(0).zw);
        }
        else
        {
            rect.xy = half2(float2(urectH_Stage2_c1.xy) - pos);
            rect.zw = half2(pos - float2(urectH_Stage2_c1.zw));
        }
        xCoverage = (1.0 - MatrixEffect_Stage2_c1_c0(_input, float2(half2(rect.x, 0.5))).w) - MatrixEffect_Stage2_c1_c0(_input, float2(half2(rect.z, 0.5))).w;
        yCoverage = (1.0 - MatrixEffect_Stage2_c1_c0(_input, float2(half2(rect.y, 0.5))).w) - MatrixEffect_Stage2_c1_c0(_input, float2(half2(rect.w, 0.5))).w;
    }
    return (_input * xCoverage) * yCoverage;
}
void main()
{
    // Stage 0, QuadPerEdgeAAGeometryProcessor
    half4 outputColor_Stage0;
    outputColor_Stage0 = vcolor_Stage0;
    const half4 outputCoverage_Stage0 = half4(1);
    half4 output_Stage1;
    output_Stage1 = OverrideInputFragmentProcessor_Stage1_c0(outputColor_Stage0);
    half4 output_Stage2;
    output_Stage2 = RectBlurEffect_Stage2_c1(outputCoverage_Stage0);
    {
        // Xfer Processor: Porter Duff
        sk_FragColor = output_Stage1 * output_Stage2;
    }
}
)");

// This is the fragment shader used to blit the Viewer window when running the software rasterizer.
COMPILER_BENCH(small, R"(
layout(set=0, binding=0) uniform float3x3 umatrix_Stage1_c0_c0;
layout(set=0, binding=0) uniform sampler2D uTextureSampler_0_Stage1;
noperspective in float2 vTransformedCoords_0_Stage0;
out half4 sk_FragColor;
half4 TextureEffect_Stage1_c0_c0_c0(half4 _input)
{
    return sample(uTextureSampler_0_Stage1, vTransformedCoords_0_Stage0);
}
half4 MatrixEffect_Stage1_c0_c0(half4 _input)
{
    return TextureEffect_Stage1_c0_c0_c0(_input);
}
half4 Blend_Stage1_c0(half4 _input)
{
    // Blend mode: Modulate (Compose-One behavior)
    return blend_modulate(MatrixEffect_Stage1_c0_c0(half4(1)), _input);
}
void main()
{
    // Stage 0, QuadPerEdgeAAGeometryProcessor
    half4 outputColor_Stage0 = half4(1);
    const half4 outputCoverage_Stage0 = half4(1);
    half4 output_Stage1;
    output_Stage1 = Blend_Stage1_c0(outputColor_Stage0);
    {
        // Xfer Processor: Porter Duff
        sk_FragColor = output_Stage1 * outputCoverage_Stage0;
    }
}
)");

// The smallest input: a main() that only writes a constant color to sk_FragColor.
COMPILER_BENCH(tiny, "void main() { sk_FragColor = half4(1); }");

// The memory benchmarks below call mallinfo() from <malloc.h>, so the real implementation is only
// compiled when SK_BUILD_FOR_UNIX is defined; other builds get an empty stub.
#if defined(SK_BUILD_FOR_UNIX)

#include <malloc.h>

// These benchmarks aren't timed, they produce memory usage statistics. They run standalone, and
// directly add their results to the nanobench log.
507void RunSkSLMemoryBenchmarks(NanoJSONResultsWriter* log) { 508 auto heap_bytes_used = []() { return mallinfo().uordblks; }; 509 auto bench = [log](const char* name, int bytes) { 510 log->beginObject(name); // test 511 log->beginObject("meta"); // config 512 log->appendS32("bytes", bytes); // sub_result 513 log->endObject(); // config 514 log->endObject(); // test 515 }; 516 517 // Heap used by a default compiler (with no modules loaded) 518 { 519 int before = heap_bytes_used(); 520 GrShaderCaps caps; 521 SkSL::Compiler compiler(&caps); 522 int after = heap_bytes_used(); 523 bench("sksl_compiler_baseline", after - before); 524 } 525 526 // Heap used by a compiler with the two main GPU modules (fragment + vertex) loaded 527 { 528 int before = heap_bytes_used(); 529 GrShaderCaps caps; 530 SkSL::Compiler compiler(&caps); 531 compiler.moduleForProgramKind(SkSL::ProgramKind::kVertex); 532 compiler.moduleForProgramKind(SkSL::ProgramKind::kFragment); 533 int after = heap_bytes_used(); 534 bench("sksl_compiler_gpu", after - before); 535 } 536 537 // Heap used by a compiler with the runtime shader, color filter and blending modules loaded 538 { 539 int before = heap_bytes_used(); 540 GrShaderCaps caps; 541 SkSL::Compiler compiler(&caps); 542 compiler.moduleForProgramKind(SkSL::ProgramKind::kRuntimeColorFilter); 543 compiler.moduleForProgramKind(SkSL::ProgramKind::kRuntimeShader); 544 compiler.moduleForProgramKind(SkSL::ProgramKind::kRuntimeBlender); 545 int after = heap_bytes_used(); 546 bench("sksl_compiler_runtimeeffect", after - before); 547 } 548} 549 550#else 551 552void RunSkSLMemoryBenchmarks(NanoJSONResultsWriter*) {} 553 554#endif 555