1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "experimental/graphite/src/DrawPass.h"
9 
10 #include "experimental/graphite/include/GraphiteTypes.h"
11 #include "experimental/graphite/src/Buffer.h"
12 #include "experimental/graphite/src/ContextUtils.h"
13 #include "experimental/graphite/src/DrawBufferManager.h"
14 #include "experimental/graphite/src/DrawContext.h"
15 #include "experimental/graphite/src/DrawList.h"
16 #include "experimental/graphite/src/ProgramCache.h"
17 #include "experimental/graphite/src/Recorder.h"
18 #include "experimental/graphite/src/Renderer.h"
19 #include "experimental/graphite/src/TextureProxy.h"
20 #include "experimental/graphite/src/UniformCache.h"
21 #include "experimental/graphite/src/geom/BoundsManager.h"
22 
23 #include "src/core/SkMathPriv.h"
24 #include "src/core/SkUtils.h"
25 #include "src/gpu/BufferWriter.h"
26 
27 #include <algorithm>
28 
29 namespace {
30 
31 // Retrieve the program ID and uniformData ID
get_ids_from_paint(skgpu::Recorder* recorder, skgpu::PaintParams params)32 std::tuple<uint32_t, uint32_t> get_ids_from_paint(skgpu::Recorder* recorder,
33                                                   skgpu::PaintParams params) {
34     // TODO: add an ExtractCombo that takes PaintParams directly?
35     SkPaint p;
36 
37     p.setColor(params.color());
38     p.setBlendMode(params.blendMode());
39     p.setShader(params.refShader());
40 
41     // TODO: perhaps just return the ids here rather than the sk_sps?
42     auto [ combo, uniformData] = ExtractCombo(recorder->uniformCache(), p);
43     auto programInfo = recorder->programCache()->findOrCreateProgram(combo);
44 
45     return { programInfo->id(), uniformData->id() };
46 }
47 
48 } // anonymous namespace
49 
50 namespace skgpu {
51 
52 /**
53  * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
54  * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
55  *
56  * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
57  * within a pipeline, while still respecting the overall painter's order. This decreases the number
58  * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
59  * to operate more efficiently and have fewer bubbles within its own instruction stream.
60  *
61  * The Draw's CompresssedPaintersOrder and DisjointStencilINdex represent the most significant bits
62  * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
63  * description is encoded in two steps:
64  *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
65  *     ordered correctly.
66  *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
67  *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
68  *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
69  * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
70  * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
71  * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
72  * the pipeline description index are packed into indices and not actual pointers, a given SortKey
73  * is only valid for the a specific DrawList->DrawPass conversion.
74  */
75 class DrawPass::SortKey {
76 public:
SortKey(const DrawList::Draw* draw, int renderStep, uint32_t pipelineIndex, uint32_t geomUniformIndex, uint32_t shadingUniformIndex)77     SortKey(const DrawList::Draw* draw,
78             int renderStep,
79             uint32_t pipelineIndex,
80             uint32_t geomUniformIndex,
81             uint32_t shadingUniformIndex)
82         : fPipelineKey{draw->fOrder.paintOrder().bits(),
83                        draw->fOrder.stencilIndex().bits(),
84                        static_cast<uint32_t>(renderStep),
85                        pipelineIndex}
86         , fUniformKey{geomUniformIndex, shadingUniformIndex}
fDraw(draw)87         , fDraw(draw) {
88     }
89 
operator <(const SortKey& k) const90     bool operator<(const SortKey& k) const {
91         uint64_t k1 = this->pipelineKey();
92         uint64_t k2 = k.pipelineKey();
93         return k1 < k2 || (k1 == k2 && this->uniformKey() < k.uniformKey());
94     }
95 
draw() const96     const DrawList::Draw* draw() const { return fDraw; }
pipeline() const97     uint32_t pipeline() const { return fPipelineKey.fPipeline; }
renderStep() const98     int renderStep() const { return static_cast<int>(fPipelineKey.fRenderStep); }
99 
geometryUniforms() const100     uint32_t geometryUniforms() const { return fUniformKey.fGeometryIndex; }
shadingUniforms() const101     uint32_t shadingUniforms() const { return fUniformKey.fShadingIndex; }
102 
103 private:
104     // Fields are ordered from most-significant to lowest when sorting by 128-bit value.
105     struct {
106         uint32_t fColorDepthOrder : 16; // sizeof(CompressedPaintersOrder)
107         uint32_t fStencilOrder    : 16; // sizeof(DisjointStencilIndex)
108         uint32_t fRenderStep      : 2;  // bits >= log2(Renderer::kMaxRenderSteps)
109         uint32_t fPipeline        : 30; // bits >= log2(max steps * DrawList::kMaxDraws)
110     } fPipelineKey; // NOTE: named for bit-punning, can't take address of a bit-field
111 
pipelineKey() const112     uint64_t pipelineKey() const { return sk_bit_cast<uint64_t>(fPipelineKey); }
113 
114     struct {
115         uint32_t fGeometryIndex; // bits >= log2(max steps * max draw count)
116         uint32_t fShadingIndex;  //  ""
117     } fUniformKey;
118 
uniformKey() const119     uint64_t uniformKey() const { return sk_bit_cast<uint64_t>(fUniformKey); }
120 
121     // Backpointer to the draw that produced the sort key
122     const DrawList::Draw* fDraw;
123 
124     static_assert(16 >= sizeof(CompressedPaintersOrder));
125     static_assert(16 >= sizeof(DisjointStencilIndex));
126     static_assert(2  >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
127     static_assert(30 >= SkNextLog2_portable(Renderer::kMaxRenderSteps * DrawList::kMaxDraws));
128 };
129 
130 ///////////////////////////////////////////////////////////////////////////////////////////////////
131 
132 namespace {
133 
lookup(skgpu::Recorder* recorder, uint32_t uniformID)134 skgpu::UniformData* lookup(skgpu::Recorder* recorder, uint32_t uniformID) {
135     // TODO: just return a raw 'UniformData*' here
136     sk_sp<skgpu::UniformData> tmp = recorder->uniformCache()->lookup(uniformID);
137     return tmp.get();
138 }
139 
140 } // anonymous namespace
141 
DrawPass(sk_sp<TextureProxy> target, const SkIRect& bounds, std::pair<LoadOp, StoreOp> ops, std::array<float, 4> clearColor, bool requiresStencil, bool requiresMSAA)142 DrawPass::DrawPass(sk_sp<TextureProxy> target, const SkIRect& bounds,
143                    std::pair<LoadOp, StoreOp> ops, std::array<float, 4> clearColor,
144                    bool requiresStencil, bool requiresMSAA)
145         : fTarget(std::move(target))
146         , fBounds(bounds)
147         , fOps(ops)
148         , fClearColor(clearColor)
149         , fRequiresStencil(requiresStencil)
150         , fRequiresMSAA(requiresMSAA) {}
151 
152 DrawPass::~DrawPass() = default;
153 
Make(Recorder* recorder, std::unique_ptr<DrawList> draws, sk_sp<TextureProxy> target, std::pair<LoadOp, StoreOp> ops, std::array<float, 4> clearColor, const BoundsManager* occlusionCuller)154 std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
155                                          std::unique_ptr<DrawList> draws,
156                                          sk_sp<TextureProxy> target,
157                                          std::pair<LoadOp, StoreOp> ops,
158                                          std::array<float, 4> clearColor,
159                                          const BoundsManager* occlusionCuller) {
160     // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
161     // its size should be done with care and good reason. The performance of sorting the keys is
162     // heavily tied to the total size.
163     //
164     // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
165     // 16 bytes. There are several ways this could be done if necessary:
166     //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
167     //    the uniform data => 8 bytes of key, 8 bytes of pointer.
168     //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
169     //    4 low bits from the Draw* pointer since it's 16 byte aligned.
170     //  - Compact the Draw* to an index into the original collection, although that has extra
171     //    indirection and does not work as well with SkTBlockList.
172     // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
173     // than an 8 byte key and unmodified pointer.
174     static_assert(sizeof(DrawPass::SortKey) == 16 + sizeof(void*));
175 
176     bool requiresStencil = false;
177     bool requiresMSAA = false;
178     Rect passBounds = Rect::InfiniteInverted();
179 
180     std::vector<SortKey> keys;
181     keys.reserve(draws->renderStepCount()); // will not exceed but may use less with occluded draws
182 
183     for (const DrawList::Draw& draw : draws->fDraws.items()) {
184         if (occlusionCuller && occlusionCuller->isOccluded(draw.fClip.drawBounds(),
185                                                            draw.fOrder.depth())) {
186             continue;
187         }
188 
189         // If we have two different descriptors, such that the uniforms from the PaintParams can be
190         // bound independently of those used by the rest of the RenderStep, then we can upload now
191         // and remember the location for re-use on any RenderStep that does shading.
192         uint32_t programID = ProgramCache::kInvalidProgramID;
193         uint32_t shadingUniformID = UniformData::kInvalidUniformID;
194         if (draw.fPaintParams.has_value()) {
195             std::tie(programID, shadingUniformID) = get_ids_from_paint(recorder,
196                                                                        draw.fPaintParams.value());
197         }
198 
199         for (int stepIndex = 0; stepIndex < draw.fRenderer.numRenderSteps(); ++stepIndex) {
200             const RenderStep* const step = draw.fRenderer.steps()[stepIndex];
201 
202             // TODO ask step to generate a pipeline description based on the above shading code, and
203             // have pipelineIndex point to that description in the accumulated list of descs
204             uint32_t pipelineIndex = 0;
205             // TODO step writes out geometry uniforms and have geomIndex point to that buffer data,
206             // providing shape, transform, scissor, and paint depth to RenderStep
207             uint32_t geometryIndex = 0;
208 
209             uint32_t shadingIndex = UniformData::kInvalidUniformID;
210 
211             const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();
212             if (performsShading) {
213                 // TODO: we need to combine the 'programID' with the RenderPass info and the
214                 // geometric rendering method to get the true 'pipelineIndex'
215                 pipelineIndex = programID;
216                 shadingIndex = shadingUniformID;
217             } else {
218                 // TODO: fill in 'pipelineIndex' for Chris' stencil/depth draws
219             }
220 
221             keys.push_back({&draw, stepIndex, pipelineIndex, geometryIndex, shadingIndex});
222         }
223 
224         passBounds.join(draw.fClip.drawBounds());
225         requiresStencil |= draw.fRenderer.requiresStencil();
226         requiresMSAA |= draw.fRenderer.requiresMSAA();
227     }
228 
229     // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
230     // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
231     // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
232     // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
233     // vs. algorithms that require an extra O(n) storage.
234     // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
235     // bugs in the DrawOrder determination code?
236     std::sort(keys.begin(), keys.end());
237 
238     DrawBufferManager* bufferMgr = recorder->drawBufferManager();
239 
240     uint32_t lastPipeline = 0;
241     uint32_t lastShadingUniforms = UniformData::kInvalidUniformID;
242     uint32_t lastGeometryUniforms = 0;
243     SkIRect lastScissor = SkIRect::MakeSize(target->dimensions());
244     Buffer* lastBoundVertexBuffer = nullptr;
245     Buffer* lastBoundIndexBuffer = nullptr;
246 
247     for (const SortKey& key : keys) {
248         const DrawList::Draw& draw = *key.draw();
249         int renderStep = key.renderStep();
250 
251         size_t vertexSize = draw.requiredVertexSpace(renderStep);
252         size_t indexSize = draw.requiredIndexSpace(renderStep);
253         auto [vertexWriter, vertexInfo] = bufferMgr->getVertexWriter(vertexSize);
254         auto [indexWriter, indexInfo] = bufferMgr->getIndexWriter(indexSize);
255         // TODO: handle the case where we fail to get a vertex or index writer besides asserting
256         SkASSERT(!vertexSize || (vertexWriter && vertexInfo.fBuffer));
257         SkASSERT(!indexSize || (indexWriter && indexInfo.fBuffer));
258         draw.writeVertices(std::move(vertexWriter), std::move(indexWriter), renderStep);
259 
260         if (vertexSize) {
261             if (lastBoundVertexBuffer != vertexInfo.fBuffer) {
262                 // TODO: Record a vertex bind call that stores the vertexInfo.fBuffer.
263             }
264             // TODO: Store the vertexInfo.fOffset so the draw will know its vertex offset when it
265             // executes.
266         }
267         if (indexSize) {
268             if (lastBoundIndexBuffer != indexInfo.fBuffer) {
269                 // TODO: Record a vertex bind call that stores the vertexInfo.fBuffer.
270             }
271             // TODO: Store the vertexInfo.fOffset so the draw will know its vertex offset when it
272             // executes.
273         }
274 
275         // TODO: Have the render step write out vertices and figure out what draw call function and
276         // primitive type it uses. The vertex buffer binding/offset and draw params will be examined
277         // to determine if the active draw can be updated to include the new vertices, or if it has
278         // to be ended and a new one begun for this step. In addition to checking this state, must
279         // also check if pipeline, uniform, scissor etc. would require the active draw to end.
280         //
281         // const RenderStep* const step = draw.fRenderer.steps()[key.renderStep()];
282 
283         if (key.pipeline() != lastPipeline) {
284             // TODO: Look up pipeline description from key's index and record binding it
285             lastPipeline = key.pipeline();
286             lastShadingUniforms = UniformData::kInvalidUniformID;
287             lastGeometryUniforms = 0;
288         }
289         if (key.geometryUniforms() != lastGeometryUniforms) {
290             // TODO: Look up uniform buffer binding info corresponding to key's index and record it
291             lastGeometryUniforms = key.geometryUniforms();
292         }
293         if (key.shadingUniforms() != lastShadingUniforms) {
294             auto ud = lookup(recorder, key.shadingUniforms());
295 
296             auto [writer, bufferInfo] = bufferMgr->getUniformWriter(ud->dataSize());
297             writer.write(ud->data(), ud->dataSize());
298             // TODO: recording 'bufferInfo' somewhere to allow a later uniform bind call
299 
300             lastShadingUniforms = key.shadingUniforms();
301         }
302 
303         if (draw.fClip.scissor() != lastScissor) {
304             // TODO: Record new scissor rectangle
305         }
306 
307         // TODO: Write vertex and index data for the draw step
308     }
309 
310     // if (currentDraw) {
311         // TODO: End the current draw if it has pending vertices
312     // }
313 
314     passBounds.roundOut();
315     SkIRect pxPassBounds = SkIRect::MakeLTRB((int) passBounds.left(), (int) passBounds.top(),
316                                              (int) passBounds.right(), (int) passBounds.bot());
317     return std::unique_ptr<DrawPass>(new DrawPass(std::move(target), pxPassBounds, ops, clearColor,
318                                                   requiresStencil, requiresMSAA));
319 }
320 
addCommands(CommandBuffer* buffer) const321 void DrawPass::addCommands(CommandBuffer* buffer) const {
322     // TODO
323 }
324 
325 } // namespace skgpu
326