1#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2019 Google Inc.  All rights reserved.
4// https://github.com/protocolbuffers/protobuf
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#endregion
32
33using BenchmarkDotNet.Attributes;
34using System;
35using System.Buffers.Binary;
36using System.Collections.Generic;
37using System.IO;
38using System.Buffers;
39
40namespace Google.Protobuf.Benchmarks
41{
42    /// <summary>
43    /// Benchmarks throughput when parsing raw primitives.
44    /// </summary>
45    [MemoryDiagnoser]
46    public class ParseRawPrimitivesBenchmark
47    {
48        // key is the encodedSize of varint values
49        Dictionary<int, byte[]> varintInputBuffers;
50
51        byte[] doubleInputBuffer;
52        byte[] floatInputBuffer;
53        byte[] fixedIntInputBuffer;
54
55        // key is the encodedSize of string values
56        Dictionary<int, byte[]> stringInputBuffers;
57        Dictionary<int, ReadOnlySequence<byte>> stringInputBuffersSegmented;
58
59        Random random = new Random(417384220);  // random but deterministic seed
60
61        public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 };
62        public IEnumerable<int> StringSegmentedEncodedSizes => new[] { 105, 10080 };
63
64        [GlobalSetup]
65        public void GlobalSetup()
66        {
67            // add some extra values that we won't read just to make sure we are far enough from the end of the buffer
68            // which allows the parser fastpath to always kick in.
69            const int paddingValueCount = 100;
70
71            varintInputBuffers = new Dictionary<int, byte[]>();
72            for (int encodedSize = 1; encodedSize <= 10; encodedSize++)
73            {
74                byte[] buffer = CreateBufferWithRandomVarints(random, BytesToParse / encodedSize, encodedSize, paddingValueCount);
75                varintInputBuffers.Add(encodedSize, buffer);
76            }
77
78            doubleInputBuffer = CreateBufferWithRandomDoubles(random, BytesToParse / sizeof(double), paddingValueCount);
79            floatInputBuffer = CreateBufferWithRandomFloats(random, BytesToParse / sizeof(float), paddingValueCount);
80            fixedIntInputBuffer = CreateBufferWithRandomData(random, BytesToParse / sizeof(long), sizeof(long), paddingValueCount);
81
82            stringInputBuffers = new Dictionary<int, byte[]>();
83            foreach (var encodedSize in StringEncodedSizes)
84            {
85                byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1 );
86                stringInputBuffers.Add(encodedSize, buffer);
87            }
88
89            stringInputBuffersSegmented = new Dictionary<int, ReadOnlySequence<byte>>();
90            foreach (var encodedSize in StringSegmentedEncodedSizes)
91            {
92                byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1);
93                stringInputBuffersSegmented.Add(encodedSize, ReadOnlySequenceFactory.CreateWithContent(buffer, segmentSize: 128, addEmptySegmentDelimiters: false));
94            }
95        }
96
97        // Total number of bytes that each benchmark will parse.
98        // Measuring the time taken to parse buffer of given size makes it easier to compare parsing speed for different
99        // types and makes it easy to calculate the througput (in MB/s)
100        // 10800 bytes is chosen because it is divisible by all possible encoded sizes for all primitive types {1..10}
101        [Params(10080)]
102        public int BytesToParse { get; set; }
103
104        [Benchmark]
105        [Arguments(1)]
106        [Arguments(2)]
107        [Arguments(3)]
108        [Arguments(4)]
109        [Arguments(5)]
110        public int ParseRawVarint32_CodedInputStream(int encodedSize)
111        {
112            CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]);
113            int sum = 0;
114            for (int i = 0; i < BytesToParse / encodedSize; i++)
115            {
116                sum += cis.ReadInt32();
117            }
118            return sum;
119        }
120
121        [Benchmark]
122        [Arguments(1)]
123        [Arguments(2)]
124        [Arguments(3)]
125        [Arguments(4)]
126        [Arguments(5)]
127        public int ParseRawVarint32_ParseContext(int encodedSize)
128        {
129            InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx);
130            int sum = 0;
131            for (int i = 0; i < BytesToParse / encodedSize; i++)
132            {
133                sum += ctx.ReadInt32();
134            }
135            return sum;
136        }
137
138        [Benchmark]
139        [Arguments(1)]
140        [Arguments(2)]
141        [Arguments(3)]
142        [Arguments(4)]
143        [Arguments(5)]
144        [Arguments(6)]
145        [Arguments(7)]
146        [Arguments(8)]
147        [Arguments(9)]
148        [Arguments(10)]
149        public long ParseRawVarint64_CodedInputStream(int encodedSize)
150        {
151            CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]);
152            long sum = 0;
153            for (int i = 0; i < BytesToParse / encodedSize; i++)
154            {
155                sum += cis.ReadInt64();
156            }
157            return sum;
158        }
159
160        [Benchmark]
161        [Arguments(1)]
162        [Arguments(2)]
163        [Arguments(3)]
164        [Arguments(4)]
165        [Arguments(5)]
166        [Arguments(6)]
167        [Arguments(7)]
168        [Arguments(8)]
169        [Arguments(9)]
170        [Arguments(10)]
171        public long ParseRawVarint64_ParseContext(int encodedSize)
172        {
173            InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx);
174            long sum = 0;
175            for (int i = 0; i < BytesToParse / encodedSize; i++)
176            {
177                sum += ctx.ReadInt64();
178            }
179            return sum;
180        }
181
182        [Benchmark]
183        public uint ParseFixed32_CodedInputStream()
184        {
185            const int encodedSize = sizeof(uint);
186            CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer);
187            uint sum = 0;
188            for (uint i = 0; i < BytesToParse / encodedSize; i++)
189            {
190                sum += cis.ReadFixed32();
191            }
192            return sum;
193        }
194
195        [Benchmark]
196        public uint ParseFixed32_ParseContext()
197        {
198            const int encodedSize = sizeof(uint);
199            InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx);
200            uint sum = 0;
201            for (uint i = 0; i < BytesToParse / encodedSize; i++)
202            {
203                sum += ctx.ReadFixed32();
204            }
205            return sum;
206        }
207
208        [Benchmark]
209        public ulong ParseFixed64_CodedInputStream()
210        {
211            const int encodedSize = sizeof(ulong);
212            CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer);
213            ulong sum = 0;
214            for (int i = 0; i < BytesToParse / encodedSize; i++)
215            {
216                sum += cis.ReadFixed64();
217            }
218            return sum;
219        }
220
221        [Benchmark]
222        public ulong ParseFixed64_ParseContext()
223        {
224            const int encodedSize = sizeof(ulong);
225            InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx);
226            ulong sum = 0;
227            for (int i = 0; i < BytesToParse / encodedSize; i++)
228            {
229                sum += ctx.ReadFixed64();
230            }
231            return sum;
232        }
233
234        [Benchmark]
235        public float ParseRawFloat_CodedInputStream()
236        {
237            const int encodedSize = sizeof(float);
238            CodedInputStream cis = new CodedInputStream(floatInputBuffer);
239            float sum = 0;
240            for (int i = 0; i < BytesToParse / encodedSize; i++)
241            {
242               sum += cis.ReadFloat();
243            }
244            return sum;
245        }
246
247        [Benchmark]
248        public float ParseRawFloat_ParseContext()
249        {
250            const int encodedSize = sizeof(float);
251            InitializeParseContext(floatInputBuffer, out ParseContext ctx);
252            float sum = 0;
253            for (int i = 0; i < BytesToParse / encodedSize; i++)
254            {
255               sum += ctx.ReadFloat();
256            }
257            return sum;
258        }
259
260        [Benchmark]
261        public double ParseRawDouble_CodedInputStream()
262        {
263            const int encodedSize = sizeof(double);
264            CodedInputStream cis = new CodedInputStream(doubleInputBuffer);
265            double sum = 0;
266            for (int i = 0; i < BytesToParse / encodedSize; i++)
267            {
268                sum += cis.ReadDouble();
269            }
270            return sum;
271        }
272
273        [Benchmark]
274        public double ParseRawDouble_ParseContext()
275        {
276            const int encodedSize = sizeof(double);
277            InitializeParseContext(doubleInputBuffer, out ParseContext ctx);
278            double sum = 0;
279            for (int i = 0; i < BytesToParse / encodedSize; i++)
280            {
281                sum += ctx.ReadDouble();
282            }
283            return sum;
284        }
285
286        [Benchmark]
287        [ArgumentsSource(nameof(StringEncodedSizes))]
288        public int ParseString_CodedInputStream(int encodedSize)
289        {
290            CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]);
291            int sum = 0;
292            for (int i = 0; i < BytesToParse / encodedSize; i++)
293            {
294                sum += cis.ReadString().Length;
295            }
296            return sum;
297        }
298
299        [Benchmark]
300        [ArgumentsSource(nameof(StringEncodedSizes))]
301        public int ParseString_ParseContext(int encodedSize)
302        {
303            InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx);
304            int sum = 0;
305            for (int i = 0; i < BytesToParse / encodedSize; i++)
306            {
307                sum += ctx.ReadString().Length;
308            }
309            return sum;
310        }
311
312        [Benchmark]
313        [ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
314        public int ParseString_ParseContext_MultipleSegments(int encodedSize)
315        {
316            InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx);
317            int sum = 0;
318            for (int i = 0; i < BytesToParse / encodedSize; i++)
319            {
320                sum += ctx.ReadString().Length;
321            }
322            return sum;
323        }
324
325        [Benchmark]
326        [ArgumentsSource(nameof(StringEncodedSizes))]
327        public int ParseBytes_CodedInputStream(int encodedSize)
328        {
329            CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]);
330            int sum = 0;
331            for (int i = 0; i < BytesToParse / encodedSize; i++)
332            {
333                sum += cis.ReadBytes().Length;
334            }
335            return sum;
336        }
337
338        [Benchmark]
339        [ArgumentsSource(nameof(StringEncodedSizes))]
340        public int ParseBytes_ParseContext(int encodedSize)
341        {
342            InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx);
343            int sum = 0;
344            for (int i = 0; i < BytesToParse / encodedSize; i++)
345            {
346                sum += ctx.ReadBytes().Length;
347            }
348            return sum;
349        }
350
351        [Benchmark]
352        [ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
353        public int ParseBytes_ParseContext_MultipleSegments(int encodedSize)
354        {
355            InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx);
356            int sum = 0;
357            for (int i = 0; i < BytesToParse / encodedSize; i++)
358            {
359                sum += ctx.ReadBytes().Length;
360            }
361            return sum;
362        }
363
364        private static void InitializeParseContext(byte[] buffer, out ParseContext ctx)
365        {
366            ParseContext.Initialize(new ReadOnlySequence<byte>(buffer), out ctx);
367        }
368
369        private static void InitializeParseContext(ReadOnlySequence<byte> buffer, out ParseContext ctx)
370        {
371            ParseContext.Initialize(buffer, out ctx);
372        }
373
374        private static byte[] CreateBufferWithRandomVarints(Random random, int valueCount, int encodedSize, int paddingValueCount)
375        {
376            MemoryStream ms = new MemoryStream();
377            CodedOutputStream cos = new CodedOutputStream(ms);
378            for (int i = 0; i < valueCount + paddingValueCount; i++)
379            {
380                cos.WriteUInt64(RandomUnsignedVarint(random, encodedSize, false));
381            }
382            cos.Flush();
383            var buffer = ms.ToArray();
384
385            if (buffer.Length != encodedSize * (valueCount + paddingValueCount))
386            {
387                throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}");
388            }
389            return buffer;
390        }
391
392        private static byte[] CreateBufferWithRandomFloats(Random random, int valueCount, int paddingValueCount)
393        {
394            MemoryStream ms = new MemoryStream();
395            CodedOutputStream cos = new CodedOutputStream(ms);
396            for (int i = 0; i < valueCount + paddingValueCount; i++)
397            {
398                cos.WriteFloat((float)random.NextDouble());
399            }
400            cos.Flush();
401            var buffer = ms.ToArray();
402            return buffer;
403        }
404
405        private static byte[] CreateBufferWithRandomDoubles(Random random, int valueCount, int paddingValueCount)
406        {
407            MemoryStream ms = new MemoryStream();
408            CodedOutputStream cos = new CodedOutputStream(ms);
409            for (int i = 0; i < valueCount + paddingValueCount; i++)
410            {
411                cos.WriteDouble(random.NextDouble());
412            }
413            cos.Flush();
414            var buffer = ms.ToArray();
415            return buffer;
416        }
417
418        private static byte[] CreateBufferWithRandomData(Random random, int valueCount, int encodedSize, int paddingValueCount)
419        {
420            int bufferSize = (valueCount + paddingValueCount) * encodedSize;
421            byte[] buffer = new byte[bufferSize];
422            random.NextBytes(buffer);
423            return buffer;
424        }
425
426        /// <summary>
427        /// Generate a random value that will take exactly "encodedSize" bytes when varint-encoded.
428        /// </summary>
429        public static ulong RandomUnsignedVarint(Random random, int encodedSize, bool fitsIn32Bits)
430        {
431            Span<byte> randomBytesBuffer = stackalloc byte[8];
432
433            if (encodedSize < 1 || encodedSize > 10 || (fitsIn32Bits && encodedSize > 5))
434            {
435                throw new ArgumentException("Illegal encodedSize value requested", nameof(encodedSize));
436            }
437            const int bitsPerByte = 7;
438
439            ulong result = 0;
440            while (true)
441            {
442                random.NextBytes(randomBytesBuffer);
443                ulong randomValue = BinaryPrimitives.ReadUInt64LittleEndian(randomBytesBuffer);
444
445                // only use the number of random bits we need
446                ulong bitmask = encodedSize < 10 ? ((1UL << (encodedSize * bitsPerByte)) - 1) : ulong.MaxValue;
447                result = randomValue & bitmask;
448
449                if (fitsIn32Bits)
450                {
451                    // make sure the resulting value is representable by a uint.
452                    result &= uint.MaxValue;
453                }
454
455                if (encodedSize == 10)
456                {
457                    // for 10-byte values the highest bit always needs to be set (7*9=63)
458                    result |= ulong.MaxValue;
459                    break;
460                }
461
462                // some random values won't require the full "encodedSize" bytes, check that at least
463                // one of the top 7 bits is set. Retrying is fine since it only happens rarely
464                if (encodedSize == 1 || (result & (0x7FUL << ((encodedSize - 1) * bitsPerByte))) != 0)
465                {
466                    break;
467                }
468            }
469            return result;
470        }
471
472        private static byte[] CreateBufferWithStrings(int valueCount, int encodedSize, int paddingValueCount)
473        {
474            var str = CreateStringWithEncodedSize(encodedSize);
475
476            MemoryStream ms = new MemoryStream();
477            CodedOutputStream cos = new CodedOutputStream(ms);
478            for (int i = 0; i < valueCount + paddingValueCount; i++)
479            {
480                cos.WriteString(str);
481            }
482            cos.Flush();
483            var buffer = ms.ToArray();
484
485            if (buffer.Length != encodedSize * (valueCount + paddingValueCount))
486            {
487                throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}");
488            }
489            return buffer;
490        }
491
492        public static string CreateStringWithEncodedSize(int encodedSize)
493        {
494            var str = new string('a', encodedSize);
495            while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
496            {
497                str = str.Substring(1);
498            }
499
500            if (CodedOutputStream.ComputeStringSize(str) != encodedSize)
501            {
502                throw new InvalidOperationException($"Generated string with wrong encodedSize");
503            }
504            return str;
505        }
506
507        public static string CreateNonAsciiStringWithEncodedSize(int encodedSize)
508        {
509            if (encodedSize < 3)
510            {
511                throw new ArgumentException("Illegal encoded size for a string with non-ascii chars.");
512            }
513            var twoByteChar = '\u00DC';  // U-umlaut, UTF8 encoding has 2 bytes
514            var str = new string(twoByteChar, encodedSize / 2);
515            while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
516            {
517                str = str.Substring(1);
518            }
519
520            // add padding of ascii characters to reach the desired encoded size.
521            while (CodedOutputStream.ComputeStringSize(str) < encodedSize)
522            {
523                str += 'a';
524            }
525
526            // Note that for a few specific encodedSize values, it might be impossible to generate
527            // the string with the desired encodedSize using the algorithm above. For testing purposes, checking that
528            // the encoded size we got is actually correct is good enough.
529            if (CodedOutputStream.ComputeStringSize(str) != encodedSize)
530            {
531                throw new InvalidOperationException($"Generated string with wrong encodedSize");
532            }
533            return str;
534        }
535    }
536}
537