/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently of the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

#include "./compress_fragment.h"

#include <string.h>  /* memcmp, memcpy, memset */

#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./write_bits.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime; oddness is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;

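/* Hashes the 5 bytes at "p". The little-endian 64-bit load shifted left by
   24 keeps exactly bytes p[0..4] in the high 40 bits; the multiplication
   mixes them, and "shift" (64 - table_bits) extracts the top bits as the
   hash table index. Five bytes matches the 5-byte comparison in IsMatch(). */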
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

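/* Same hash as Hash(), computed from a 64-bit value already loaded from the
   input; "offset" (0..3) selects which of four overlapping 5-byte windows
   within "v" to hash. */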
static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  BROTLI_DCHECK(offset >= 0);
  BROTLI_DCHECK(offset <= 3);
  {
    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
    return (uint32_t)(h >> shift);
  }
}

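/* A match requires at least 5 identical bytes: one 32-bit word plus one extra
   byte, mirroring the 5 bytes covered by Hash(). */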
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return TO_BROTLI_BOOL(
      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
      p1[4] == p2[4]);
}

/* Builds a literal prefix code into "depths" and "bits" based on the
   statistics of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth to each literal.
   Returns the estimated compression ratio, in millibytes per character, for
   encoding the given input with the generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead of as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
  {
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
    return (literal_ratio * 125) / histogram_total;
  }
}

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only the first 64 values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}

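/* Insert lengths are coded in tiers: the shortest values get a dedicated
   symbol, longer values a symbol plus extra bits. Worked example:
   insertlen == 200 falls into the third branch, so tail = 200 - 66 = 134,
   nbits = Log2FloorNonZero(134) = 7, and symbol 7 + 50 = 57 is emitted,
   followed by the extra-bits value 134 - 128 = 6 in 7 bits. EmitCopyLen()
   below follows the same pattern with different symbol offsets. */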
/* REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[61];
  }
}

static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[62];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[63];
  }
}

static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[39];
  }
}

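/* Variant of EmitCopyLen() for copies that reuse the last distance. Short
   copies map to command symbols that imply "last distance" and need no
   distance symbol; the longer branches additionally emit symbol 64, the
   first slot of the distance half of the 128-symbol code, i.e. distance
   code 0 ("use the last distance"). */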
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[39];
    ++histo[64];
  }
}

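/* Emits "distance" with the prefix-plus-extra-bits scheme of the distance
   half of the code. Worked example: distance == 10 gives d = 13, nbits = 2,
   prefix = 1, so symbol 2 * 1 + 1 + 80 = 83 is emitted, followed by the
   extra-bits value 13 - 12 = 1 in 2 bits. */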
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}

static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t j;
  for (j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

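/* Writes the fixed-layout part of a meta-block header: ISLAST (1 bit, always
   0 here), MNIBBLES (2 bits, storing nibbles - 4), MLEN (nibbles * 4 bits,
   storing len - 1), and the ISUNCOMPRESSED bit. */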
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles = 6;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}

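/* Overwrites "n_bits" bits of already-emitted output starting at bit
   position "pos". Used to patch the MLEN field of a meta-block header in
   place when a meta-block is extended. */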
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask =
        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    array[byte_pos] =
        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}

static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= (uint8_t)mask;
  *storage_ix = new_storage_ix;
}

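/* Samples the next block and estimates whether coding it with the existing
   literal depths beats starting a new meta-block. Rearranged, the test
   accepts when the cost under the existing depths stays within roughly half
   a bit per sampled byte (plus a 200-bit allowance for the saved header and
   prefix codes) of the sample's own entropy. */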
static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  size_t i;
  for (i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  {
    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    for (i = 0; i < 256; ++i) {
      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    }
    return TO_BROTLI_BOOL(r >= 0.0);
  }
}

/* Acceptable loss for speedup on incompressible data is 2%. */
#define MIN_RATIO 980

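/* "literal_ratio" is the output of BuildAndStoreLiteralPrefixCode, i.e. the
   estimated cost of the literal code in millibytes per input byte, so
   MIN_RATIO == 980 means the literals shrink by less than 2%. Uncompressed
   mode is considered only when the pending insert is at least 50 times
   longer than everything compressed so far in this meta-block. */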
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    const uint8_t* metablock_start, const uint8_t* next_emit,
    const size_t insertlen, const size_t literal_ratio) {
  const size_t compressed = (size_t)(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return BROTLI_FALSE;
  } else {
    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
  }
}

static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, BROTLI_TRUE, storage_ix, storage);
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

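/* Seed histogram for the command and distance code of a fresh meta-block:
   mostly ones, so that every symbol this encoder may emit keeps a nonzero
   count (and therefore a valid depth) when the code is rebuilt; the zero
   entries correspond to symbols that are not produced by the Emit*
   functions above. */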
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};

static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance
     computations. */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. MLEN
     starts 3 bits in: ISLAST (1 bit) and MNIBBLES (2 bits) precede it. */
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

 emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use them to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16-byte margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes have passed since
         the last match; dividing it by 32 (i.e. right-shifting by five) gives
         the number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        BROTLI_DCHECK(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of the hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    BROTLI_DCHECK(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting commands.
       We can do this because the current size and the new size both have 5
       nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so that
       we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}

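/* Bake one specialized copy of BrotliCompressFragmentFastImpl per supported
   table size, so that "table_bits" (and hence the hash shift) becomes a
   compile-time constant in each instantiation. */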
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    MemoryManager* m, const uint8_t* input, size_t input_size,               \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
    size_t* storage_ix, uint8_t* storage) {                                  \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_

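/* Illustrative sketch of a direct call (the variable names and the guess at
   suitable buffer sizes below are assumptions, not library-provided setup):

     size_t storage_ix = 0;   (bit position within "storage")
     BrotliCompressFragmentFast(m, data, data_size, BROTLI_TRUE,
                                table, (size_t)1 << 11, cmd_depth, cmd_bits,
                                &cmd_code_numbits, cmd_code,
                                &storage_ix, storage);

   "table" must be zero-initialized and its element count a power of two
   whose log2 matches one of the baked variants (9, 11, 13 or 15);
   "cmd_depth"/"cmd_bits"/"cmd_code" must describe a valid command code
   before the first call (the encoder seeds them with a default code, see
   encode.c), and "storage" needs slack beyond the input for the meta-block
   headers and prefix codes. */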
void BrotliCompressFragmentFast(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    BROTLI_DCHECK(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  switch (table_bits) {
#define CASE_(B)                                                     \
    case B:                                                          \
      BrotliCompressFragmentFastImpl ## B(                           \
          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
          cmd_code_numbits, cmd_code, storage_ix, storage);          \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If the output is larger than a single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}

#undef FOR_TABLE_BITS_

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif
