1/* Copyright 2010 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Entropy encoding (Huffman) utilities. */
8
9#include "./entropy_encode.h"
10
11#include <string.h>  /* memset */
12
13#include "../common/constants.h"
14#include "../common/platform.h"
15#include <brotli/types.h>
16
17#if defined(__cplusplus) || defined(c_plusplus)
18extern "C" {
19#endif
20
21const size_t kBrotliShellGaps[] = {132, 57, 23, 10, 4, 1};
22
23BROTLI_BOOL BrotliSetDepth(
24    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
25  int stack[16];
26  int level = 0;
27  int p = p0;
28  BROTLI_DCHECK(max_depth <= 15);
29  stack[0] = -1;
30  while (BROTLI_TRUE) {
31    if (pool[p].index_left_ >= 0) {
32      level++;
33      if (level > max_depth) return BROTLI_FALSE;
34      stack[level] = pool[p].index_right_or_value_;
35      p = pool[p].index_left_;
36      continue;
37    } else {
38      depth[pool[p].index_right_or_value_] = (uint8_t)level;
39    }
40    while (level >= 0 && stack[level] == -1) level--;
41    if (level < 0) return BROTLI_TRUE;
42    p = stack[level];
43    stack[level] = -1;
44  }
45}
46
47/* Sort the root nodes, least popular first. */
48static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
49    const HuffmanTree* v0, const HuffmanTree* v1) {
50  if (v0->total_count_ != v1->total_count_) {
51    return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
52  }
53  return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
54}
55
56/* This function will create a Huffman tree.
57
58   The catch here is that the tree cannot be arbitrarily deep.
59   Brotli specifies a maximum depth of 15 bits for "code trees"
60   and 7 bits for "code length code trees."
61
62   count_limit is the value that is to be faked as the minimum value
63   and this minimum value is raised until the tree matches the
64   maximum length requirement.
65
66   This algorithm is not of excellent performance for very long data blocks,
67   especially when population counts are longer than 2**tree_limit, but
68   we are not planning to use this with extremely long blocks.
69
70   See http://en.wikipedia.org/wiki/Huffman_coding */
71void BrotliCreateHuffmanTree(const uint32_t* data,
72                             const size_t length,
73                             const int tree_limit,
74                             HuffmanTree* tree,
75                             uint8_t* depth) {
76  uint32_t count_limit;
77  HuffmanTree sentinel;
78  InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
79  /* For block sizes below 64 kB, we never need to do a second iteration
80     of this loop. Probably all of our block sizes will be smaller than
81     that, so this loop is mostly of academic interest. If we actually
82     would need this, we would be better off with the Katajainen algorithm. */
83  for (count_limit = 1; ; count_limit *= 2) {
84    size_t n = 0;
85    size_t i;
86    size_t j;
87    size_t k;
88    for (i = length; i != 0;) {
89      --i;
90      if (data[i]) {
91        const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
92        InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
93      }
94    }
95
96    if (n == 1) {
97      depth[tree[0].index_right_or_value_] = 1;  /* Only one element. */
98      break;
99    }
100
101    SortHuffmanTreeItems(tree, n, SortHuffmanTree);
102
103    /* The nodes are:
104       [0, n): the sorted leaf nodes that we start with.
105       [n]: we add a sentinel here.
106       [n + 1, 2n): new parent nodes are added here, starting from
107                    (n+1). These are naturally in ascending order.
108       [2n]: we add a sentinel at the end as well.
109       There will be (2n+1) elements at the end. */
110    tree[n] = sentinel;
111    tree[n + 1] = sentinel;
112
113    i = 0;      /* Points to the next leaf node. */
114    j = n + 1;  /* Points to the next non-leaf node. */
115    for (k = n - 1; k != 0; --k) {
116      size_t left, right;
117      if (tree[i].total_count_ <= tree[j].total_count_) {
118        left = i;
119        ++i;
120      } else {
121        left = j;
122        ++j;
123      }
124      if (tree[i].total_count_ <= tree[j].total_count_) {
125        right = i;
126        ++i;
127      } else {
128        right = j;
129        ++j;
130      }
131
132      {
133        /* The sentinel node becomes the parent node. */
134        size_t j_end = 2 * n - k;
135        tree[j_end].total_count_ =
136            tree[left].total_count_ + tree[right].total_count_;
137        tree[j_end].index_left_ = (int16_t)left;
138        tree[j_end].index_right_or_value_ = (int16_t)right;
139
140        /* Add back the last sentinel node. */
141        tree[j_end + 1] = sentinel;
142      }
143    }
144    if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
145      /* We need to pack the Huffman tree in tree_limit bits. If this was not
146         successful, add fake entities to the lowest values and retry. */
147      break;
148    }
149  }
150}
151
152static void Reverse(uint8_t* v, size_t start, size_t end) {
153  --end;
154  while (start < end) {
155    uint8_t tmp = v[start];
156    v[start] = v[end];
157    v[end] = tmp;
158    ++start;
159    --end;
160  }
161}
162
163static void BrotliWriteHuffmanTreeRepetitions(
164    const uint8_t previous_value,
165    const uint8_t value,
166    size_t repetitions,
167    size_t* tree_size,
168    uint8_t* tree,
169    uint8_t* extra_bits_data) {
170  BROTLI_DCHECK(repetitions > 0);
171  if (previous_value != value) {
172    tree[*tree_size] = value;
173    extra_bits_data[*tree_size] = 0;
174    ++(*tree_size);
175    --repetitions;
176  }
177  if (repetitions == 7) {
178    tree[*tree_size] = value;
179    extra_bits_data[*tree_size] = 0;
180    ++(*tree_size);
181    --repetitions;
182  }
183  if (repetitions < 3) {
184    size_t i;
185    for (i = 0; i < repetitions; ++i) {
186      tree[*tree_size] = value;
187      extra_bits_data[*tree_size] = 0;
188      ++(*tree_size);
189    }
190  } else {
191    size_t start = *tree_size;
192    repetitions -= 3;
193    while (BROTLI_TRUE) {
194      tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
195      extra_bits_data[*tree_size] = repetitions & 0x3;
196      ++(*tree_size);
197      repetitions >>= 2;
198      if (repetitions == 0) {
199        break;
200      }
201      --repetitions;
202    }
203    Reverse(tree, start, *tree_size);
204    Reverse(extra_bits_data, start, *tree_size);
205  }
206}
207
208static void BrotliWriteHuffmanTreeRepetitionsZeros(
209    size_t repetitions,
210    size_t* tree_size,
211    uint8_t* tree,
212    uint8_t* extra_bits_data) {
213  if (repetitions == 11) {
214    tree[*tree_size] = 0;
215    extra_bits_data[*tree_size] = 0;
216    ++(*tree_size);
217    --repetitions;
218  }
219  if (repetitions < 3) {
220    size_t i;
221    for (i = 0; i < repetitions; ++i) {
222      tree[*tree_size] = 0;
223      extra_bits_data[*tree_size] = 0;
224      ++(*tree_size);
225    }
226  } else {
227    size_t start = *tree_size;
228    repetitions -= 3;
229    while (BROTLI_TRUE) {
230      tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
231      extra_bits_data[*tree_size] = repetitions & 0x7;
232      ++(*tree_size);
233      repetitions >>= 3;
234      if (repetitions == 0) {
235        break;
236      }
237      --repetitions;
238    }
239    Reverse(tree, start, *tree_size);
240    Reverse(extra_bits_data, start, *tree_size);
241  }
242}
243
244void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
245                                       uint8_t* good_for_rle) {
246  size_t nonzero_count = 0;
247  size_t stride;
248  size_t limit;
249  size_t sum;
250  const size_t streak_limit = 1240;
251  /* Let's make the Huffman code more compatible with RLE encoding. */
252  size_t i;
253  for (i = 0; i < length; i++) {
254    if (counts[i]) {
255      ++nonzero_count;
256    }
257  }
258  if (nonzero_count < 16) {
259    return;
260  }
261  while (length != 0 && counts[length - 1] == 0) {
262    --length;
263  }
264  if (length == 0) {
265    return;  /* All zeros. */
266  }
267  /* Now counts[0..length - 1] does not have trailing zeros. */
268  {
269    size_t nonzeros = 0;
270    uint32_t smallest_nonzero = 1 << 30;
271    for (i = 0; i < length; ++i) {
272      if (counts[i] != 0) {
273        ++nonzeros;
274        if (smallest_nonzero > counts[i]) {
275          smallest_nonzero = counts[i];
276        }
277      }
278    }
279    if (nonzeros < 5) {
280      /* Small histogram will model it well. */
281      return;
282    }
283    if (smallest_nonzero < 4) {
284      size_t zeros = length - nonzeros;
285      if (zeros < 6) {
286        for (i = 1; i < length - 1; ++i) {
287          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
288            counts[i] = 1;
289          }
290        }
291      }
292    }
293    if (nonzeros < 28) {
294      return;
295    }
296  }
297  /* 2) Let's mark all population counts that already can be encoded
298     with an RLE code. */
299  memset(good_for_rle, 0, length);
300  {
301    /* Let's not spoil any of the existing good RLE codes.
302       Mark any seq of 0's that is longer as 5 as a good_for_rle.
303       Mark any seq of non-0's that is longer as 7 as a good_for_rle. */
304    uint32_t symbol = counts[0];
305    size_t step = 0;
306    for (i = 0; i <= length; ++i) {
307      if (i == length || counts[i] != symbol) {
308        if ((symbol == 0 && step >= 5) ||
309            (symbol != 0 && step >= 7)) {
310          size_t k;
311          for (k = 0; k < step; ++k) {
312            good_for_rle[i - k - 1] = 1;
313          }
314        }
315        step = 1;
316        if (i != length) {
317          symbol = counts[i];
318        }
319      } else {
320        ++step;
321      }
322    }
323  }
324  /* 3) Let's replace those population counts that lead to more RLE codes.
325     Math here is in 24.8 fixed point representation. */
326  stride = 0;
327  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
328  sum = 0;
329  for (i = 0; i <= length; ++i) {
330    if (i == length || good_for_rle[i] ||
331        (i != 0 && good_for_rle[i - 1]) ||
332        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
333      if (stride >= 4 || (stride >= 3 && sum == 0)) {
334        size_t k;
335        /* The stride must end, collapse what we have, if we have enough (4). */
336        size_t count = (sum + stride / 2) / stride;
337        if (count == 0) {
338          count = 1;
339        }
340        if (sum == 0) {
341          /* Don't make an all zeros stride to be upgraded to ones. */
342          count = 0;
343        }
344        for (k = 0; k < stride; ++k) {
345          /* We don't want to change value at counts[i],
346             that is already belonging to the next stride. Thus - 1. */
347          counts[i - k - 1] = (uint32_t)count;
348        }
349      }
350      stride = 0;
351      sum = 0;
352      if (i < length - 2) {
353        /* All interesting strides have a count of at least 4, */
354        /* at least when non-zeros. */
355        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
356      } else if (i < length) {
357        limit = 256 * counts[i];
358      } else {
359        limit = 0;
360      }
361    }
362    ++stride;
363    if (i != length) {
364      sum += counts[i];
365      if (stride >= 4) {
366        limit = (256 * sum + stride / 2) / stride;
367      }
368      if (stride == 4) {
369        limit += 120;
370      }
371    }
372  }
373}
374
375static void DecideOverRleUse(const uint8_t* depth, const size_t length,
376                             BROTLI_BOOL* use_rle_for_non_zero,
377                             BROTLI_BOOL* use_rle_for_zero) {
378  size_t total_reps_zero = 0;
379  size_t total_reps_non_zero = 0;
380  size_t count_reps_zero = 1;
381  size_t count_reps_non_zero = 1;
382  size_t i;
383  for (i = 0; i < length;) {
384    const uint8_t value = depth[i];
385    size_t reps = 1;
386    size_t k;
387    for (k = i + 1; k < length && depth[k] == value; ++k) {
388      ++reps;
389    }
390    if (reps >= 3 && value == 0) {
391      total_reps_zero += reps;
392      ++count_reps_zero;
393    }
394    if (reps >= 4 && value != 0) {
395      total_reps_non_zero += reps;
396      ++count_reps_non_zero;
397    }
398    i += reps;
399  }
400  *use_rle_for_non_zero =
401      TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
402  *use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
403}
404
405void BrotliWriteHuffmanTree(const uint8_t* depth,
406                            size_t length,
407                            size_t* tree_size,
408                            uint8_t* tree,
409                            uint8_t* extra_bits_data) {
410  uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
411  size_t i;
412  BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
413  BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
414
415  /* Throw away trailing zeros. */
416  size_t new_length = length;
417  for (i = 0; i < length; ++i) {
418    if (depth[length - i - 1] == 0) {
419      --new_length;
420    } else {
421      break;
422    }
423  }
424
425  /* First gather statistics on if it is a good idea to do RLE. */
426  if (length > 50) {
427    /* Find RLE coding for longer codes.
428       Shorter codes seem not to benefit from RLE. */
429    DecideOverRleUse(depth, new_length,
430                     &use_rle_for_non_zero, &use_rle_for_zero);
431  }
432
433  /* Actual RLE coding. */
434  for (i = 0; i < new_length;) {
435    const uint8_t value = depth[i];
436    size_t reps = 1;
437    if ((value != 0 && use_rle_for_non_zero) ||
438        (value == 0 && use_rle_for_zero)) {
439      size_t k;
440      for (k = i + 1; k < new_length && depth[k] == value; ++k) {
441        ++reps;
442      }
443    }
444    if (value == 0) {
445      BrotliWriteHuffmanTreeRepetitionsZeros(
446          reps, tree_size, tree, extra_bits_data);
447    } else {
448      BrotliWriteHuffmanTreeRepetitions(previous_value,
449                                        value, reps, tree_size,
450                                        tree, extra_bits_data);
451      previous_value = value;
452    }
453    i += reps;
454  }
455}
456
457static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
458  static const size_t kLut[16] = {  /* Pre-reversed 4-bit values. */
459    0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
460    0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
461  };
462  size_t retval = kLut[bits & 0x0F];
463  size_t i;
464  for (i = 4; i < num_bits; i += 4) {
465    retval <<= 4;
466    bits = (uint16_t)(bits >> 4);
467    retval |= kLut[bits & 0x0F];
468  }
469  retval >>= ((0 - num_bits) & 0x03);
470  return (uint16_t)retval;
471}
472
473/* 0..15 are values for bits */
474#define MAX_HUFFMAN_BITS 16
475
476void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
477                                     size_t len,
478                                     uint16_t* bits) {
479  /* In Brotli, all bit depths are [1..15]
480     0 bit depth means that the symbol does not exist. */
481  uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
482  uint16_t next_code[MAX_HUFFMAN_BITS];
483  size_t i;
484  int code = 0;
485  for (i = 0; i < len; ++i) {
486    ++bl_count[depth[i]];
487  }
488  bl_count[0] = 0;
489  next_code[0] = 0;
490  for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
491    code = (code + bl_count[i - 1]) << 1;
492    next_code[i] = (uint16_t)code;
493  }
494  for (i = 0; i < len; ++i) {
495    if (depth[i]) {
496      bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
497    }
498  }
499}
500
501#if defined(__cplusplus) || defined(c_plusplus)
502}  /* extern "C" */
503#endif
504