11cb0ef41Sopenharmony_ci/* NOLINT(build/header_guard) */
21cb0ef41Sopenharmony_ci/* Copyright 2013 Google Inc. All Rights Reserved.
31cb0ef41Sopenharmony_ci
41cb0ef41Sopenharmony_ci   Distributed under MIT license.
51cb0ef41Sopenharmony_ci   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
61cb0ef41Sopenharmony_ci*/
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci/* template parameters: FN, DataType */
91cb0ef41Sopenharmony_ci
101cb0ef41Sopenharmony_ci#define HistogramType FN(Histogram)
111cb0ef41Sopenharmony_ci
121cb0ef41Sopenharmony_cistatic void FN(InitialEntropyCodes)(const DataType* data, size_t length,
131cb0ef41Sopenharmony_ci                                    size_t stride,
141cb0ef41Sopenharmony_ci                                    size_t num_histograms,
151cb0ef41Sopenharmony_ci                                    HistogramType* histograms) {
161cb0ef41Sopenharmony_ci  uint32_t seed = 7;
171cb0ef41Sopenharmony_ci  size_t block_length = length / num_histograms;
181cb0ef41Sopenharmony_ci  size_t i;
191cb0ef41Sopenharmony_ci  FN(ClearHistograms)(histograms, num_histograms);
201cb0ef41Sopenharmony_ci  for (i = 0; i < num_histograms; ++i) {
211cb0ef41Sopenharmony_ci    size_t pos = length * i / num_histograms;
221cb0ef41Sopenharmony_ci    if (i != 0) {
231cb0ef41Sopenharmony_ci      pos += MyRand(&seed) % block_length;
241cb0ef41Sopenharmony_ci    }
251cb0ef41Sopenharmony_ci    if (pos + stride >= length) {
261cb0ef41Sopenharmony_ci      pos = length - stride - 1;
271cb0ef41Sopenharmony_ci    }
281cb0ef41Sopenharmony_ci    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
291cb0ef41Sopenharmony_ci  }
301cb0ef41Sopenharmony_ci}
311cb0ef41Sopenharmony_ci
321cb0ef41Sopenharmony_cistatic void FN(RandomSample)(uint32_t* seed,
331cb0ef41Sopenharmony_ci                             const DataType* data,
341cb0ef41Sopenharmony_ci                             size_t length,
351cb0ef41Sopenharmony_ci                             size_t stride,
361cb0ef41Sopenharmony_ci                             HistogramType* sample) {
371cb0ef41Sopenharmony_ci  size_t pos = 0;
381cb0ef41Sopenharmony_ci  if (stride >= length) {
391cb0ef41Sopenharmony_ci    stride = length;
401cb0ef41Sopenharmony_ci  } else {
411cb0ef41Sopenharmony_ci    pos = MyRand(seed) % (length - stride + 1);
421cb0ef41Sopenharmony_ci  }
431cb0ef41Sopenharmony_ci  FN(HistogramAddVector)(sample, data + pos, stride);
441cb0ef41Sopenharmony_ci}
451cb0ef41Sopenharmony_ci
461cb0ef41Sopenharmony_cistatic void FN(RefineEntropyCodes)(const DataType* data, size_t length,
471cb0ef41Sopenharmony_ci                                   size_t stride,
481cb0ef41Sopenharmony_ci                                   size_t num_histograms,
491cb0ef41Sopenharmony_ci                                   HistogramType* histograms) {
501cb0ef41Sopenharmony_ci  size_t iters =
511cb0ef41Sopenharmony_ci      kIterMulForRefining * length / stride + kMinItersForRefining;
521cb0ef41Sopenharmony_ci  uint32_t seed = 7;
531cb0ef41Sopenharmony_ci  size_t iter;
541cb0ef41Sopenharmony_ci  iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
551cb0ef41Sopenharmony_ci  for (iter = 0; iter < iters; ++iter) {
561cb0ef41Sopenharmony_ci    HistogramType sample;
571cb0ef41Sopenharmony_ci    FN(HistogramClear)(&sample);
581cb0ef41Sopenharmony_ci    FN(RandomSample)(&seed, data, length, stride, &sample);
591cb0ef41Sopenharmony_ci    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
601cb0ef41Sopenharmony_ci  }
611cb0ef41Sopenharmony_ci}
621cb0ef41Sopenharmony_ci
631cb0ef41Sopenharmony_ci/* Assigns a block id from the range [0, num_histograms) to each data element
641cb0ef41Sopenharmony_ci   in data[0..length) and fills in block_id[0..length) with the assigned values.
651cb0ef41Sopenharmony_ci   Returns the number of blocks, i.e. one plus the number of block switches. */
661cb0ef41Sopenharmony_cistatic size_t FN(FindBlocks)(const DataType* data, const size_t length,
671cb0ef41Sopenharmony_ci                             const double block_switch_bitcost,
681cb0ef41Sopenharmony_ci                             const size_t num_histograms,
691cb0ef41Sopenharmony_ci                             const HistogramType* histograms,
701cb0ef41Sopenharmony_ci                             double* insert_cost,
711cb0ef41Sopenharmony_ci                             double* cost,
721cb0ef41Sopenharmony_ci                             uint8_t* switch_signal,
731cb0ef41Sopenharmony_ci                             uint8_t* block_id) {
741cb0ef41Sopenharmony_ci  const size_t data_size = FN(HistogramDataSize)();
751cb0ef41Sopenharmony_ci  const size_t bitmaplen = (num_histograms + 7) >> 3;
761cb0ef41Sopenharmony_ci  size_t num_blocks = 1;
771cb0ef41Sopenharmony_ci  size_t i;
781cb0ef41Sopenharmony_ci  size_t j;
791cb0ef41Sopenharmony_ci  BROTLI_DCHECK(num_histograms <= 256);
801cb0ef41Sopenharmony_ci  if (num_histograms <= 1) {
811cb0ef41Sopenharmony_ci    for (i = 0; i < length; ++i) {
821cb0ef41Sopenharmony_ci      block_id[i] = 0;
831cb0ef41Sopenharmony_ci    }
841cb0ef41Sopenharmony_ci    return 1;
851cb0ef41Sopenharmony_ci  }
861cb0ef41Sopenharmony_ci  memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
871cb0ef41Sopenharmony_ci  for (i = 0; i < num_histograms; ++i) {
881cb0ef41Sopenharmony_ci    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
891cb0ef41Sopenharmony_ci  }
901cb0ef41Sopenharmony_ci  for (i = data_size; i != 0;) {
911cb0ef41Sopenharmony_ci    --i;
921cb0ef41Sopenharmony_ci    for (j = 0; j < num_histograms; ++j) {
931cb0ef41Sopenharmony_ci      insert_cost[i * num_histograms + j] =
941cb0ef41Sopenharmony_ci          insert_cost[j] - BitCost(histograms[j].data_[i]);
951cb0ef41Sopenharmony_ci    }
961cb0ef41Sopenharmony_ci  }
971cb0ef41Sopenharmony_ci  memset(cost, 0, sizeof(cost[0]) * num_histograms);
981cb0ef41Sopenharmony_ci  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
991cb0ef41Sopenharmony_ci  /* After each iteration of this loop, cost[k] will contain the difference
1001cb0ef41Sopenharmony_ci     between the minimum cost of arriving at the current byte position using
1011cb0ef41Sopenharmony_ci     entropy code k, and the minimum cost of arriving at the current byte
1021cb0ef41Sopenharmony_ci     position. This difference is capped at the block switch cost, and if it
1031cb0ef41Sopenharmony_ci     reaches block switch cost, it means that when we trace back from the last
1041cb0ef41Sopenharmony_ci     position, we need to switch here. */
1051cb0ef41Sopenharmony_ci  for (i = 0; i < length; ++i) {
1061cb0ef41Sopenharmony_ci    const size_t byte_ix = i;
1071cb0ef41Sopenharmony_ci    size_t ix = byte_ix * bitmaplen;
1081cb0ef41Sopenharmony_ci    size_t insert_cost_ix = data[byte_ix] * num_histograms;
1091cb0ef41Sopenharmony_ci    double min_cost = 1e99;
1101cb0ef41Sopenharmony_ci    double block_switch_cost = block_switch_bitcost;
1111cb0ef41Sopenharmony_ci    size_t k;
1121cb0ef41Sopenharmony_ci    for (k = 0; k < num_histograms; ++k) {
1131cb0ef41Sopenharmony_ci      /* We are coding the symbol in data[byte_ix] with entropy code k. */
1141cb0ef41Sopenharmony_ci      cost[k] += insert_cost[insert_cost_ix + k];
1151cb0ef41Sopenharmony_ci      if (cost[k] < min_cost) {
1161cb0ef41Sopenharmony_ci        min_cost = cost[k];
1171cb0ef41Sopenharmony_ci        block_id[byte_ix] = (uint8_t)k;
1181cb0ef41Sopenharmony_ci      }
1191cb0ef41Sopenharmony_ci    }
1201cb0ef41Sopenharmony_ci    /* More blocks for the beginning. */
1211cb0ef41Sopenharmony_ci    if (byte_ix < 2000) {
1221cb0ef41Sopenharmony_ci      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
1231cb0ef41Sopenharmony_ci    }
1241cb0ef41Sopenharmony_ci    for (k = 0; k < num_histograms; ++k) {
1251cb0ef41Sopenharmony_ci      cost[k] -= min_cost;
1261cb0ef41Sopenharmony_ci      if (cost[k] >= block_switch_cost) {
1271cb0ef41Sopenharmony_ci        const uint8_t mask = (uint8_t)(1u << (k & 7));
1281cb0ef41Sopenharmony_ci        cost[k] = block_switch_cost;
1291cb0ef41Sopenharmony_ci        BROTLI_DCHECK((k >> 3) < bitmaplen);
1301cb0ef41Sopenharmony_ci        switch_signal[ix + (k >> 3)] |= mask;
1311cb0ef41Sopenharmony_ci      }
1321cb0ef41Sopenharmony_ci    }
1331cb0ef41Sopenharmony_ci  }
1341cb0ef41Sopenharmony_ci  {  /* Trace back from the last position and switch at the marked places. */
1351cb0ef41Sopenharmony_ci    size_t byte_ix = length - 1;
1361cb0ef41Sopenharmony_ci    size_t ix = byte_ix * bitmaplen;
1371cb0ef41Sopenharmony_ci    uint8_t cur_id = block_id[byte_ix];
1381cb0ef41Sopenharmony_ci    while (byte_ix > 0) {
1391cb0ef41Sopenharmony_ci      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
1401cb0ef41Sopenharmony_ci      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
1411cb0ef41Sopenharmony_ci      --byte_ix;
1421cb0ef41Sopenharmony_ci      ix -= bitmaplen;
1431cb0ef41Sopenharmony_ci      if (switch_signal[ix + (cur_id >> 3)] & mask) {
1441cb0ef41Sopenharmony_ci        if (cur_id != block_id[byte_ix]) {
1451cb0ef41Sopenharmony_ci          cur_id = block_id[byte_ix];
1461cb0ef41Sopenharmony_ci          ++num_blocks;
1471cb0ef41Sopenharmony_ci        }
1481cb0ef41Sopenharmony_ci      }
1491cb0ef41Sopenharmony_ci      block_id[byte_ix] = cur_id;
1501cb0ef41Sopenharmony_ci    }
1511cb0ef41Sopenharmony_ci  }
1521cb0ef41Sopenharmony_ci  return num_blocks;
1531cb0ef41Sopenharmony_ci}
1541cb0ef41Sopenharmony_ci
1551cb0ef41Sopenharmony_cistatic size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
1561cb0ef41Sopenharmony_ci                                uint16_t* new_id, const size_t num_histograms) {
1571cb0ef41Sopenharmony_ci  static const uint16_t kInvalidId = 256;
1581cb0ef41Sopenharmony_ci  uint16_t next_id = 0;
1591cb0ef41Sopenharmony_ci  size_t i;
1601cb0ef41Sopenharmony_ci  for (i = 0; i < num_histograms; ++i) {
1611cb0ef41Sopenharmony_ci    new_id[i] = kInvalidId;
1621cb0ef41Sopenharmony_ci  }
1631cb0ef41Sopenharmony_ci  for (i = 0; i < length; ++i) {
1641cb0ef41Sopenharmony_ci    BROTLI_DCHECK(block_ids[i] < num_histograms);
1651cb0ef41Sopenharmony_ci    if (new_id[block_ids[i]] == kInvalidId) {
1661cb0ef41Sopenharmony_ci      new_id[block_ids[i]] = next_id++;
1671cb0ef41Sopenharmony_ci    }
1681cb0ef41Sopenharmony_ci  }
1691cb0ef41Sopenharmony_ci  for (i = 0; i < length; ++i) {
1701cb0ef41Sopenharmony_ci    block_ids[i] = (uint8_t)new_id[block_ids[i]];
1711cb0ef41Sopenharmony_ci    BROTLI_DCHECK(block_ids[i] < num_histograms);
1721cb0ef41Sopenharmony_ci  }
1731cb0ef41Sopenharmony_ci  BROTLI_DCHECK(next_id <= num_histograms);
1741cb0ef41Sopenharmony_ci  return next_id;
1751cb0ef41Sopenharmony_ci}
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_cistatic void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
1781cb0ef41Sopenharmony_ci                                     const uint8_t* block_ids,
1791cb0ef41Sopenharmony_ci                                     const size_t num_histograms,
1801cb0ef41Sopenharmony_ci                                     HistogramType* histograms) {
1811cb0ef41Sopenharmony_ci  size_t i;
1821cb0ef41Sopenharmony_ci  FN(ClearHistograms)(histograms, num_histograms);
1831cb0ef41Sopenharmony_ci  for (i = 0; i < length; ++i) {
1841cb0ef41Sopenharmony_ci    FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
1851cb0ef41Sopenharmony_ci  }
1861cb0ef41Sopenharmony_ci}
1871cb0ef41Sopenharmony_ci
1881cb0ef41Sopenharmony_cistatic void FN(ClusterBlocks)(MemoryManager* m,
1891cb0ef41Sopenharmony_ci                              const DataType* data, const size_t length,
1901cb0ef41Sopenharmony_ci                              const size_t num_blocks,
1911cb0ef41Sopenharmony_ci                              uint8_t* block_ids,
1921cb0ef41Sopenharmony_ci                              BlockSplit* split) {
1931cb0ef41Sopenharmony_ci  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
1941cb0ef41Sopenharmony_ci  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
1951cb0ef41Sopenharmony_ci  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
1961cb0ef41Sopenharmony_ci      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
1971cb0ef41Sopenharmony_ci  size_t all_histograms_size = 0;
1981cb0ef41Sopenharmony_ci  size_t all_histograms_capacity = expected_num_clusters;
1991cb0ef41Sopenharmony_ci  HistogramType* all_histograms =
2001cb0ef41Sopenharmony_ci      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
2011cb0ef41Sopenharmony_ci  size_t cluster_size_size = 0;
2021cb0ef41Sopenharmony_ci  size_t cluster_size_capacity = expected_num_clusters;
2031cb0ef41Sopenharmony_ci  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
2041cb0ef41Sopenharmony_ci  size_t num_clusters = 0;
2051cb0ef41Sopenharmony_ci  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
2061cb0ef41Sopenharmony_ci      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
2071cb0ef41Sopenharmony_ci  size_t max_num_pairs =
2081cb0ef41Sopenharmony_ci      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
2091cb0ef41Sopenharmony_ci  size_t pairs_capacity = max_num_pairs + 1;
2101cb0ef41Sopenharmony_ci  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
2111cb0ef41Sopenharmony_ci  size_t pos = 0;
2121cb0ef41Sopenharmony_ci  uint32_t* clusters;
2131cb0ef41Sopenharmony_ci  size_t num_final_clusters;
2141cb0ef41Sopenharmony_ci  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
2151cb0ef41Sopenharmony_ci  uint32_t* new_index;
2161cb0ef41Sopenharmony_ci  size_t i;
2171cb0ef41Sopenharmony_ci  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
2181cb0ef41Sopenharmony_ci  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
2191cb0ef41Sopenharmony_ci  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
2201cb0ef41Sopenharmony_ci  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
2211cb0ef41Sopenharmony_ci
2221cb0ef41Sopenharmony_ci  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
2231cb0ef41Sopenharmony_ci      BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
2241cb0ef41Sopenharmony_ci      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
2251cb0ef41Sopenharmony_ci      BROTLI_IS_NULL(pairs)) {
2261cb0ef41Sopenharmony_ci    return;
2271cb0ef41Sopenharmony_ci  }
2281cb0ef41Sopenharmony_ci
2291cb0ef41Sopenharmony_ci  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
2301cb0ef41Sopenharmony_ci
2311cb0ef41Sopenharmony_ci  {
2321cb0ef41Sopenharmony_ci    size_t block_idx = 0;
2331cb0ef41Sopenharmony_ci    for (i = 0; i < length; ++i) {
2341cb0ef41Sopenharmony_ci      BROTLI_DCHECK(block_idx < num_blocks);
2351cb0ef41Sopenharmony_ci      ++block_lengths[block_idx];
2361cb0ef41Sopenharmony_ci      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
2371cb0ef41Sopenharmony_ci        ++block_idx;
2381cb0ef41Sopenharmony_ci      }
2391cb0ef41Sopenharmony_ci    }
2401cb0ef41Sopenharmony_ci    BROTLI_DCHECK(block_idx == num_blocks);
2411cb0ef41Sopenharmony_ci  }
2421cb0ef41Sopenharmony_ci
2431cb0ef41Sopenharmony_ci  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
2441cb0ef41Sopenharmony_ci    const size_t num_to_combine =
2451cb0ef41Sopenharmony_ci        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
2461cb0ef41Sopenharmony_ci    size_t num_new_clusters;
2471cb0ef41Sopenharmony_ci    size_t j;
2481cb0ef41Sopenharmony_ci    for (j = 0; j < num_to_combine; ++j) {
2491cb0ef41Sopenharmony_ci      size_t k;
2501cb0ef41Sopenharmony_ci      FN(HistogramClear)(&histograms[j]);
2511cb0ef41Sopenharmony_ci      for (k = 0; k < block_lengths[i + j]; ++k) {
2521cb0ef41Sopenharmony_ci        FN(HistogramAdd)(&histograms[j], data[pos++]);
2531cb0ef41Sopenharmony_ci      }
2541cb0ef41Sopenharmony_ci      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
2551cb0ef41Sopenharmony_ci      new_clusters[j] = (uint32_t)j;
2561cb0ef41Sopenharmony_ci      symbols[j] = (uint32_t)j;
2571cb0ef41Sopenharmony_ci      sizes[j] = 1;
2581cb0ef41Sopenharmony_ci    }
2591cb0ef41Sopenharmony_ci    num_new_clusters = FN(BrotliHistogramCombine)(
2601cb0ef41Sopenharmony_ci        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
2611cb0ef41Sopenharmony_ci        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
2621cb0ef41Sopenharmony_ci    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
2631cb0ef41Sopenharmony_ci        all_histograms_capacity, all_histograms_size + num_new_clusters);
2641cb0ef41Sopenharmony_ci    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
2651cb0ef41Sopenharmony_ci        cluster_size_capacity, cluster_size_size + num_new_clusters);
2661cb0ef41Sopenharmony_ci    if (BROTLI_IS_OOM(m)) return;
2671cb0ef41Sopenharmony_ci    for (j = 0; j < num_new_clusters; ++j) {
2681cb0ef41Sopenharmony_ci      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
2691cb0ef41Sopenharmony_ci      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
2701cb0ef41Sopenharmony_ci      remap[new_clusters[j]] = (uint32_t)j;
2711cb0ef41Sopenharmony_ci    }
2721cb0ef41Sopenharmony_ci    for (j = 0; j < num_to_combine; ++j) {
2731cb0ef41Sopenharmony_ci      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
2741cb0ef41Sopenharmony_ci    }
2751cb0ef41Sopenharmony_ci    num_clusters += num_new_clusters;
2761cb0ef41Sopenharmony_ci    BROTLI_DCHECK(num_clusters == cluster_size_size);
2771cb0ef41Sopenharmony_ci    BROTLI_DCHECK(num_clusters == all_histograms_size);
2781cb0ef41Sopenharmony_ci  }
2791cb0ef41Sopenharmony_ci  BROTLI_FREE(m, histograms);
2801cb0ef41Sopenharmony_ci
2811cb0ef41Sopenharmony_ci  max_num_pairs =
2821cb0ef41Sopenharmony_ci      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
2831cb0ef41Sopenharmony_ci  if (pairs_capacity < max_num_pairs + 1) {
2841cb0ef41Sopenharmony_ci    BROTLI_FREE(m, pairs);
2851cb0ef41Sopenharmony_ci    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
2861cb0ef41Sopenharmony_ci    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
2871cb0ef41Sopenharmony_ci  }
2881cb0ef41Sopenharmony_ci
2891cb0ef41Sopenharmony_ci  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
2901cb0ef41Sopenharmony_ci  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
2911cb0ef41Sopenharmony_ci  for (i = 0; i < num_clusters; ++i) {
2921cb0ef41Sopenharmony_ci    clusters[i] = (uint32_t)i;
2931cb0ef41Sopenharmony_ci  }
2941cb0ef41Sopenharmony_ci  num_final_clusters = FN(BrotliHistogramCombine)(
2951cb0ef41Sopenharmony_ci      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
2961cb0ef41Sopenharmony_ci      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
2971cb0ef41Sopenharmony_ci      max_num_pairs);
2981cb0ef41Sopenharmony_ci  BROTLI_FREE(m, pairs);
2991cb0ef41Sopenharmony_ci  BROTLI_FREE(m, cluster_size);
3001cb0ef41Sopenharmony_ci
3011cb0ef41Sopenharmony_ci  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
3021cb0ef41Sopenharmony_ci  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
3031cb0ef41Sopenharmony_ci  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
3041cb0ef41Sopenharmony_ci  pos = 0;
3051cb0ef41Sopenharmony_ci  {
3061cb0ef41Sopenharmony_ci    uint32_t next_index = 0;
3071cb0ef41Sopenharmony_ci    for (i = 0; i < num_blocks; ++i) {
3081cb0ef41Sopenharmony_ci      HistogramType histo;
3091cb0ef41Sopenharmony_ci      size_t j;
3101cb0ef41Sopenharmony_ci      uint32_t best_out;
3111cb0ef41Sopenharmony_ci      double best_bits;
3121cb0ef41Sopenharmony_ci      FN(HistogramClear)(&histo);
3131cb0ef41Sopenharmony_ci      for (j = 0; j < block_lengths[i]; ++j) {
3141cb0ef41Sopenharmony_ci        FN(HistogramAdd)(&histo, data[pos++]);
3151cb0ef41Sopenharmony_ci      }
3161cb0ef41Sopenharmony_ci      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
3171cb0ef41Sopenharmony_ci      best_bits =
3181cb0ef41Sopenharmony_ci          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
3191cb0ef41Sopenharmony_ci      for (j = 0; j < num_final_clusters; ++j) {
3201cb0ef41Sopenharmony_ci        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
3211cb0ef41Sopenharmony_ci            &histo, &all_histograms[clusters[j]]);
3221cb0ef41Sopenharmony_ci        if (cur_bits < best_bits) {
3231cb0ef41Sopenharmony_ci          best_bits = cur_bits;
3241cb0ef41Sopenharmony_ci          best_out = clusters[j];
3251cb0ef41Sopenharmony_ci        }
3261cb0ef41Sopenharmony_ci      }
3271cb0ef41Sopenharmony_ci      histogram_symbols[i] = best_out;
3281cb0ef41Sopenharmony_ci      if (new_index[best_out] == kInvalidIndex) {
3291cb0ef41Sopenharmony_ci        new_index[best_out] = next_index++;
3301cb0ef41Sopenharmony_ci      }
3311cb0ef41Sopenharmony_ci    }
3321cb0ef41Sopenharmony_ci  }
3331cb0ef41Sopenharmony_ci  BROTLI_FREE(m, clusters);
3341cb0ef41Sopenharmony_ci  BROTLI_FREE(m, all_histograms);
3351cb0ef41Sopenharmony_ci  BROTLI_ENSURE_CAPACITY(
3361cb0ef41Sopenharmony_ci      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
3371cb0ef41Sopenharmony_ci  BROTLI_ENSURE_CAPACITY(
3381cb0ef41Sopenharmony_ci      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
3391cb0ef41Sopenharmony_ci  if (BROTLI_IS_OOM(m)) return;
3401cb0ef41Sopenharmony_ci  {
3411cb0ef41Sopenharmony_ci    uint32_t cur_length = 0;
3421cb0ef41Sopenharmony_ci    size_t block_idx = 0;
3431cb0ef41Sopenharmony_ci    uint8_t max_type = 0;
3441cb0ef41Sopenharmony_ci    for (i = 0; i < num_blocks; ++i) {
3451cb0ef41Sopenharmony_ci      cur_length += block_lengths[i];
3461cb0ef41Sopenharmony_ci      if (i + 1 == num_blocks ||
3471cb0ef41Sopenharmony_ci          histogram_symbols[i] != histogram_symbols[i + 1]) {
3481cb0ef41Sopenharmony_ci        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
3491cb0ef41Sopenharmony_ci        split->types[block_idx] = id;
3501cb0ef41Sopenharmony_ci        split->lengths[block_idx] = cur_length;
3511cb0ef41Sopenharmony_ci        max_type = BROTLI_MAX(uint8_t, max_type, id);
3521cb0ef41Sopenharmony_ci        cur_length = 0;
3531cb0ef41Sopenharmony_ci        ++block_idx;
3541cb0ef41Sopenharmony_ci      }
3551cb0ef41Sopenharmony_ci    }
3561cb0ef41Sopenharmony_ci    split->num_blocks = block_idx;
3571cb0ef41Sopenharmony_ci    split->num_types = (size_t)max_type + 1;
3581cb0ef41Sopenharmony_ci  }
3591cb0ef41Sopenharmony_ci  BROTLI_FREE(m, new_index);
3601cb0ef41Sopenharmony_ci  BROTLI_FREE(m, block_lengths);
3611cb0ef41Sopenharmony_ci  BROTLI_FREE(m, histogram_symbols);
3621cb0ef41Sopenharmony_ci}
3631cb0ef41Sopenharmony_ci
3641cb0ef41Sopenharmony_cistatic void FN(SplitByteVector)(MemoryManager* m,
3651cb0ef41Sopenharmony_ci                                const DataType* data, const size_t length,
3661cb0ef41Sopenharmony_ci                                const size_t literals_per_histogram,
3671cb0ef41Sopenharmony_ci                                const size_t max_histograms,
3681cb0ef41Sopenharmony_ci                                const size_t sampling_stride_length,
3691cb0ef41Sopenharmony_ci                                const double block_switch_cost,
3701cb0ef41Sopenharmony_ci                                const BrotliEncoderParams* params,
3711cb0ef41Sopenharmony_ci                                BlockSplit* split) {
3721cb0ef41Sopenharmony_ci  const size_t data_size = FN(HistogramDataSize)();
3731cb0ef41Sopenharmony_ci  size_t num_histograms = length / literals_per_histogram + 1;
3741cb0ef41Sopenharmony_ci  HistogramType* histograms;
3751cb0ef41Sopenharmony_ci  if (num_histograms > max_histograms) {
3761cb0ef41Sopenharmony_ci    num_histograms = max_histograms;
3771cb0ef41Sopenharmony_ci  }
3781cb0ef41Sopenharmony_ci  if (length == 0) {
3791cb0ef41Sopenharmony_ci    split->num_types = 1;
3801cb0ef41Sopenharmony_ci    return;
3811cb0ef41Sopenharmony_ci  } else if (length < kMinLengthForBlockSplitting) {
3821cb0ef41Sopenharmony_ci    BROTLI_ENSURE_CAPACITY(m, uint8_t,
3831cb0ef41Sopenharmony_ci        split->types, split->types_alloc_size, split->num_blocks + 1);
3841cb0ef41Sopenharmony_ci    BROTLI_ENSURE_CAPACITY(m, uint32_t,
3851cb0ef41Sopenharmony_ci        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
3861cb0ef41Sopenharmony_ci    if (BROTLI_IS_OOM(m)) return;
3871cb0ef41Sopenharmony_ci    split->num_types = 1;
3881cb0ef41Sopenharmony_ci    split->types[split->num_blocks] = 0;
3891cb0ef41Sopenharmony_ci    split->lengths[split->num_blocks] = (uint32_t)length;
3901cb0ef41Sopenharmony_ci    split->num_blocks++;
3911cb0ef41Sopenharmony_ci    return;
3921cb0ef41Sopenharmony_ci  }
3931cb0ef41Sopenharmony_ci  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
3941cb0ef41Sopenharmony_ci  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
3951cb0ef41Sopenharmony_ci  /* Find good entropy codes. */
3961cb0ef41Sopenharmony_ci  FN(InitialEntropyCodes)(data, length,
3971cb0ef41Sopenharmony_ci                          sampling_stride_length,
3981cb0ef41Sopenharmony_ci                          num_histograms, histograms);
3991cb0ef41Sopenharmony_ci  FN(RefineEntropyCodes)(data, length,
4001cb0ef41Sopenharmony_ci                         sampling_stride_length,
4011cb0ef41Sopenharmony_ci                         num_histograms, histograms);
4021cb0ef41Sopenharmony_ci  {
4031cb0ef41Sopenharmony_ci    /* Find a good path through literals with the good entropy codes. */
4041cb0ef41Sopenharmony_ci    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
4051cb0ef41Sopenharmony_ci    size_t num_blocks = 0;
4061cb0ef41Sopenharmony_ci    const size_t bitmaplen = (num_histograms + 7) >> 3;
4071cb0ef41Sopenharmony_ci    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
4081cb0ef41Sopenharmony_ci    double* cost = BROTLI_ALLOC(m, double, num_histograms);
4091cb0ef41Sopenharmony_ci    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
4101cb0ef41Sopenharmony_ci    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
4111cb0ef41Sopenharmony_ci    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
4121cb0ef41Sopenharmony_ci    size_t i;
4131cb0ef41Sopenharmony_ci    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
4141cb0ef41Sopenharmony_ci        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
4151cb0ef41Sopenharmony_ci        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
4161cb0ef41Sopenharmony_ci      return;
4171cb0ef41Sopenharmony_ci    }
4181cb0ef41Sopenharmony_ci    for (i = 0; i < iters; ++i) {
4191cb0ef41Sopenharmony_ci      num_blocks = FN(FindBlocks)(data, length,
4201cb0ef41Sopenharmony_ci                                  block_switch_cost,
4211cb0ef41Sopenharmony_ci                                  num_histograms, histograms,
4221cb0ef41Sopenharmony_ci                                  insert_cost, cost, switch_signal,
4231cb0ef41Sopenharmony_ci                                  block_ids);
4241cb0ef41Sopenharmony_ci      num_histograms = FN(RemapBlockIds)(block_ids, length,
4251cb0ef41Sopenharmony_ci                                         new_id, num_histograms);
4261cb0ef41Sopenharmony_ci      FN(BuildBlockHistograms)(data, length, block_ids,
4271cb0ef41Sopenharmony_ci                               num_histograms, histograms);
4281cb0ef41Sopenharmony_ci    }
4291cb0ef41Sopenharmony_ci    BROTLI_FREE(m, insert_cost);
4301cb0ef41Sopenharmony_ci    BROTLI_FREE(m, cost);
4311cb0ef41Sopenharmony_ci    BROTLI_FREE(m, switch_signal);
4321cb0ef41Sopenharmony_ci    BROTLI_FREE(m, new_id);
4331cb0ef41Sopenharmony_ci    BROTLI_FREE(m, histograms);
4341cb0ef41Sopenharmony_ci    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
4351cb0ef41Sopenharmony_ci    if (BROTLI_IS_OOM(m)) return;
4361cb0ef41Sopenharmony_ci    BROTLI_FREE(m, block_ids);
4371cb0ef41Sopenharmony_ci  }
4381cb0ef41Sopenharmony_ci}
4391cb0ef41Sopenharmony_ci
4401cb0ef41Sopenharmony_ci#undef HistogramType
441