11cb0ef41Sopenharmony_ci/* NOLINT(build/header_guard) */ 21cb0ef41Sopenharmony_ci/* Copyright 2013 Google Inc. All Rights Reserved. 31cb0ef41Sopenharmony_ci 41cb0ef41Sopenharmony_ci Distributed under MIT license. 51cb0ef41Sopenharmony_ci See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 61cb0ef41Sopenharmony_ci*/ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci/* template parameters: FN, DataType */ 91cb0ef41Sopenharmony_ci 101cb0ef41Sopenharmony_ci#define HistogramType FN(Histogram) 111cb0ef41Sopenharmony_ci 121cb0ef41Sopenharmony_cistatic void FN(InitialEntropyCodes)(const DataType* data, size_t length, 131cb0ef41Sopenharmony_ci size_t stride, 141cb0ef41Sopenharmony_ci size_t num_histograms, 151cb0ef41Sopenharmony_ci HistogramType* histograms) { 161cb0ef41Sopenharmony_ci uint32_t seed = 7; 171cb0ef41Sopenharmony_ci size_t block_length = length / num_histograms; 181cb0ef41Sopenharmony_ci size_t i; 191cb0ef41Sopenharmony_ci FN(ClearHistograms)(histograms, num_histograms); 201cb0ef41Sopenharmony_ci for (i = 0; i < num_histograms; ++i) { 211cb0ef41Sopenharmony_ci size_t pos = length * i / num_histograms; 221cb0ef41Sopenharmony_ci if (i != 0) { 231cb0ef41Sopenharmony_ci pos += MyRand(&seed) % block_length; 241cb0ef41Sopenharmony_ci } 251cb0ef41Sopenharmony_ci if (pos + stride >= length) { 261cb0ef41Sopenharmony_ci pos = length - stride - 1; 271cb0ef41Sopenharmony_ci } 281cb0ef41Sopenharmony_ci FN(HistogramAddVector)(&histograms[i], data + pos, stride); 291cb0ef41Sopenharmony_ci } 301cb0ef41Sopenharmony_ci} 311cb0ef41Sopenharmony_ci 321cb0ef41Sopenharmony_cistatic void FN(RandomSample)(uint32_t* seed, 331cb0ef41Sopenharmony_ci const DataType* data, 341cb0ef41Sopenharmony_ci size_t length, 351cb0ef41Sopenharmony_ci size_t stride, 361cb0ef41Sopenharmony_ci HistogramType* sample) { 371cb0ef41Sopenharmony_ci size_t pos = 0; 381cb0ef41Sopenharmony_ci if (stride >= length) { 391cb0ef41Sopenharmony_ci stride = length; 401cb0ef41Sopenharmony_ci } else { 411cb0ef41Sopenharmony_ci pos = MyRand(seed) % (length - stride + 1); 421cb0ef41Sopenharmony_ci } 431cb0ef41Sopenharmony_ci FN(HistogramAddVector)(sample, data + pos, stride); 441cb0ef41Sopenharmony_ci} 451cb0ef41Sopenharmony_ci 461cb0ef41Sopenharmony_cistatic void FN(RefineEntropyCodes)(const DataType* data, size_t length, 471cb0ef41Sopenharmony_ci size_t stride, 481cb0ef41Sopenharmony_ci size_t num_histograms, 491cb0ef41Sopenharmony_ci HistogramType* histograms) { 501cb0ef41Sopenharmony_ci size_t iters = 511cb0ef41Sopenharmony_ci kIterMulForRefining * length / stride + kMinItersForRefining; 521cb0ef41Sopenharmony_ci uint32_t seed = 7; 531cb0ef41Sopenharmony_ci size_t iter; 541cb0ef41Sopenharmony_ci iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; 551cb0ef41Sopenharmony_ci for (iter = 0; iter < iters; ++iter) { 561cb0ef41Sopenharmony_ci HistogramType sample; 571cb0ef41Sopenharmony_ci FN(HistogramClear)(&sample); 581cb0ef41Sopenharmony_ci FN(RandomSample)(&seed, data, length, stride, &sample); 591cb0ef41Sopenharmony_ci FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample); 601cb0ef41Sopenharmony_ci } 611cb0ef41Sopenharmony_ci} 621cb0ef41Sopenharmony_ci 631cb0ef41Sopenharmony_ci/* Assigns a block id from the range [0, num_histograms) to each data element 641cb0ef41Sopenharmony_ci in data[0..length) and fills in block_id[0..length) with the assigned values. 651cb0ef41Sopenharmony_ci Returns the number of blocks, i.e. one plus the number of block switches. */ 661cb0ef41Sopenharmony_cistatic size_t FN(FindBlocks)(const DataType* data, const size_t length, 671cb0ef41Sopenharmony_ci const double block_switch_bitcost, 681cb0ef41Sopenharmony_ci const size_t num_histograms, 691cb0ef41Sopenharmony_ci const HistogramType* histograms, 701cb0ef41Sopenharmony_ci double* insert_cost, 711cb0ef41Sopenharmony_ci double* cost, 721cb0ef41Sopenharmony_ci uint8_t* switch_signal, 731cb0ef41Sopenharmony_ci uint8_t* block_id) { 741cb0ef41Sopenharmony_ci const size_t data_size = FN(HistogramDataSize)(); 751cb0ef41Sopenharmony_ci const size_t bitmaplen = (num_histograms + 7) >> 3; 761cb0ef41Sopenharmony_ci size_t num_blocks = 1; 771cb0ef41Sopenharmony_ci size_t i; 781cb0ef41Sopenharmony_ci size_t j; 791cb0ef41Sopenharmony_ci BROTLI_DCHECK(num_histograms <= 256); 801cb0ef41Sopenharmony_ci if (num_histograms <= 1) { 811cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 821cb0ef41Sopenharmony_ci block_id[i] = 0; 831cb0ef41Sopenharmony_ci } 841cb0ef41Sopenharmony_ci return 1; 851cb0ef41Sopenharmony_ci } 861cb0ef41Sopenharmony_ci memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms); 871cb0ef41Sopenharmony_ci for (i = 0; i < num_histograms; ++i) { 881cb0ef41Sopenharmony_ci insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); 891cb0ef41Sopenharmony_ci } 901cb0ef41Sopenharmony_ci for (i = data_size; i != 0;) { 911cb0ef41Sopenharmony_ci --i; 921cb0ef41Sopenharmony_ci for (j = 0; j < num_histograms; ++j) { 931cb0ef41Sopenharmony_ci insert_cost[i * num_histograms + j] = 941cb0ef41Sopenharmony_ci insert_cost[j] - BitCost(histograms[j].data_[i]); 951cb0ef41Sopenharmony_ci } 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci memset(cost, 0, sizeof(cost[0]) * num_histograms); 981cb0ef41Sopenharmony_ci memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen); 991cb0ef41Sopenharmony_ci /* After each iteration of this loop, cost[k] will contain the difference 1001cb0ef41Sopenharmony_ci between the minimum cost of arriving at the current byte position using 1011cb0ef41Sopenharmony_ci entropy code k, and the minimum cost of arriving at the current byte 1021cb0ef41Sopenharmony_ci position. This difference is capped at the block switch cost, and if it 1031cb0ef41Sopenharmony_ci reaches block switch cost, it means that when we trace back from the last 1041cb0ef41Sopenharmony_ci position, we need to switch here. */ 1051cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 1061cb0ef41Sopenharmony_ci const size_t byte_ix = i; 1071cb0ef41Sopenharmony_ci size_t ix = byte_ix * bitmaplen; 1081cb0ef41Sopenharmony_ci size_t insert_cost_ix = data[byte_ix] * num_histograms; 1091cb0ef41Sopenharmony_ci double min_cost = 1e99; 1101cb0ef41Sopenharmony_ci double block_switch_cost = block_switch_bitcost; 1111cb0ef41Sopenharmony_ci size_t k; 1121cb0ef41Sopenharmony_ci for (k = 0; k < num_histograms; ++k) { 1131cb0ef41Sopenharmony_ci /* We are coding the symbol in data[byte_ix] with entropy code k. */ 1141cb0ef41Sopenharmony_ci cost[k] += insert_cost[insert_cost_ix + k]; 1151cb0ef41Sopenharmony_ci if (cost[k] < min_cost) { 1161cb0ef41Sopenharmony_ci min_cost = cost[k]; 1171cb0ef41Sopenharmony_ci block_id[byte_ix] = (uint8_t)k; 1181cb0ef41Sopenharmony_ci } 1191cb0ef41Sopenharmony_ci } 1201cb0ef41Sopenharmony_ci /* More blocks for the beginning. */ 1211cb0ef41Sopenharmony_ci if (byte_ix < 2000) { 1221cb0ef41Sopenharmony_ci block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; 1231cb0ef41Sopenharmony_ci } 1241cb0ef41Sopenharmony_ci for (k = 0; k < num_histograms; ++k) { 1251cb0ef41Sopenharmony_ci cost[k] -= min_cost; 1261cb0ef41Sopenharmony_ci if (cost[k] >= block_switch_cost) { 1271cb0ef41Sopenharmony_ci const uint8_t mask = (uint8_t)(1u << (k & 7)); 1281cb0ef41Sopenharmony_ci cost[k] = block_switch_cost; 1291cb0ef41Sopenharmony_ci BROTLI_DCHECK((k >> 3) < bitmaplen); 1301cb0ef41Sopenharmony_ci switch_signal[ix + (k >> 3)] |= mask; 1311cb0ef41Sopenharmony_ci } 1321cb0ef41Sopenharmony_ci } 1331cb0ef41Sopenharmony_ci } 1341cb0ef41Sopenharmony_ci { /* Trace back from the last position and switch at the marked places. */ 1351cb0ef41Sopenharmony_ci size_t byte_ix = length - 1; 1361cb0ef41Sopenharmony_ci size_t ix = byte_ix * bitmaplen; 1371cb0ef41Sopenharmony_ci uint8_t cur_id = block_id[byte_ix]; 1381cb0ef41Sopenharmony_ci while (byte_ix > 0) { 1391cb0ef41Sopenharmony_ci const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); 1401cb0ef41Sopenharmony_ci BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen); 1411cb0ef41Sopenharmony_ci --byte_ix; 1421cb0ef41Sopenharmony_ci ix -= bitmaplen; 1431cb0ef41Sopenharmony_ci if (switch_signal[ix + (cur_id >> 3)] & mask) { 1441cb0ef41Sopenharmony_ci if (cur_id != block_id[byte_ix]) { 1451cb0ef41Sopenharmony_ci cur_id = block_id[byte_ix]; 1461cb0ef41Sopenharmony_ci ++num_blocks; 1471cb0ef41Sopenharmony_ci } 1481cb0ef41Sopenharmony_ci } 1491cb0ef41Sopenharmony_ci block_id[byte_ix] = cur_id; 1501cb0ef41Sopenharmony_ci } 1511cb0ef41Sopenharmony_ci } 1521cb0ef41Sopenharmony_ci return num_blocks; 1531cb0ef41Sopenharmony_ci} 1541cb0ef41Sopenharmony_ci 1551cb0ef41Sopenharmony_cistatic size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, 1561cb0ef41Sopenharmony_ci uint16_t* new_id, const size_t num_histograms) { 1571cb0ef41Sopenharmony_ci static const uint16_t kInvalidId = 256; 1581cb0ef41Sopenharmony_ci uint16_t next_id = 0; 1591cb0ef41Sopenharmony_ci size_t i; 1601cb0ef41Sopenharmony_ci for (i = 0; i < num_histograms; ++i) { 1611cb0ef41Sopenharmony_ci new_id[i] = kInvalidId; 1621cb0ef41Sopenharmony_ci } 1631cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 1641cb0ef41Sopenharmony_ci BROTLI_DCHECK(block_ids[i] < num_histograms); 1651cb0ef41Sopenharmony_ci if (new_id[block_ids[i]] == kInvalidId) { 1661cb0ef41Sopenharmony_ci new_id[block_ids[i]] = next_id++; 1671cb0ef41Sopenharmony_ci } 1681cb0ef41Sopenharmony_ci } 1691cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 1701cb0ef41Sopenharmony_ci block_ids[i] = (uint8_t)new_id[block_ids[i]]; 1711cb0ef41Sopenharmony_ci BROTLI_DCHECK(block_ids[i] < num_histograms); 1721cb0ef41Sopenharmony_ci } 1731cb0ef41Sopenharmony_ci BROTLI_DCHECK(next_id <= num_histograms); 1741cb0ef41Sopenharmony_ci return next_id; 1751cb0ef41Sopenharmony_ci} 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_cistatic void FN(BuildBlockHistograms)(const DataType* data, const size_t length, 1781cb0ef41Sopenharmony_ci const uint8_t* block_ids, 1791cb0ef41Sopenharmony_ci const size_t num_histograms, 1801cb0ef41Sopenharmony_ci HistogramType* histograms) { 1811cb0ef41Sopenharmony_ci size_t i; 1821cb0ef41Sopenharmony_ci FN(ClearHistograms)(histograms, num_histograms); 1831cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 1841cb0ef41Sopenharmony_ci FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); 1851cb0ef41Sopenharmony_ci } 1861cb0ef41Sopenharmony_ci} 1871cb0ef41Sopenharmony_ci 1881cb0ef41Sopenharmony_cistatic void FN(ClusterBlocks)(MemoryManager* m, 1891cb0ef41Sopenharmony_ci const DataType* data, const size_t length, 1901cb0ef41Sopenharmony_ci const size_t num_blocks, 1911cb0ef41Sopenharmony_ci uint8_t* block_ids, 1921cb0ef41Sopenharmony_ci BlockSplit* split) { 1931cb0ef41Sopenharmony_ci uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); 1941cb0ef41Sopenharmony_ci uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks); 1951cb0ef41Sopenharmony_ci const size_t expected_num_clusters = CLUSTERS_PER_BATCH * 1961cb0ef41Sopenharmony_ci (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; 1971cb0ef41Sopenharmony_ci size_t all_histograms_size = 0; 1981cb0ef41Sopenharmony_ci size_t all_histograms_capacity = expected_num_clusters; 1991cb0ef41Sopenharmony_ci HistogramType* all_histograms = 2001cb0ef41Sopenharmony_ci BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); 2011cb0ef41Sopenharmony_ci size_t cluster_size_size = 0; 2021cb0ef41Sopenharmony_ci size_t cluster_size_capacity = expected_num_clusters; 2031cb0ef41Sopenharmony_ci uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); 2041cb0ef41Sopenharmony_ci size_t num_clusters = 0; 2051cb0ef41Sopenharmony_ci HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, 2061cb0ef41Sopenharmony_ci BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); 2071cb0ef41Sopenharmony_ci size_t max_num_pairs = 2081cb0ef41Sopenharmony_ci HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; 2091cb0ef41Sopenharmony_ci size_t pairs_capacity = max_num_pairs + 1; 2101cb0ef41Sopenharmony_ci HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); 2111cb0ef41Sopenharmony_ci size_t pos = 0; 2121cb0ef41Sopenharmony_ci uint32_t* clusters; 2131cb0ef41Sopenharmony_ci size_t num_final_clusters; 2141cb0ef41Sopenharmony_ci static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; 2151cb0ef41Sopenharmony_ci uint32_t* new_index; 2161cb0ef41Sopenharmony_ci size_t i; 2171cb0ef41Sopenharmony_ci uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 }; 2181cb0ef41Sopenharmony_ci uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 }; 2191cb0ef41Sopenharmony_ci uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 }; 2201cb0ef41Sopenharmony_ci uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 }; 2211cb0ef41Sopenharmony_ci 2221cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) || 2231cb0ef41Sopenharmony_ci BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) || 2241cb0ef41Sopenharmony_ci BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) || 2251cb0ef41Sopenharmony_ci BROTLI_IS_NULL(pairs)) { 2261cb0ef41Sopenharmony_ci return; 2271cb0ef41Sopenharmony_ci } 2281cb0ef41Sopenharmony_ci 2291cb0ef41Sopenharmony_ci memset(block_lengths, 0, num_blocks * sizeof(uint32_t)); 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_ci { 2321cb0ef41Sopenharmony_ci size_t block_idx = 0; 2331cb0ef41Sopenharmony_ci for (i = 0; i < length; ++i) { 2341cb0ef41Sopenharmony_ci BROTLI_DCHECK(block_idx < num_blocks); 2351cb0ef41Sopenharmony_ci ++block_lengths[block_idx]; 2361cb0ef41Sopenharmony_ci if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { 2371cb0ef41Sopenharmony_ci ++block_idx; 2381cb0ef41Sopenharmony_ci } 2391cb0ef41Sopenharmony_ci } 2401cb0ef41Sopenharmony_ci BROTLI_DCHECK(block_idx == num_blocks); 2411cb0ef41Sopenharmony_ci } 2421cb0ef41Sopenharmony_ci 2431cb0ef41Sopenharmony_ci for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { 2441cb0ef41Sopenharmony_ci const size_t num_to_combine = 2451cb0ef41Sopenharmony_ci BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); 2461cb0ef41Sopenharmony_ci size_t num_new_clusters; 2471cb0ef41Sopenharmony_ci size_t j; 2481cb0ef41Sopenharmony_ci for (j = 0; j < num_to_combine; ++j) { 2491cb0ef41Sopenharmony_ci size_t k; 2501cb0ef41Sopenharmony_ci FN(HistogramClear)(&histograms[j]); 2511cb0ef41Sopenharmony_ci for (k = 0; k < block_lengths[i + j]; ++k) { 2521cb0ef41Sopenharmony_ci FN(HistogramAdd)(&histograms[j], data[pos++]); 2531cb0ef41Sopenharmony_ci } 2541cb0ef41Sopenharmony_ci histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); 2551cb0ef41Sopenharmony_ci new_clusters[j] = (uint32_t)j; 2561cb0ef41Sopenharmony_ci symbols[j] = (uint32_t)j; 2571cb0ef41Sopenharmony_ci sizes[j] = 1; 2581cb0ef41Sopenharmony_ci } 2591cb0ef41Sopenharmony_ci num_new_clusters = FN(BrotliHistogramCombine)( 2601cb0ef41Sopenharmony_ci histograms, sizes, symbols, new_clusters, pairs, num_to_combine, 2611cb0ef41Sopenharmony_ci num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); 2621cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, 2631cb0ef41Sopenharmony_ci all_histograms_capacity, all_histograms_size + num_new_clusters); 2641cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, 2651cb0ef41Sopenharmony_ci cluster_size_capacity, cluster_size_size + num_new_clusters); 2661cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m)) return; 2671cb0ef41Sopenharmony_ci for (j = 0; j < num_new_clusters; ++j) { 2681cb0ef41Sopenharmony_ci all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; 2691cb0ef41Sopenharmony_ci cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; 2701cb0ef41Sopenharmony_ci remap[new_clusters[j]] = (uint32_t)j; 2711cb0ef41Sopenharmony_ci } 2721cb0ef41Sopenharmony_ci for (j = 0; j < num_to_combine; ++j) { 2731cb0ef41Sopenharmony_ci histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; 2741cb0ef41Sopenharmony_ci } 2751cb0ef41Sopenharmony_ci num_clusters += num_new_clusters; 2761cb0ef41Sopenharmony_ci BROTLI_DCHECK(num_clusters == cluster_size_size); 2771cb0ef41Sopenharmony_ci BROTLI_DCHECK(num_clusters == all_histograms_size); 2781cb0ef41Sopenharmony_ci } 2791cb0ef41Sopenharmony_ci BROTLI_FREE(m, histograms); 2801cb0ef41Sopenharmony_ci 2811cb0ef41Sopenharmony_ci max_num_pairs = 2821cb0ef41Sopenharmony_ci BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); 2831cb0ef41Sopenharmony_ci if (pairs_capacity < max_num_pairs + 1) { 2841cb0ef41Sopenharmony_ci BROTLI_FREE(m, pairs); 2851cb0ef41Sopenharmony_ci pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); 2861cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return; 2871cb0ef41Sopenharmony_ci } 2881cb0ef41Sopenharmony_ci 2891cb0ef41Sopenharmony_ci clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); 2901cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return; 2911cb0ef41Sopenharmony_ci for (i = 0; i < num_clusters; ++i) { 2921cb0ef41Sopenharmony_ci clusters[i] = (uint32_t)i; 2931cb0ef41Sopenharmony_ci } 2941cb0ef41Sopenharmony_ci num_final_clusters = FN(BrotliHistogramCombine)( 2951cb0ef41Sopenharmony_ci all_histograms, cluster_size, histogram_symbols, clusters, pairs, 2961cb0ef41Sopenharmony_ci num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, 2971cb0ef41Sopenharmony_ci max_num_pairs); 2981cb0ef41Sopenharmony_ci BROTLI_FREE(m, pairs); 2991cb0ef41Sopenharmony_ci BROTLI_FREE(m, cluster_size); 3001cb0ef41Sopenharmony_ci 3011cb0ef41Sopenharmony_ci new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); 3021cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return; 3031cb0ef41Sopenharmony_ci for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; 3041cb0ef41Sopenharmony_ci pos = 0; 3051cb0ef41Sopenharmony_ci { 3061cb0ef41Sopenharmony_ci uint32_t next_index = 0; 3071cb0ef41Sopenharmony_ci for (i = 0; i < num_blocks; ++i) { 3081cb0ef41Sopenharmony_ci HistogramType histo; 3091cb0ef41Sopenharmony_ci size_t j; 3101cb0ef41Sopenharmony_ci uint32_t best_out; 3111cb0ef41Sopenharmony_ci double best_bits; 3121cb0ef41Sopenharmony_ci FN(HistogramClear)(&histo); 3131cb0ef41Sopenharmony_ci for (j = 0; j < block_lengths[i]; ++j) { 3141cb0ef41Sopenharmony_ci FN(HistogramAdd)(&histo, data[pos++]); 3151cb0ef41Sopenharmony_ci } 3161cb0ef41Sopenharmony_ci best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; 3171cb0ef41Sopenharmony_ci best_bits = 3181cb0ef41Sopenharmony_ci FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]); 3191cb0ef41Sopenharmony_ci for (j = 0; j < num_final_clusters; ++j) { 3201cb0ef41Sopenharmony_ci const double cur_bits = FN(BrotliHistogramBitCostDistance)( 3211cb0ef41Sopenharmony_ci &histo, &all_histograms[clusters[j]]); 3221cb0ef41Sopenharmony_ci if (cur_bits < best_bits) { 3231cb0ef41Sopenharmony_ci best_bits = cur_bits; 3241cb0ef41Sopenharmony_ci best_out = clusters[j]; 3251cb0ef41Sopenharmony_ci } 3261cb0ef41Sopenharmony_ci } 3271cb0ef41Sopenharmony_ci histogram_symbols[i] = best_out; 3281cb0ef41Sopenharmony_ci if (new_index[best_out] == kInvalidIndex) { 3291cb0ef41Sopenharmony_ci new_index[best_out] = next_index++; 3301cb0ef41Sopenharmony_ci } 3311cb0ef41Sopenharmony_ci } 3321cb0ef41Sopenharmony_ci } 3331cb0ef41Sopenharmony_ci BROTLI_FREE(m, clusters); 3341cb0ef41Sopenharmony_ci BROTLI_FREE(m, all_histograms); 3351cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY( 3361cb0ef41Sopenharmony_ci m, uint8_t, split->types, split->types_alloc_size, num_blocks); 3371cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY( 3381cb0ef41Sopenharmony_ci m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); 3391cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m)) return; 3401cb0ef41Sopenharmony_ci { 3411cb0ef41Sopenharmony_ci uint32_t cur_length = 0; 3421cb0ef41Sopenharmony_ci size_t block_idx = 0; 3431cb0ef41Sopenharmony_ci uint8_t max_type = 0; 3441cb0ef41Sopenharmony_ci for (i = 0; i < num_blocks; ++i) { 3451cb0ef41Sopenharmony_ci cur_length += block_lengths[i]; 3461cb0ef41Sopenharmony_ci if (i + 1 == num_blocks || 3471cb0ef41Sopenharmony_ci histogram_symbols[i] != histogram_symbols[i + 1]) { 3481cb0ef41Sopenharmony_ci const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; 3491cb0ef41Sopenharmony_ci split->types[block_idx] = id; 3501cb0ef41Sopenharmony_ci split->lengths[block_idx] = cur_length; 3511cb0ef41Sopenharmony_ci max_type = BROTLI_MAX(uint8_t, max_type, id); 3521cb0ef41Sopenharmony_ci cur_length = 0; 3531cb0ef41Sopenharmony_ci ++block_idx; 3541cb0ef41Sopenharmony_ci } 3551cb0ef41Sopenharmony_ci } 3561cb0ef41Sopenharmony_ci split->num_blocks = block_idx; 3571cb0ef41Sopenharmony_ci split->num_types = (size_t)max_type + 1; 3581cb0ef41Sopenharmony_ci } 3591cb0ef41Sopenharmony_ci BROTLI_FREE(m, new_index); 3601cb0ef41Sopenharmony_ci BROTLI_FREE(m, block_lengths); 3611cb0ef41Sopenharmony_ci BROTLI_FREE(m, histogram_symbols); 3621cb0ef41Sopenharmony_ci} 3631cb0ef41Sopenharmony_ci 3641cb0ef41Sopenharmony_cistatic void FN(SplitByteVector)(MemoryManager* m, 3651cb0ef41Sopenharmony_ci const DataType* data, const size_t length, 3661cb0ef41Sopenharmony_ci const size_t literals_per_histogram, 3671cb0ef41Sopenharmony_ci const size_t max_histograms, 3681cb0ef41Sopenharmony_ci const size_t sampling_stride_length, 3691cb0ef41Sopenharmony_ci const double block_switch_cost, 3701cb0ef41Sopenharmony_ci const BrotliEncoderParams* params, 3711cb0ef41Sopenharmony_ci BlockSplit* split) { 3721cb0ef41Sopenharmony_ci const size_t data_size = FN(HistogramDataSize)(); 3731cb0ef41Sopenharmony_ci size_t num_histograms = length / literals_per_histogram + 1; 3741cb0ef41Sopenharmony_ci HistogramType* histograms; 3751cb0ef41Sopenharmony_ci if (num_histograms > max_histograms) { 3761cb0ef41Sopenharmony_ci num_histograms = max_histograms; 3771cb0ef41Sopenharmony_ci } 3781cb0ef41Sopenharmony_ci if (length == 0) { 3791cb0ef41Sopenharmony_ci split->num_types = 1; 3801cb0ef41Sopenharmony_ci return; 3811cb0ef41Sopenharmony_ci } else if (length < kMinLengthForBlockSplitting) { 3821cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY(m, uint8_t, 3831cb0ef41Sopenharmony_ci split->types, split->types_alloc_size, split->num_blocks + 1); 3841cb0ef41Sopenharmony_ci BROTLI_ENSURE_CAPACITY(m, uint32_t, 3851cb0ef41Sopenharmony_ci split->lengths, split->lengths_alloc_size, split->num_blocks + 1); 3861cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m)) return; 3871cb0ef41Sopenharmony_ci split->num_types = 1; 3881cb0ef41Sopenharmony_ci split->types[split->num_blocks] = 0; 3891cb0ef41Sopenharmony_ci split->lengths[split->num_blocks] = (uint32_t)length; 3901cb0ef41Sopenharmony_ci split->num_blocks++; 3911cb0ef41Sopenharmony_ci return; 3921cb0ef41Sopenharmony_ci } 3931cb0ef41Sopenharmony_ci histograms = BROTLI_ALLOC(m, HistogramType, num_histograms); 3941cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return; 3951cb0ef41Sopenharmony_ci /* Find good entropy codes. */ 3961cb0ef41Sopenharmony_ci FN(InitialEntropyCodes)(data, length, 3971cb0ef41Sopenharmony_ci sampling_stride_length, 3981cb0ef41Sopenharmony_ci num_histograms, histograms); 3991cb0ef41Sopenharmony_ci FN(RefineEntropyCodes)(data, length, 4001cb0ef41Sopenharmony_ci sampling_stride_length, 4011cb0ef41Sopenharmony_ci num_histograms, histograms); 4021cb0ef41Sopenharmony_ci { 4031cb0ef41Sopenharmony_ci /* Find a good path through literals with the good entropy codes. */ 4041cb0ef41Sopenharmony_ci uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); 4051cb0ef41Sopenharmony_ci size_t num_blocks = 0; 4061cb0ef41Sopenharmony_ci const size_t bitmaplen = (num_histograms + 7) >> 3; 4071cb0ef41Sopenharmony_ci double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); 4081cb0ef41Sopenharmony_ci double* cost = BROTLI_ALLOC(m, double, num_histograms); 4091cb0ef41Sopenharmony_ci uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); 4101cb0ef41Sopenharmony_ci uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); 4111cb0ef41Sopenharmony_ci const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; 4121cb0ef41Sopenharmony_ci size_t i; 4131cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) || 4141cb0ef41Sopenharmony_ci BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) || 4151cb0ef41Sopenharmony_ci BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) { 4161cb0ef41Sopenharmony_ci return; 4171cb0ef41Sopenharmony_ci } 4181cb0ef41Sopenharmony_ci for (i = 0; i < iters; ++i) { 4191cb0ef41Sopenharmony_ci num_blocks = FN(FindBlocks)(data, length, 4201cb0ef41Sopenharmony_ci block_switch_cost, 4211cb0ef41Sopenharmony_ci num_histograms, histograms, 4221cb0ef41Sopenharmony_ci insert_cost, cost, switch_signal, 4231cb0ef41Sopenharmony_ci block_ids); 4241cb0ef41Sopenharmony_ci num_histograms = FN(RemapBlockIds)(block_ids, length, 4251cb0ef41Sopenharmony_ci new_id, num_histograms); 4261cb0ef41Sopenharmony_ci FN(BuildBlockHistograms)(data, length, block_ids, 4271cb0ef41Sopenharmony_ci num_histograms, histograms); 4281cb0ef41Sopenharmony_ci } 4291cb0ef41Sopenharmony_ci BROTLI_FREE(m, insert_cost); 4301cb0ef41Sopenharmony_ci BROTLI_FREE(m, cost); 4311cb0ef41Sopenharmony_ci BROTLI_FREE(m, switch_signal); 4321cb0ef41Sopenharmony_ci BROTLI_FREE(m, new_id); 4331cb0ef41Sopenharmony_ci BROTLI_FREE(m, histograms); 4341cb0ef41Sopenharmony_ci FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); 4351cb0ef41Sopenharmony_ci if (BROTLI_IS_OOM(m)) return; 4361cb0ef41Sopenharmony_ci BROTLI_FREE(m, block_ids); 4371cb0ef41Sopenharmony_ci } 4381cb0ef41Sopenharmony_ci} 4391cb0ef41Sopenharmony_ci 4401cb0ef41Sopenharmony_ci#undef HistogramType 441