1/* trees.c -- output deflated data using Huffman coding 2 * Copyright (C) 1995-2024 Jean-loup Gailly 3 * detect_data_type() function provided freely by Cosmin Truta, 2006 4 * For conditions of distribution and use, see copyright notice in zlib.h 5 */ 6 7/* 8 * ALGORITHM 9 * 10 * The "deflation" process uses several Huffman trees. The more 11 * common source values are represented by shorter bit sequences. 12 * 13 * Each code tree is stored in a compressed form which is itself 14 * a Huffman encoding of the lengths of all the code strings (in 15 * ascending order by source values). The actual code strings are 16 * reconstructed from the lengths in the inflate process, as described 17 * in the deflate specification. 18 * 19 * REFERENCES 20 * 21 * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". 22 * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc 23 * 24 * Storer, James A. 25 * Data Compression: Methods and Theory, pp. 49-50. 26 * Computer Science Press, 1988. ISBN 0-7167-8156-5. 27 * 28 * Sedgewick, R. 29 * Algorithms, p290. 30 * Addison-Wesley, 1983. ISBN 0-201-06672-6. 31 */ 32 33/* @(#) $Id$ */ 34 35/* #define GEN_TREES_H */ 36 37#include "deflate.h" 38 39#ifdef ZLIB_DEBUG 40# include <ctype.h> 41#endif 42 43/* =========================================================================== 44 * Constants 45 */ 46 47#define MAX_BL_BITS 7 48/* Bit length codes must not exceed MAX_BL_BITS bits */ 49 50#define END_BLOCK 256 51/* end of block literal code */ 52 53#define REP_3_6 16 54/* repeat previous bit length 3-6 times (2 bits of repeat count) */ 55 56#define REPZ_3_10 17 57/* repeat a zero length 3-10 times (3 bits of repeat count) */ 58 59#define REPZ_11_138 18 60/* repeat a zero length 11-138 times (7 bits of repeat count) */ 61 62local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ 63 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; 64 65local const int extra_dbits[D_CODES] /* extra bits for each distance code */ 66 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; 67 68local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ 69 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; 70 71local const uch bl_order[BL_CODES] 72 = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; 73/* The lengths of the bit length codes are sent in order of decreasing 74 * probability, to avoid transmitting the lengths for unused bit length codes. 75 */ 76 77/* =========================================================================== 78 * Local data. These are initialized only once. 79 */ 80 81#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ 82 83#if defined(GEN_TREES_H) || !defined(STDC) 84/* non ANSI compilers may not accept trees.h */ 85 86local ct_data static_ltree[L_CODES+2]; 87/* The static literal tree. Since the bit lengths are imposed, there is no 88 * need for the L_CODES extra codes used during heap construction. However 89 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init 90 * below). 91 */ 92 93local ct_data static_dtree[D_CODES]; 94/* The static distance tree. (Actually a trivial tree since all codes use 95 * 5 bits.) 96 */ 97 98uch _dist_code[DIST_CODE_LEN]; 99/* Distance codes. The first 256 values correspond to the distances 100 * 3 .. 258, the last 256 values correspond to the top 8 bits of 101 * the 15 bit distances. 102 */ 103 104uch _length_code[MAX_MATCH-MIN_MATCH+1]; 105/* length code for each normalized match length (0 == MIN_MATCH) */ 106 107local int base_length[LENGTH_CODES]; 108/* First normalized length for each code (0 = MIN_MATCH) */ 109 110local int base_dist[D_CODES]; 111/* First normalized distance for each code (0 = distance of 1) */ 112 113#else 114# include "trees.h" 115#endif /* GEN_TREES_H */ 116 117struct static_tree_desc_s { 118 const ct_data *static_tree; /* static tree or NULL */ 119 const intf *extra_bits; /* extra bits for each code or NULL */ 120 int extra_base; /* base index for extra_bits */ 121 int elems; /* max number of elements in the tree */ 122 int max_length; /* max bit length for the codes */ 123}; 124 125#ifdef NO_INIT_GLOBAL_POINTERS 126# define TCONST 127#else 128# define TCONST const 129#endif 130 131local TCONST static_tree_desc static_l_desc = 132{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; 133 134local TCONST static_tree_desc static_d_desc = 135{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; 136 137local TCONST static_tree_desc static_bl_desc = 138{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; 139 140/* =========================================================================== 141 * Output a short LSB first on the stream. 142 * IN assertion: there is enough room in pendingBuf. 143 */ 144#define put_short(s, w) { \ 145 put_byte(s, (uch)((w) & 0xff)); \ 146 put_byte(s, (uch)((ush)(w) >> 8)); \ 147} 148 149#define INDEX_2 2 150#define INDEX_7 7 151#define INDEX_8 8 152#define INDEX_9 9 153#define INDEX_13 13 154#define INDEX_16 16 155#define INDEX_31 31 156#define INDEX_32 32 157 158/* =========================================================================== 159 * Reverse the first len bits of a code, using straightforward code (a faster 160 * method would use a table) 161 * IN assertion: 1 <= len <= 15 162 */ 163local unsigned bi_reverse(unsigned code, int len) 164{ 165 register unsigned res = 0; 166 do { 167 res |= code & 1; 168 code >>= 1, res <<= 1; 169 } while (--len > 0); 170 return res >> 1; 171} 172 173/* =========================================================================== 174 * Flush the bit buffer, keeping at most 7 bits in it. 175 */ 176local void bi_flush(deflate_state *s) 177{ 178 if (s->bi_valid == INDEX_16) { 179 put_short(s, s->bi_buf); 180 s->bi_buf = 0; 181 s->bi_valid = 0; 182 } else if (s->bi_valid >= INDEX_8) { 183 put_byte(s, (Byte)s->bi_buf); 184 s->bi_buf >>= INDEX_8; 185 s->bi_valid -= INDEX_8; 186 } 187} 188 189/* =========================================================================== 190 * Flush the bit buffer and align the output on a byte boundary 191 */ 192local void bi_windup(deflate_state *s) 193{ 194 if (s->bi_valid > INDEX_8) { 195 put_short(s, s->bi_buf); 196 } else if (s->bi_valid > 0) { 197 put_byte(s, (Byte)s->bi_buf); 198 } 199 s->bi_buf = 0; 200 s->bi_valid = 0; 201#ifdef ZLIB_DEBUG 202 s->bits_sent = (s->bits_sent + INDEX_7) & ~INDEX_7; 203#endif 204} 205 206/* =========================================================================== 207 * Generate the codes for a given tree and bit counts (which need not be 208 * optimal). 209 * IN assertion: the array bl_count contains the bit length statistics for 210 * the given tree and the field len is set for all tree elements. 211 * OUT assertion: the field code is set for all tree elements of non 212 * zero code length. 213 */ 214local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) 215{ 216 ush next_code[MAX_BITS+1]; /* next code value for each bit length */ 217 unsigned code = 0; /* running code value */ 218 int bits; /* bit index */ 219 int n; /* code index */ 220 221 /* The distribution counts are first used to generate the code values 222 * without bit reversal. 223 */ 224 for (bits = 1; bits <= MAX_BITS; bits++) { 225 code = (code + bl_count[bits - 1]) << 1; 226 next_code[bits] = (ush)code; 227 } 228 /* Check that the bit counts in bl_count are consistent. The last code 229 * must be all ones. 230 */ 231 Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, 232 "inconsistent bit counts"); 233 Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); 234 235 for (n = 0; n <= max_code; n++) { 236 int len = tree[n].Len; 237 if (len == 0) continue; 238 /* Now reverse the bits */ 239 tree[n].Code = (ush)bi_reverse(next_code[len]++, len); 240 241 Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", 242 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1)); 243 } 244} 245 246#ifdef GEN_TREES_H 247local void gen_trees_header(void); 248#endif 249 250#ifndef ZLIB_DEBUG 251# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) 252 /* Send a code of the given tree. c and tree must not have side effects */ 253 254#else /* !ZLIB_DEBUG */ 255# define send_code(s, c, tree) \ 256 { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ 257 send_bits(s, tree[c].Code, tree[c].Len); } 258#endif 259 260/* =========================================================================== 261 * Send a value on a given number of bits. 262 * IN assertion: length <= 16 and value fits in length bits. 263 */ 264#ifdef ZLIB_DEBUG 265local void send_bits(deflate_state *s, int value, int length) 266{ 267 Tracevv((stderr," l %2d v %4x ", length, value)); 268 Assert(length > 0 && length <= 15, "invalid length"); 269 s->bits_sent += (ulg)length; 270 271 /* If not enough room in bi_buf, use (valid) bits from bi_buf and 272 * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid)) 273 * unused bits in value. 274 */ 275 if (s->bi_valid > (int)Buf_size - length) { 276 s->bi_buf |= (ush)value << s->bi_valid; 277 put_short(s, s->bi_buf); 278 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); 279 s->bi_valid += length - Buf_size; 280 } else { 281 s->bi_buf |= (ush)value << s->bi_valid; 282 s->bi_valid += length; 283 } 284} 285#else /* !ZLIB_DEBUG */ 286 287#define send_bits(s, value, length) \ 288{ int len = length;\ 289 if (s->bi_valid > (int)Buf_size - len) {\ 290 int val = (int)value;\ 291 s->bi_buf |= (ush)val << s->bi_valid;\ 292 put_short(s, s->bi_buf);\ 293 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ 294 s->bi_valid += len - Buf_size;\ 295 } else {\ 296 s->bi_buf |= (ush)(value) << s->bi_valid;\ 297 s->bi_valid += len;\ 298 }\ 299} 300#endif /* ZLIB_DEBUG */ 301 302 303/* the arguments must not have side effects */ 304 305/* =========================================================================== 306 * Initialize the various 'constant' tables. 307 */ 308local void tr_static_init(void) 309{ 310#if defined(GEN_TREES_H) || !defined(STDC) 311 static int static_init_done = 0; 312 int n; /* iterates over tree elements */ 313 int bits; /* bit counter */ 314 int length; /* length value */ 315 int code; /* code value */ 316 int dist; /* distance index */ 317 ush bl_count[MAX_BITS+1]; 318 /* number of codes at each bit length for an optimal tree */ 319 320 if (static_init_done) return; 321 322 /* For some embedded targets, global variables are not initialized: */ 323#ifdef NO_INIT_GLOBAL_POINTERS 324 static_l_desc.static_tree = static_ltree; 325 static_l_desc.extra_bits = extra_lbits; 326 static_d_desc.static_tree = static_dtree; 327 static_d_desc.extra_bits = extra_dbits; 328 static_bl_desc.extra_bits = extra_blbits; 329#endif 330 331 /* Initialize the mapping length (0..255) -> length code (0..28) */ 332 length = 0; 333 for (code = 0; code < LENGTH_CODES-1; code++) { 334 base_length[code] = length; 335 for (n = 0; n < (1 << extra_lbits[code]); n++) { 336 _length_code[length++] = (uch)code; 337 } 338 } 339 Assert (length == 256, "tr_static_init: length != 256"); 340 /* Note that the length 255 (match length 258) can be represented 341 * in two different ways: code 284 + 5 bits or code 285, so we 342 * overwrite length_code[255] to use the best encoding: 343 */ 344 _length_code[length - 1] = (uch)code; 345 346 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ 347 dist = 0; 348 for (code = 0 ; code < 16; code++) { 349 base_dist[code] = dist; 350 for (n = 0; n < (1 << extra_dbits[code]); n++) { 351 _dist_code[dist++] = (uch)code; 352 } 353 } 354 Assert (dist == 256, "tr_static_init: dist != 256"); 355 dist >>= 7; /* from now on, all distances are divided by 128 */ 356 for ( ; code < D_CODES; code++) { 357 base_dist[code] = dist << 7; 358 for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { 359 _dist_code[256 + dist++] = (uch)code; 360 } 361 } 362 Assert (dist == 256, "tr_static_init: 256 + dist != 512"); 363 364 /* Construct the codes of the static literal tree */ 365 for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; 366 n = 0; 367 while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; 368 while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; 369 while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; 370 while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; 371 /* Codes 286 and 287 do not exist, but we must include them in the 372 * tree construction to get a canonical Huffman tree (longest code 373 * all ones) 374 */ 375 gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); 376 377 /* The static distance tree is trivial: */ 378 for (n = 0; n < D_CODES; n++) { 379 static_dtree[n].Len = 5; 380 static_dtree[n].Code = bi_reverse((unsigned)n, 5); 381 } 382 static_init_done = 1; 383 384# ifdef GEN_TREES_H 385 gen_trees_header(); 386# endif 387#endif /* defined(GEN_TREES_H) || !defined(STDC) */ 388} 389 390/* =========================================================================== 391 * Generate the file trees.h describing the static trees. 392 */ 393#ifdef GEN_TREES_H 394# ifndef ZLIB_DEBUG 395# include <stdio.h> 396# endif 397 398# define SEPARATOR(i, last, width) \ 399 ((i) == (last)? "\n};\n\n" : \ 400 ((i) % (width) == (width) - 1 ? ",\n" : ", ")) 401 402void gen_trees_header(void) 403{ 404 FILE *header = fopen("trees.h", "w"); 405 int i; 406 407 Assert (header != NULL, "Can't open trees.h"); 408 fprintf(header, 409 "/* header created automatically with -DGEN_TREES_H */\n\n"); 410 411 fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); 412 for (i = 0; i < L_CODES+2; i++) { 413 fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, 414 static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); 415 } 416 417 fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); 418 for (i = 0; i < D_CODES; i++) { 419 fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, 420 static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); 421 } 422 423 fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); 424 for (i = 0; i < DIST_CODE_LEN; i++) { 425 fprintf(header, "%2u%s", _dist_code[i], 426 SEPARATOR(i, DIST_CODE_LEN-1, 20)); 427 } 428 429 fprintf(header, 430 "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); 431 for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { 432 fprintf(header, "%2u%s", _length_code[i], 433 SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); 434 } 435 436 fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); 437 for (i = 0; i < LENGTH_CODES; i++) { 438 fprintf(header, "%1u%s", base_length[i], 439 SEPARATOR(i, LENGTH_CODES-1, 20)); 440 } 441 442 fprintf(header, "local const int base_dist[D_CODES] = {\n"); 443 for (i = 0; i < D_CODES; i++) { 444 fprintf(header, "%5u%s", base_dist[i], 445 SEPARATOR(i, D_CODES-1, 10)); 446 } 447 448 fclose(header); 449} 450#endif /* GEN_TREES_H */ 451 452/* =========================================================================== 453 * Initialize a new block. 454 */ 455local void init_block(deflate_state *s) 456{ 457 int n; /* iterates over tree elements */ 458 459 /* Initialize the trees. */ 460 for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; 461 for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; 462 for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; 463 464 s->dyn_ltree[END_BLOCK].Freq = 1; 465 s->opt_len = s->static_len = 0L; 466 s->sym_next = s->matches = 0; 467} 468 469/* =========================================================================== 470 * Initialize the tree data structures for a new zlib stream. 471 */ 472void ZLIB_INTERNAL _tr_init(deflate_state *s) { 473 tr_static_init(); 474 475 s->l_desc.dyn_tree = s->dyn_ltree; 476 s->l_desc.stat_desc = &static_l_desc; 477 478 s->d_desc.dyn_tree = s->dyn_dtree; 479 s->d_desc.stat_desc = &static_d_desc; 480 481 s->bl_desc.dyn_tree = s->bl_tree; 482 s->bl_desc.stat_desc = &static_bl_desc; 483 484 s->bi_buf = 0; 485 s->bi_valid = 0; 486#ifdef ZLIB_DEBUG 487 s->compressed_len = 0L; 488 s->bits_sent = 0L; 489#endif 490 491 /* Initialize the first block of the first file: */ 492 init_block(s); 493} 494 495#define SMALLEST 1 496/* Index within the heap array of least frequent node in the Huffman tree */ 497 498 499/* =========================================================================== 500 * Remove the smallest element from the heap and recreate the heap with 501 * one less element. Updates heap and heap_len. 502 */ 503#define pqremove(s, tree, top) \ 504{\ 505 top = s->heap[SMALLEST]; \ 506 s->heap[SMALLEST] = s->heap[s->heap_len--]; \ 507 pqdownheap(s, tree, SMALLEST); \ 508} 509 510/* =========================================================================== 511 * Compares to subtrees, using the tree depth as tie breaker when 512 * the subtrees have equal frequency. This minimizes the worst case length. 513 */ 514#define smaller(tree, n, m, depth) \ 515 (tree[n].Freq < tree[m].Freq || \ 516 (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) 517 518/* =========================================================================== 519 * Restore the heap property by moving down the tree starting at node k, 520 * exchanging a node with the smallest of its two sons if necessary, stopping 521 * when the heap property is re-established (each father smaller than its 522 * two sons). 523 */ 524local void pqdownheap(deflate_state *s, ct_data *tree, int k) 525{ 526 int v = s->heap[k]; 527 int j = k << 1; /* left son of k */ 528 while (j <= s->heap_len) { 529 /* Set j to the smallest of the two sons: */ 530 if (j < s->heap_len && 531 smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) { 532 j++; 533 } 534 /* Exit if v is smaller than both sons */ 535 if (smaller(tree, v, s->heap[j], s->depth)) break; 536 537 /* Exchange v with the smallest son */ 538 s->heap[k] = s->heap[j]; k = j; 539 540 /* And continue down the tree, setting j to the left son of k */ 541 j <<= 1; 542 } 543 s->heap[k] = v; 544} 545 546/* =========================================================================== 547 * Compute the optimal bit lengths for a tree and update the total bit length 548 * for the current block. 549 * IN assertion: the fields freq and dad are set, heap[heap_max] and 550 * above are the tree nodes sorted by increasing frequency. 551 * OUT assertions: the field len is set to the optimal bit length, the 552 * array bl_count contains the frequencies for each bit length. 553 * The length opt_len is updated; static_len is also updated if stree is 554 * not null. 555 */ 556local void gen_bitlen(deflate_state *s, tree_desc *desc) 557{ 558 ct_data *tree = desc->dyn_tree; 559 int max_code = desc->max_code; 560 const ct_data *stree = desc->stat_desc->static_tree; 561 const intf *extra = desc->stat_desc->extra_bits; 562 int base = desc->stat_desc->extra_base; 563 int max_length = desc->stat_desc->max_length; 564 int h; /* heap index */ 565 int n, m; /* iterate over the tree elements */ 566 int bits; /* bit length */ 567 int xbits; /* extra bits */ 568 ush f; /* frequency */ 569 int overflow = 0; /* number of elements with bit length too large */ 570 571 for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; 572 573 /* In a first pass, compute the optimal bit lengths (which may 574 * overflow in the case of the bit length tree). 575 */ 576 tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ 577 578 for (h = s->heap_max + 1; h < HEAP_SIZE; h++) { 579 n = s->heap[h]; 580 bits = tree[tree[n].Dad].Len + 1; 581 if (bits > max_length) bits = max_length, overflow++; 582 tree[n].Len = (ush)bits; 583 /* We overwrite tree[n].Dad which is no longer needed */ 584 585 if (n > max_code) continue; /* not a leaf node */ 586 587 s->bl_count[bits]++; 588 xbits = 0; 589 if (n >= base) xbits = extra[n - base]; 590 f = tree[n].Freq; 591 s->opt_len += (ulg)f * (unsigned)(bits + xbits); 592 if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); 593 } 594 if (overflow == 0) return; 595 596 Tracev((stderr,"\nbit length overflow\n")); 597 /* This happens for example on obj2 and pic of the Calgary corpus */ 598 599 /* Find the first bit length which could increase: */ 600 do { 601 bits = max_length - 1; 602 while (s->bl_count[bits] == 0) bits--; 603 s->bl_count[bits]--; /* move one leaf down the tree */ 604 s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */ 605 s->bl_count[max_length]--; 606 /* The brother of the overflow item also moves one step up, 607 * but this does not affect bl_count[max_length] 608 */ 609 overflow -= 2; 610 } while (overflow > 0); 611 612 /* Now recompute all bit lengths, scanning in increasing frequency. 613 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all 614 * lengths instead of fixing only the wrong ones. This idea is taken 615 * from 'ar' written by Haruhiko Okumura.) 616 */ 617 for (bits = max_length; bits != 0; bits--) { 618 n = s->bl_count[bits]; 619 while (n != 0) { 620 m = s->heap[--h]; 621 if (m > max_code) continue; 622 if ((unsigned) tree[m].Len != (unsigned) bits) { 623 Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); 624 s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; 625 tree[m].Len = (ush)bits; 626 } 627 n--; 628 } 629 } 630} 631 632#ifdef DUMP_BL_TREE 633# include <stdio.h> 634#endif 635 636/* =========================================================================== 637 * Construct one Huffman tree and assigns the code bit strings and lengths. 638 * Update the total bit length for the current block. 639 * IN assertion: the field freq is set for all tree elements. 640 * OUT assertions: the fields len and code are set to the optimal bit length 641 * and corresponding code. The length opt_len is updated; static_len is 642 * also updated if stree is not null. The field max_code is set. 643 */ 644local void build_tree(deflate_state *s, tree_desc *desc) 645{ 646 ct_data *tree = desc->dyn_tree; 647 const ct_data *stree = desc->stat_desc->static_tree; 648 int elems = desc->stat_desc->elems; 649 int n, m; /* iterate over heap elements */ 650 int max_code = -1; /* largest code with non zero frequency */ 651 int node; /* new node being created */ 652 653 /* Construct the initial heap, with least frequent element in 654 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1]. 655 * heap[0] is not used. 656 */ 657 s->heap_len = 0, s->heap_max = HEAP_SIZE; 658 659 for (n = 0; n < elems; n++) { 660 if (tree[n].Freq != 0) { 661 s->heap[++(s->heap_len)] = max_code = n; 662 s->depth[n] = 0; 663 } else { 664 tree[n].Len = 0; 665 } 666 } 667 668 /* The pkzip format requires that at least one distance code exists, 669 * and that at least one bit should be sent even if there is only one 670 * possible code. So to avoid special checks later on we force at least 671 * two codes of non zero frequency. 672 */ 673 while (s->heap_len < 2) { 674 node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); 675 tree[node].Freq = 1; 676 s->depth[node] = 0; 677 s->opt_len--; if (stree) s->static_len -= stree[node].Len; 678 /* node is 0 or 1 so it does not have extra bits */ 679 } 680 desc->max_code = max_code; 681 682 /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree, 683 * establish sub-heaps of increasing lengths: 684 */ 685 for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); 686 687 /* Construct the Huffman tree by repeatedly combining the least two 688 * frequent nodes. 689 */ 690 node = elems; /* next internal node of the tree */ 691 do { 692 pqremove(s, tree, n); /* n = node of least frequency */ 693 m = s->heap[SMALLEST]; /* m = node of next least frequency */ 694 695 s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ 696 s->heap[--(s->heap_max)] = m; 697 698 /* Create a new node father of n and m */ 699 tree[node].Freq = tree[n].Freq + tree[m].Freq; 700 s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 701 s->depth[n] : s->depth[m]) + 1); 702 tree[n].Dad = tree[m].Dad = (ush)node; 703#ifdef DUMP_BL_TREE 704 if (tree == s->bl_tree) { 705 fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", 706 node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); 707 } 708#endif 709 /* and insert the new node in the heap */ 710 s->heap[SMALLEST] = node++; 711 pqdownheap(s, tree, SMALLEST); 712 713 } while (s->heap_len >= 2); 714 715 s->heap[--(s->heap_max)] = s->heap[SMALLEST]; 716 717 /* At this point, the fields freq and dad are set. We can now 718 * generate the bit lengths. 719 */ 720 gen_bitlen(s, (tree_desc *)desc); 721 722 /* The field len is now set, we can generate the bit codes */ 723 gen_codes ((ct_data *)tree, max_code, s->bl_count); 724} 725 726/* =========================================================================== 727 * Scan a literal or distance tree to determine the frequencies of the codes 728 * in the bit length tree. 729 */ 730local void scan_tree(deflate_state *s, ct_data *tree, int max_code) 731{ 732 int n; /* iterates over all tree elements */ 733 int prevlen = -1; /* last emitted length */ 734 int curlen; /* length of current code */ 735 int nextlen = tree[0].Len; /* length of next code */ 736 int count = 0; /* repeat count of the current code */ 737 int max_count = 7; /* max repeat count */ 738 int min_count = 4; /* min repeat count */ 739 740 if (nextlen == 0) max_count = 138, min_count = 3; 741 tree[max_code + 1].Len = (ush)0xffff; /* guard */ 742 743 for (n = 0; n <= max_code; n++) { 744 curlen = nextlen; nextlen = tree[n + 1].Len; 745 if (++count < max_count && curlen == nextlen) { 746 continue; 747 } else if (count < min_count) { 748 s->bl_tree[curlen].Freq += count; 749 } else if (curlen != 0) { 750 if (curlen != prevlen) s->bl_tree[curlen].Freq++; 751 s->bl_tree[REP_3_6].Freq++; 752 } else if (count <= 10) { 753 s->bl_tree[REPZ_3_10].Freq++; 754 } else { 755 s->bl_tree[REPZ_11_138].Freq++; 756 } 757 count = 0; prevlen = curlen; 758 if (nextlen == 0) { 759 max_count = 138, min_count = 3; 760 } else if (curlen == nextlen) { 761 max_count = 6, min_count = 3; 762 } else { 763 max_count = 7, min_count = 4; 764 } 765 } 766} 767 768/* =========================================================================== 769 * Send a literal or distance tree in compressed form, using the codes in 770 * bl_tree. 771 */ 772local void send_tree(deflate_state *s, ct_data *tree, int max_code) 773{ 774 int n; /* iterates over all tree elements */ 775 int prevlen = -1; /* last emitted length */ 776 int curlen; /* length of current code */ 777 int nextlen = tree[0].Len; /* length of next code */ 778 int count = 0; /* repeat count of the current code */ 779 int max_count = 7; /* max repeat count */ 780 int min_count = 4; /* min repeat count */ 781 782 /* tree[max_code + 1].Len = -1; */ /* guard already set */ 783 if (nextlen == 0) max_count = 138, min_count = 3; 784 785 for (n = 0; n <= max_code; n++) { 786 curlen = nextlen; nextlen = tree[n + 1].Len; 787 if (++count < max_count && curlen == nextlen) { 788 continue; 789 } else if (count < min_count) { 790 do { send_code(s, curlen, s->bl_tree); } while (--count != 0); 791 792 } else if (curlen != 0) { 793 if (curlen != prevlen) { 794 send_code(s, curlen, s->bl_tree); count--; 795 } 796 Assert(count >= 3 && count <= 6, " 3_6?"); 797 send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2); 798 799 } else if (count <= 10) { 800 send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3); 801 802 } else { 803 send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7); 804 } 805 count = 0; prevlen = curlen; 806 if (nextlen == 0) { 807 max_count = 138, min_count = 3; 808 } else if (curlen == nextlen) { 809 max_count = 6, min_count = 3; 810 } else { 811 max_count = 7, min_count = 4; 812 } 813 } 814} 815 816/* =========================================================================== 817 * Construct the Huffman tree for the bit lengths and return the index in 818 * bl_order of the last bit length code to send. 819 */ 820local int build_bl_tree(deflate_state *s) 821{ 822 int max_blindex; /* index of last bit length code of non zero freq */ 823 824 /* Determine the bit length frequencies for literal and distance trees */ 825 scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); 826 scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); 827 828 /* Build the bit length tree: */ 829 build_tree(s, (tree_desc *)(&(s->bl_desc))); 830 /* opt_len now includes the length of the tree representations, except the 831 * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts. 832 */ 833 834 /* Determine the number of bit length codes to send. The pkzip format 835 * requires that at least 4 bit length codes be sent. (appnote.txt says 836 * 3 but the actual value used is 4.) 837 */ 838 for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { 839 if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; 840 } 841 /* Update opt_len to include the bit length tree and counts */ 842 s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4; 843 Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", 844 s->opt_len, s->static_len)); 845 846 return max_blindex; 847} 848 849/* =========================================================================== 850 * Send the header for a block using dynamic Huffman trees: the counts, the 851 * lengths of the bit length codes, the literal tree and the distance tree. 852 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 853 */ 854local void send_all_trees(deflate_state *s, int lcodes, int dcodes, 855 int blcodes) 856{ 857 int rank; /* index in bl_order */ 858 859 Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); 860 Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, 861 "too many codes"); 862 Tracev((stderr, "\nbl counts: ")); 863 send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */ 864 send_bits(s, dcodes - 1, 5); 865 send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */ 866 for (rank = 0; rank < blcodes; rank++) { 867 Tracev((stderr, "\nbl code %2d ", bl_order[rank])); 868 send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); 869 } 870 Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); 871 872 send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */ 873 Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); 874 875 send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */ 876 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); 877} 878 879/* =========================================================================== 880 * Send a stored block 881 */ 882void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, 883 ulg stored_len, int last) { 884 send_bits(s, (STORED_BLOCK<<1) + last, 3); /* send block type */ 885 bi_windup(s); /* align on byte boundary */ 886 put_short(s, (ush)stored_len); 887 put_short(s, (ush)~stored_len); 888 if (stored_len) 889 zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); 890 s->pending += stored_len; 891#ifdef ZLIB_DEBUG 892 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; 893 s->compressed_len += (stored_len + 4) << 3; 894 s->bits_sent += 2*16; 895 s->bits_sent += stored_len << 3; 896#endif 897} 898 899/* =========================================================================== 900 * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) 901 */ 902void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) { 903 bi_flush(s); 904} 905 906/* =========================================================================== 907 * Send one empty static block to give enough lookahead for inflate. 908 * This takes 10 bits, of which 7 may remain in the bit buffer. 909 */ 910void ZLIB_INTERNAL _tr_align(deflate_state *s) { 911 send_bits(s, STATIC_TREES<<1, 3); 912 send_code(s, END_BLOCK, static_ltree); 913#ifdef ZLIB_DEBUG 914 s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ 915#endif 916 bi_flush(s); 917} 918 919/* =========================================================================== 920 * Send the block data compressed using the given Huffman trees 921 */ 922local void compress_block(deflate_state *s, const ct_data *ltree, 923 const ct_data *dtree) 924{ 925 unsigned dist; /* distance of matched string */ 926 int lc; /* match length or unmatched char (if dist == 0) */ 927 unsigned sx = 0; /* running index in symbol buffers */ 928 unsigned code; /* the code to send */ 929 int extra; /* number of extra bits to send */ 930 931 if (s->sym_next != 0) do { 932#ifdef LIT_MEM 933 dist = s->d_buf[sx]; 934 lc = s->l_buf[sx++]; 935#else 936 dist = s->sym_buf[sx++] & 0xff; 937 dist += (unsigned)(s->sym_buf[sx++] & 0xff) << INDEX_8; 938 lc = s->sym_buf[sx++]; 939#endif 940 if (dist == 0) { 941 send_code(s, lc, ltree); /* send a literal byte */ 942 Tracecv(isgraph(lc), (stderr," '%c' ", lc)); 943 } else { 944 /* Here, lc is the match length - MIN_MATCH */ 945 code = _length_code[lc]; 946 send_code(s, code + LITERALS + 1, ltree); /* send length code */ 947 extra = extra_lbits[code]; 948 if (extra != 0) { 949 lc -= base_length[code]; 950 send_bits(s, lc, extra); /* send the extra length bits */ 951 } 952 dist--; /* dist is now the match distance - 1 */ 953 code = d_code(dist); 954 Assert (code < D_CODES, "bad d_code"); 955 956 send_code(s, code, dtree); /* send the distance code */ 957 extra = extra_dbits[code]; 958 if (extra != 0) { 959 dist -= (unsigned)base_dist[code]; 960 send_bits(s, dist, extra); /* send the extra distance bits */ 961 } 962 } /* literal or match pair ? */ 963 964 /* Check for no overlay of pending_buf on needed symbols */ 965#ifdef LIT_MEM 966 Assert(s->pending < INDEX_2 * (s->lit_bufsize + sx), "pendingBuf overflow"); 967#else 968 Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); 969#endif 970 971 } while (sx < s->sym_next); 972 973 send_code(s, END_BLOCK, ltree); 974} 975 976/* =========================================================================== 977 * Check if the data type is TEXT or BINARY, using the following algorithm: 978 * - TEXT if the two conditions below are satisfied: 979 * a) There are no non-portable control characters belonging to the 980 * "block list" (0..6, 14..25, 28..31). 981 * b) There is at least one printable character belonging to the 982 * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). 983 * - BINARY otherwise. 984 * - The following partially-portable control characters form a 985 * "gray list" that is ignored in this detection algorithm: 986 * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). 987 * IN assertion: the fields Freq of dyn_ltree are set. 988 */ 989local int detect_data_type(deflate_state *s) 990{ 991 /* block_mask is the bit mask of block-listed bytes 992 * set bits 0..6, 14..25, and 28..31 993 * 0xf3ffc07f = binary 11110011111111111100000001111111 994 */ 995 unsigned long block_mask = 0xf3ffc07fUL; 996 int n; 997 998 /* Check for non-textual ("block-listed") bytes. */ 999 for (n = 0; n <= INDEX_31; n++, block_mask >>= 1) 1000 if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) 1001 return Z_BINARY; 1002 1003 /* Check for textual ("allow-listed") bytes. */ 1004 if (s->dyn_ltree[INDEX_9].Freq != 0 || s->dyn_ltree[10].Freq != 0 1005 || s->dyn_ltree[INDEX_13].Freq != 0) 1006 return Z_TEXT; 1007 for (n = INDEX_32; n < LITERALS; n++) 1008 if (s->dyn_ltree[n].Freq != 0) 1009 return Z_TEXT; 1010 1011 /* There are no "block-listed" or "allow-listed" bytes: 1012 * this stream either is empty or has tolerated ("gray-listed") bytes only. 1013 */ 1014 return Z_BINARY; 1015} 1016 1017/* =========================================================================== 1018 * Determine the best encoding for the current block: dynamic trees, static 1019 * trees or store, and write out the encoded block. 1020 */ 1021void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, 1022 ulg stored_len, int last) { 1023 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ 1024 int max_blindex = 0; /* index of last bit length code of non zero freq */ 1025 1026 /* Build the Huffman trees unless a stored block is forced */ 1027 if (s->level > 0) { 1028 1029 /* Check if the file is binary or text */ 1030 if (s->strm->data_type == Z_UNKNOWN) 1031 s->strm->data_type = detect_data_type(s); 1032 1033 /* Construct the literal and distance trees */ 1034 build_tree(s, (tree_desc *)(&(s->l_desc))); 1035 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, 1036 s->static_len)); 1037 1038 build_tree(s, (tree_desc *)(&(s->d_desc))); 1039 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, 1040 s->static_len)); 1041 /* At this point, opt_len and static_len are the total bit lengths of 1042 * the compressed block data, excluding the tree representations. 1043 */ 1044 1045 /* Build the bit length tree for the above two trees, and get the index 1046 * in bl_order of the last bit length code to send. 1047 */ 1048 max_blindex = build_bl_tree(s); 1049 1050 /* Determine the best encoding. Compute the block lengths in bytes. */ 1051 opt_lenb = (s->opt_len + 3 + 7) >> 3; 1052 static_lenb = (s->static_len + 3 + 7) >> 3; 1053 1054 Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", 1055 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, 1056 s->sym_next / 3)); 1057 1058#ifndef FORCE_STATIC 1059 if (static_lenb <= opt_lenb || s->strategy == Z_FIXED) 1060#endif 1061 opt_lenb = static_lenb; 1062 1063 } else { 1064 Assert(buf != (char*)0, "lost buf"); 1065 opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ 1066 } 1067 1068#ifdef FORCE_STORED 1069 if (buf != (char*)0) { /* force stored block */ 1070#else 1071 if (stored_len + 4 <= opt_lenb && buf != (char*)0) { 1072 /* 4: two words for the lengths */ 1073#endif 1074 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 1075 * Otherwise we can't have processed more than WSIZE input bytes since 1076 * the last block flush, because compression would have been 1077 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to 1078 * transform a block into a stored block. 1079 */ 1080 _tr_stored_block(s, buf, stored_len, last); 1081 1082 } else if (static_lenb == opt_lenb) { 1083 send_bits(s, (STATIC_TREES<<1) + last, 3); 1084 compress_block(s, (const ct_data *)static_ltree, 1085 (const ct_data *)static_dtree); 1086#ifdef ZLIB_DEBUG 1087 s->compressed_len += 3 + s->static_len; 1088#endif 1089 } else { 1090 send_bits(s, (DYN_TREES<<1) + last, 3); 1091 send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1, 1092 max_blindex + 1); 1093 compress_block(s, (const ct_data *)s->dyn_ltree, 1094 (const ct_data *)s->dyn_dtree); 1095#ifdef ZLIB_DEBUG 1096 s->compressed_len += 3 + s->opt_len; 1097#endif 1098 } 1099 Assert (s->compressed_len == s->bits_sent, "bad compressed size"); 1100 /* The above check is made mod 2^32, for files larger than 512 MB 1101 * and uLong implemented on 32 bits. 1102 */ 1103 init_block(s); 1104 1105 if (last) { 1106 bi_windup(s); 1107#ifdef ZLIB_DEBUG 1108 s->compressed_len += 7; /* align on byte boundary */ 1109#endif 1110 } 1111 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3, 1112 s->compressed_len - 7*last)); 1113} 1114 1115/* =========================================================================== 1116 * Save the match info and tally the frequency counts. Return true if 1117 * the current block must be flushed. 1118 */ 1119int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { 1120#ifdef LIT_MEM 1121 s->d_buf[s->sym_next] = (ush)dist; 1122 s->l_buf[s->sym_next++] = (uch)lc; 1123#else 1124 s->sym_buf[s->sym_next++] = (uch)dist; 1125 s->sym_buf[s->sym_next++] = (uch)(dist >> 8); 1126 s->sym_buf[s->sym_next++] = (uch)lc; 1127#endif 1128 if (dist == 0) { 1129 /* lc is the unmatched char */ 1130 s->dyn_ltree[lc].Freq++; 1131 } else { 1132 s->matches++; 1133 /* Here, lc is the match length - MIN_MATCH */ 1134 dist--; /* dist = match distance - 1 */ 1135 Assert((ush)dist < (ush)MAX_DIST(s) && 1136 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && 1137 (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); 1138 1139 s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++; 1140 s->dyn_dtree[d_code(dist)].Freq++; 1141 } 1142 return (s->sym_next == s->sym_end); 1143} 1144