1275793eaSopenharmony_ci/* enough.c -- determine the maximum size of inflate's Huffman code tables over 2275793eaSopenharmony_ci * all possible valid and complete prefix codes, subject to a length limit. 3275793eaSopenharmony_ci * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler 4275793eaSopenharmony_ci * Version 1.5 5 August 2018 Mark Adler 5275793eaSopenharmony_ci */ 6275793eaSopenharmony_ci 7275793eaSopenharmony_ci/* Version history: 8275793eaSopenharmony_ci 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) 9275793eaSopenharmony_ci 1.1 4 Jan 2007 Use faster incremental table usage computation 10275793eaSopenharmony_ci Prune examine() search on previously visited states 11275793eaSopenharmony_ci 1.2 5 Jan 2007 Comments clean up 12275793eaSopenharmony_ci As inflate does, decrease root for short codes 13275793eaSopenharmony_ci Refuse cases where inflate would increase root 14275793eaSopenharmony_ci 1.3 17 Feb 2008 Add argument for initial root table size 15275793eaSopenharmony_ci Fix bug for initial root table size == max - 1 16275793eaSopenharmony_ci Use a macro to compute the history index 17275793eaSopenharmony_ci 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) 18275793eaSopenharmony_ci Clean up comparisons of different types 19275793eaSopenharmony_ci Clean up code indentation 20275793eaSopenharmony_ci 1.5 5 Aug 2018 Clean up code style, formatting, and comments 21275793eaSopenharmony_ci Show all the codes for the maximum, and only the maximum 22275793eaSopenharmony_ci */ 23275793eaSopenharmony_ci 24275793eaSopenharmony_ci/* 25275793eaSopenharmony_ci Examine all possible prefix codes for a given number of symbols and a 26275793eaSopenharmony_ci maximum code length in bits to determine the maximum table size for zlib's 27275793eaSopenharmony_ci inflate. Only complete prefix codes are counted. 28275793eaSopenharmony_ci 29275793eaSopenharmony_ci Two codes are considered distinct if the vectors of the number of codes per 30275793eaSopenharmony_ci length are not identical. So permutations of the symbol assignments result 31275793eaSopenharmony_ci in the same code for the counting, as do permutations of the assignments of 32275793eaSopenharmony_ci the bit values to the codes (i.e. only canonical codes are counted). 33275793eaSopenharmony_ci 34275793eaSopenharmony_ci We build a code from shorter to longer lengths, determining how many symbols 35275793eaSopenharmony_ci are coded at each length. At each step, we have how many symbols remain to 36275793eaSopenharmony_ci be coded, what the last code length used was, and how many bit patterns of 37275793eaSopenharmony_ci that length remain unused. Then we add one to the code length and double the 38275793eaSopenharmony_ci number of unused patterns to graduate to the next code length. We then 39275793eaSopenharmony_ci assign all portions of the remaining symbols to that code length that 40275793eaSopenharmony_ci preserve the properties of a correct and eventually complete code. Those 41275793eaSopenharmony_ci properties are: we cannot use more bit patterns than are available; and when 42275793eaSopenharmony_ci all the symbols are used, there are exactly zero possible bit patterns left 43275793eaSopenharmony_ci unused. 44275793eaSopenharmony_ci 45275793eaSopenharmony_ci The inflate Huffman decoding algorithm uses two-level lookup tables for 46275793eaSopenharmony_ci speed. There is a single first-level table to decode codes up to root bits 47275793eaSopenharmony_ci in length (root == 9 for literal/length codes and root == 6 for distance 48275793eaSopenharmony_ci codes, in the current inflate implementation). The base table has 1 << root 49275793eaSopenharmony_ci entries and is indexed by the next root bits of input. Codes shorter than 50275793eaSopenharmony_ci root bits have replicated table entries, so that the correct entry is 51275793eaSopenharmony_ci pointed to regardless of the bits that follow the short code. If the code is 52275793eaSopenharmony_ci longer than root bits, then the table entry points to a second-level table. 53275793eaSopenharmony_ci The size of that table is determined by the longest code with that root-bit 54275793eaSopenharmony_ci prefix. If that longest code has length len, then the table has size 1 << 55275793eaSopenharmony_ci (len - root), to index the remaining bits in that set of codes. Each 56275793eaSopenharmony_ci subsequent root-bit prefix then has its own sub-table. The total number of 57275793eaSopenharmony_ci table entries required by the code is calculated incrementally as the number 58275793eaSopenharmony_ci of codes at each bit length is populated. When all of the codes are shorter 59275793eaSopenharmony_ci than root bits, then root is reduced to the longest code length, resulting 60275793eaSopenharmony_ci in a single, smaller, one-level table. 61275793eaSopenharmony_ci 62275793eaSopenharmony_ci The inflate algorithm also provides for small values of root (relative to 63275793eaSopenharmony_ci the log2 of the number of symbols), where the shortest code has more bits 64275793eaSopenharmony_ci than root. In that case, root is increased to the length of the shortest 65275793eaSopenharmony_ci code. This program, by design, does not handle that case, so it is verified 66275793eaSopenharmony_ci that the number of symbols is less than 1 << (root + 1). 67275793eaSopenharmony_ci 68275793eaSopenharmony_ci In order to speed up the examination (by about ten orders of magnitude for 69275793eaSopenharmony_ci the default arguments), the intermediate states in the build-up of a code 70275793eaSopenharmony_ci are remembered and previously visited branches are pruned. The memory 71275793eaSopenharmony_ci required for this will increase rapidly with the total number of symbols and 72275793eaSopenharmony_ci the maximum code length in bits. However this is a very small price to pay 73275793eaSopenharmony_ci for the vast speedup. 74275793eaSopenharmony_ci 75275793eaSopenharmony_ci First, all of the possible prefix codes are counted, and reachable 76275793eaSopenharmony_ci intermediate states are noted by a non-zero count in a saved-results array. 77275793eaSopenharmony_ci Second, the intermediate states that lead to (root + 1) bit or longer codes 78275793eaSopenharmony_ci are used to look at all sub-codes from those junctures for their inflate 79275793eaSopenharmony_ci memory usage. (The amount of memory used is not affected by the number of 80275793eaSopenharmony_ci codes of root bits or less in length.) Third, the visited states in the 81275793eaSopenharmony_ci construction of those sub-codes and the associated calculation of the table 82275793eaSopenharmony_ci size is recalled in order to avoid recalculating from the same juncture. 83275793eaSopenharmony_ci Beginning the code examination at (root + 1) bit codes, which is enabled by 84275793eaSopenharmony_ci identifying the reachable nodes, accounts for about six of the orders of 85275793eaSopenharmony_ci magnitude of improvement for the default arguments. About another four 86275793eaSopenharmony_ci orders of magnitude come from not revisiting previous states. Out of 87275793eaSopenharmony_ci approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes 88275793eaSopenharmony_ci need to be examined to cover all of the possible table memory usage cases 89275793eaSopenharmony_ci for the default arguments of 286 symbols limited to 15-bit codes. 90275793eaSopenharmony_ci 91275793eaSopenharmony_ci Note that the uintmax_t type is used for counting. It is quite easy to 92275793eaSopenharmony_ci exceed the capacity of an eight-byte integer with a large number of symbols 93275793eaSopenharmony_ci and a large maximum code length, so multiple-precision arithmetic would need 94275793eaSopenharmony_ci to replace the integer arithmetic in that case. This program will abort if 95275793eaSopenharmony_ci an overflow occurs. The big_t type identifies where the counting takes 96275793eaSopenharmony_ci place. 97275793eaSopenharmony_ci 98275793eaSopenharmony_ci The uintmax_t type is also used for calculating the number of possible codes 99275793eaSopenharmony_ci remaining at the maximum length. This limits the maximum code length to the 100275793eaSopenharmony_ci number of bits in a long long minus the number of bits needed to represent 101275793eaSopenharmony_ci the symbols in a flat code. The code_t type identifies where the bit-pattern 102275793eaSopenharmony_ci counting takes place. 103275793eaSopenharmony_ci */ 104275793eaSopenharmony_ci 105275793eaSopenharmony_ci#include <stdio.h> 106275793eaSopenharmony_ci#include <stdlib.h> 107275793eaSopenharmony_ci#include <string.h> 108275793eaSopenharmony_ci#include <stdarg.h> 109275793eaSopenharmony_ci#include <stdint.h> 110275793eaSopenharmony_ci#include <assert.h> 111275793eaSopenharmony_ci 112275793eaSopenharmony_ci#define local static 113275793eaSopenharmony_ci 114275793eaSopenharmony_ci// Special data types. 115275793eaSopenharmony_citypedef uintmax_t big_t; // type for code counting 116275793eaSopenharmony_ci#define PRIbig "ju" // printf format for big_t 117275793eaSopenharmony_citypedef uintmax_t code_t; // type for bit pattern counting 118275793eaSopenharmony_cistruct tab { // type for been-here check 119275793eaSopenharmony_ci size_t len; // allocated length of bit vector in octets 120275793eaSopenharmony_ci char *vec; // allocated bit vector 121275793eaSopenharmony_ci}; 122275793eaSopenharmony_ci 123275793eaSopenharmony_ci/* The array for saving results, num[], is indexed with this triplet: 124275793eaSopenharmony_ci 125275793eaSopenharmony_ci syms: number of symbols remaining to code 126275793eaSopenharmony_ci left: number of available bit patterns at length len 127275793eaSopenharmony_ci len: number of bits in the codes currently being assigned 128275793eaSopenharmony_ci 129275793eaSopenharmony_ci Those indices are constrained thusly when saving results: 130275793eaSopenharmony_ci 131275793eaSopenharmony_ci syms: 3..totsym (totsym == total symbols to code) 132275793eaSopenharmony_ci left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) 133275793eaSopenharmony_ci len: 1..max - 1 (max == maximum code length in bits) 134275793eaSopenharmony_ci 135275793eaSopenharmony_ci syms == 2 is not saved since that immediately leads to a single code. left 136275793eaSopenharmony_ci must be even, since it represents the number of available bit patterns at 137275793eaSopenharmony_ci the current length, which is double the number at the previous length. left 138275793eaSopenharmony_ci ends at syms-1 since left == syms immediately results in a single code. 139275793eaSopenharmony_ci (left > sym is not allowed since that would result in an incomplete code.) 140275793eaSopenharmony_ci len is less than max, since the code completes immediately when len == max. 141275793eaSopenharmony_ci 142275793eaSopenharmony_ci The offset into the array is calculated for the three indices with the first 143275793eaSopenharmony_ci one (syms) being outermost, and the last one (len) being innermost. We build 144275793eaSopenharmony_ci the array with length max-1 lists for the len index, with syms-3 of those 145275793eaSopenharmony_ci for each symbol. There are totsym-2 of those, with each one varying in 146275793eaSopenharmony_ci length as a function of sym. See the calculation of index in map() for the 147275793eaSopenharmony_ci index, and the calculation of size in main() for the size of the array. 148275793eaSopenharmony_ci 149275793eaSopenharmony_ci For the deflate example of 286 symbols limited to 15-bit codes, the array 150275793eaSopenharmony_ci has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half 151275793eaSopenharmony_ci of the space allocated for saved results is actually used -- not all 152275793eaSopenharmony_ci possible triplets are reached in the generation of valid prefix codes. 153275793eaSopenharmony_ci */ 154275793eaSopenharmony_ci 155275793eaSopenharmony_ci/* The array for tracking visited states, done[], is itself indexed identically 156275793eaSopenharmony_ci to the num[] array as described above for the (syms, left, len) triplet. 157275793eaSopenharmony_ci Each element in the array is further indexed by the (mem, rem) doublet, 158275793eaSopenharmony_ci where mem is the amount of inflate table space used so far, and rem is the 159275793eaSopenharmony_ci remaining unused entries in the current inflate sub-table. Each indexed 160275793eaSopenharmony_ci element is simply one bit indicating whether the state has been visited or 161275793eaSopenharmony_ci not. Since the ranges for mem and rem are not known a priori, each bit 162275793eaSopenharmony_ci vector is of a variable size, and grows as needed to accommodate the visited 163275793eaSopenharmony_ci states. mem and rem are used to calculate a single index in a triangular 164275793eaSopenharmony_ci array. Since the range of mem is expected in the default case to be about 165275793eaSopenharmony_ci ten times larger than the range of rem, the array is skewed to reduce the 166275793eaSopenharmony_ci memory usage, with eight times the range for mem than for rem. See the 167275793eaSopenharmony_ci calculations for offset and bit in been_here() for the details. 168275793eaSopenharmony_ci 169275793eaSopenharmony_ci For the deflate example of 286 symbols limited to 15-bit codes, the bit 170275793eaSopenharmony_ci vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. 171275793eaSopenharmony_ci */ 172275793eaSopenharmony_ci 173275793eaSopenharmony_ci// Type for a variable-length, allocated string. 174275793eaSopenharmony_citypedef struct { 175275793eaSopenharmony_ci char *str; // pointer to allocated string 176275793eaSopenharmony_ci size_t size; // size of allocation 177275793eaSopenharmony_ci size_t len; // length of string, not including terminating zero 178275793eaSopenharmony_ci} string_t; 179275793eaSopenharmony_ci 180275793eaSopenharmony_ci// Clear a string_t. 181275793eaSopenharmony_cilocal void string_clear(string_t *s) { 182275793eaSopenharmony_ci s->str[0] = 0; 183275793eaSopenharmony_ci s->len = 0; 184275793eaSopenharmony_ci} 185275793eaSopenharmony_ci 186275793eaSopenharmony_ci// Initialize a string_t. 187275793eaSopenharmony_cilocal void string_init(string_t *s) { 188275793eaSopenharmony_ci s->size = 16; 189275793eaSopenharmony_ci s->str = malloc(s->size); 190275793eaSopenharmony_ci assert(s->str != NULL && "out of memory"); 191275793eaSopenharmony_ci string_clear(s); 192275793eaSopenharmony_ci} 193275793eaSopenharmony_ci 194275793eaSopenharmony_ci// Release the allocation of a string_t. 195275793eaSopenharmony_cilocal void string_free(string_t *s) { 196275793eaSopenharmony_ci free(s->str); 197275793eaSopenharmony_ci s->str = NULL; 198275793eaSopenharmony_ci s->size = 0; 199275793eaSopenharmony_ci s->len = 0; 200275793eaSopenharmony_ci} 201275793eaSopenharmony_ci 202275793eaSopenharmony_ci// Save the results of printf with fmt and the subsequent argument list to s. 203275793eaSopenharmony_ci// Each call appends to s. The allocated space for s is increased as needed. 204275793eaSopenharmony_cilocal void string_printf(string_t *s, char *fmt, ...) { 205275793eaSopenharmony_ci va_list ap; 206275793eaSopenharmony_ci va_start(ap, fmt); 207275793eaSopenharmony_ci size_t len = s->len; 208275793eaSopenharmony_ci int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); 209275793eaSopenharmony_ci assert(ret >= 0 && "out of memory"); 210275793eaSopenharmony_ci s->len += ret; 211275793eaSopenharmony_ci if (s->size < s->len + 1) { 212275793eaSopenharmony_ci do { 213275793eaSopenharmony_ci s->size <<= 1; 214275793eaSopenharmony_ci assert(s->size != 0 && "overflow"); 215275793eaSopenharmony_ci } while (s->size < s->len + 1); 216275793eaSopenharmony_ci s->str = realloc(s->str, s->size); 217275793eaSopenharmony_ci assert(s->str != NULL && "out of memory"); 218275793eaSopenharmony_ci vsnprintf(s->str + len, s->size - len, fmt, ap); 219275793eaSopenharmony_ci } 220275793eaSopenharmony_ci va_end(ap); 221275793eaSopenharmony_ci} 222275793eaSopenharmony_ci 223275793eaSopenharmony_ci// Globals to avoid propagating constants or constant pointers recursively. 224275793eaSopenharmony_cistruct { 225275793eaSopenharmony_ci int max; // maximum allowed bit length for the codes 226275793eaSopenharmony_ci int root; // size of base code table in bits 227275793eaSopenharmony_ci int large; // largest code table so far 228275793eaSopenharmony_ci size_t size; // number of elements in num and done 229275793eaSopenharmony_ci big_t tot; // total number of codes with maximum tables size 230275793eaSopenharmony_ci string_t out; // display of subcodes for maximum tables size 231275793eaSopenharmony_ci int *code; // number of symbols assigned to each bit length 232275793eaSopenharmony_ci big_t *num; // saved results array for code counting 233275793eaSopenharmony_ci struct tab *done; // states already evaluated array 234275793eaSopenharmony_ci} g; 235275793eaSopenharmony_ci 236275793eaSopenharmony_ci// Index function for num[] and done[]. 237275793eaSopenharmony_cilocal inline size_t map(int syms, int left, int len) { 238275793eaSopenharmony_ci return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + 239275793eaSopenharmony_ci (left >> 1) - 1) * (g.max - 1) + 240275793eaSopenharmony_ci len - 1; 241275793eaSopenharmony_ci} 242275793eaSopenharmony_ci 243275793eaSopenharmony_ci// Free allocated space in globals. 244275793eaSopenharmony_cilocal void cleanup(void) { 245275793eaSopenharmony_ci if (g.done != NULL) { 246275793eaSopenharmony_ci for (size_t n = 0; n < g.size; n++) 247275793eaSopenharmony_ci if (g.done[n].len) 248275793eaSopenharmony_ci free(g.done[n].vec); 249275793eaSopenharmony_ci g.size = 0; 250275793eaSopenharmony_ci free(g.done); g.done = NULL; 251275793eaSopenharmony_ci } 252275793eaSopenharmony_ci free(g.num); g.num = NULL; 253275793eaSopenharmony_ci free(g.code); g.code = NULL; 254275793eaSopenharmony_ci string_free(&g.out); 255275793eaSopenharmony_ci} 256275793eaSopenharmony_ci 257275793eaSopenharmony_ci// Return the number of possible prefix codes using bit patterns of lengths len 258275793eaSopenharmony_ci// through max inclusive, coding syms symbols, with left bit patterns of length 259275793eaSopenharmony_ci// len unused -- return -1 if there is an overflow in the counting. Keep a 260275793eaSopenharmony_ci// record of previous results in num to prevent repeating the same calculation. 261275793eaSopenharmony_cilocal big_t count(int syms, int left, int len) { 262275793eaSopenharmony_ci // see if only one possible code 263275793eaSopenharmony_ci if (syms == left) 264275793eaSopenharmony_ci return 1; 265275793eaSopenharmony_ci 266275793eaSopenharmony_ci // note and verify the expected state 267275793eaSopenharmony_ci assert(syms > left && left > 0 && len < g.max); 268275793eaSopenharmony_ci 269275793eaSopenharmony_ci // see if we've done this one already 270275793eaSopenharmony_ci size_t index = map(syms, left, len); 271275793eaSopenharmony_ci big_t got = g.num[index]; 272275793eaSopenharmony_ci if (got) 273275793eaSopenharmony_ci return got; // we have -- return the saved result 274275793eaSopenharmony_ci 275275793eaSopenharmony_ci // we need to use at least this many bit patterns so that the code won't be 276275793eaSopenharmony_ci // incomplete at the next length (more bit patterns than symbols) 277275793eaSopenharmony_ci int least = (left << 1) - syms; 278275793eaSopenharmony_ci if (least < 0) 279275793eaSopenharmony_ci least = 0; 280275793eaSopenharmony_ci 281275793eaSopenharmony_ci // we can use at most this many bit patterns, lest there not be enough 282275793eaSopenharmony_ci // available for the remaining symbols at the maximum length (if there were 283275793eaSopenharmony_ci // no limit to the code length, this would become: most = left - 1) 284275793eaSopenharmony_ci int most = (((code_t)left << (g.max - len)) - syms) / 285275793eaSopenharmony_ci (((code_t)1 << (g.max - len)) - 1); 286275793eaSopenharmony_ci 287275793eaSopenharmony_ci // count all possible codes from this juncture and add them up 288275793eaSopenharmony_ci big_t sum = 0; 289275793eaSopenharmony_ci for (int use = least; use <= most; use++) { 290275793eaSopenharmony_ci got = count(syms - use, (left - use) << 1, len + 1); 291275793eaSopenharmony_ci sum += got; 292275793eaSopenharmony_ci if (got == (big_t)-1 || sum < got) // overflow 293275793eaSopenharmony_ci return (big_t)-1; 294275793eaSopenharmony_ci } 295275793eaSopenharmony_ci 296275793eaSopenharmony_ci // verify that all recursive calls are productive 297275793eaSopenharmony_ci assert(sum != 0); 298275793eaSopenharmony_ci 299275793eaSopenharmony_ci // save the result and return it 300275793eaSopenharmony_ci g.num[index] = sum; 301275793eaSopenharmony_ci return sum; 302275793eaSopenharmony_ci} 303275793eaSopenharmony_ci 304275793eaSopenharmony_ci// Return true if we've been here before, set to true if not. Set a bit in a 305275793eaSopenharmony_ci// bit vector to indicate visiting this state. Each (syms,len,left) state has a 306275793eaSopenharmony_ci// variable size bit vector indexed by (mem,rem). The bit vector is lengthened 307275793eaSopenharmony_ci// as needed to allow setting the (mem,rem) bit. 308275793eaSopenharmony_cilocal int been_here(int syms, int left, int len, int mem, int rem) { 309275793eaSopenharmony_ci // point to vector for (syms,left,len), bit in vector for (mem,rem) 310275793eaSopenharmony_ci size_t index = map(syms, left, len); 311275793eaSopenharmony_ci mem -= 1 << g.root; // mem always includes the root table 312275793eaSopenharmony_ci mem >>= 1; // mem and rem are always even 313275793eaSopenharmony_ci rem >>= 1; 314275793eaSopenharmony_ci size_t offset = (mem >> 3) + rem; 315275793eaSopenharmony_ci offset = ((offset * (offset + 1)) >> 1) + rem; 316275793eaSopenharmony_ci int bit = 1 << (mem & 7); 317275793eaSopenharmony_ci 318275793eaSopenharmony_ci // see if we've been here 319275793eaSopenharmony_ci size_t length = g.done[index].len; 320275793eaSopenharmony_ci if (offset < length && (g.done[index].vec[offset] & bit) != 0) 321275793eaSopenharmony_ci return 1; // done this! 322275793eaSopenharmony_ci 323275793eaSopenharmony_ci // we haven't been here before -- set the bit to show we have now 324275793eaSopenharmony_ci 325275793eaSopenharmony_ci // see if we need to lengthen the vector in order to set the bit 326275793eaSopenharmony_ci if (length <= offset) { 327275793eaSopenharmony_ci // if we have one already, enlarge it, zero out the appended space 328275793eaSopenharmony_ci char *vector; 329275793eaSopenharmony_ci if (length) { 330275793eaSopenharmony_ci do { 331275793eaSopenharmony_ci length <<= 1; 332275793eaSopenharmony_ci } while (length <= offset); 333275793eaSopenharmony_ci vector = realloc(g.done[index].vec, length); 334275793eaSopenharmony_ci assert(vector != NULL && "out of memory"); 335275793eaSopenharmony_ci memset(vector + g.done[index].len, 0, length - g.done[index].len); 336275793eaSopenharmony_ci } 337275793eaSopenharmony_ci 338275793eaSopenharmony_ci // otherwise we need to make a new vector and zero it out 339275793eaSopenharmony_ci else { 340275793eaSopenharmony_ci length = 16; 341275793eaSopenharmony_ci while (length <= offset) 342275793eaSopenharmony_ci length <<= 1; 343275793eaSopenharmony_ci vector = calloc(length, 1); 344275793eaSopenharmony_ci assert(vector != NULL && "out of memory"); 345275793eaSopenharmony_ci } 346275793eaSopenharmony_ci 347275793eaSopenharmony_ci // install the new vector 348275793eaSopenharmony_ci g.done[index].len = length; 349275793eaSopenharmony_ci g.done[index].vec = vector; 350275793eaSopenharmony_ci } 351275793eaSopenharmony_ci 352275793eaSopenharmony_ci // set the bit 353275793eaSopenharmony_ci g.done[index].vec[offset] |= bit; 354275793eaSopenharmony_ci return 0; 355275793eaSopenharmony_ci} 356275793eaSopenharmony_ci 357275793eaSopenharmony_ci// Examine all possible codes from the given node (syms, len, left). Compute 358275793eaSopenharmony_ci// the amount of memory required to build inflate's decoding tables, where the 359275793eaSopenharmony_ci// number of code structures used so far is mem, and the number remaining in 360275793eaSopenharmony_ci// the current sub-table is rem. 361275793eaSopenharmony_cilocal void examine(int syms, int left, int len, int mem, int rem) { 362275793eaSopenharmony_ci // see if we have a complete code 363275793eaSopenharmony_ci if (syms == left) { 364275793eaSopenharmony_ci // set the last code entry 365275793eaSopenharmony_ci g.code[len] = left; 366275793eaSopenharmony_ci 367275793eaSopenharmony_ci // complete computation of memory used by this code 368275793eaSopenharmony_ci while (rem < left) { 369275793eaSopenharmony_ci left -= rem; 370275793eaSopenharmony_ci rem = 1 << (len - g.root); 371275793eaSopenharmony_ci mem += rem; 372275793eaSopenharmony_ci } 373275793eaSopenharmony_ci assert(rem == left); 374275793eaSopenharmony_ci 375275793eaSopenharmony_ci // if this is at the maximum, show the sub-code 376275793eaSopenharmony_ci if (mem >= g.large) { 377275793eaSopenharmony_ci // if this is a new maximum, update the maximum and clear out the 378275793eaSopenharmony_ci // printed sub-codes from the previous maximum 379275793eaSopenharmony_ci if (mem > g.large) { 380275793eaSopenharmony_ci g.large = mem; 381275793eaSopenharmony_ci string_clear(&g.out); 382275793eaSopenharmony_ci } 383275793eaSopenharmony_ci 384275793eaSopenharmony_ci // compute the starting state for this sub-code 385275793eaSopenharmony_ci syms = 0; 386275793eaSopenharmony_ci left = 1 << g.max; 387275793eaSopenharmony_ci for (int bits = g.max; bits > g.root; bits--) { 388275793eaSopenharmony_ci syms += g.code[bits]; 389275793eaSopenharmony_ci left -= g.code[bits]; 390275793eaSopenharmony_ci assert((left & 1) == 0); 391275793eaSopenharmony_ci left >>= 1; 392275793eaSopenharmony_ci } 393275793eaSopenharmony_ci 394275793eaSopenharmony_ci // print the starting state and the resulting sub-code to g.out 395275793eaSopenharmony_ci string_printf(&g.out, "<%u, %u, %u>:", 396275793eaSopenharmony_ci syms, g.root + 1, ((1 << g.root) - left) << 1); 397275793eaSopenharmony_ci for (int bits = g.root + 1; bits <= g.max; bits++) 398275793eaSopenharmony_ci if (g.code[bits]) 399275793eaSopenharmony_ci string_printf(&g.out, " %d[%d]", g.code[bits], bits); 400275793eaSopenharmony_ci string_printf(&g.out, "\n"); 401275793eaSopenharmony_ci } 402275793eaSopenharmony_ci 403275793eaSopenharmony_ci // remove entries as we drop back down in the recursion 404275793eaSopenharmony_ci g.code[len] = 0; 405275793eaSopenharmony_ci return; 406275793eaSopenharmony_ci } 407275793eaSopenharmony_ci 408275793eaSopenharmony_ci // prune the tree if we can 409275793eaSopenharmony_ci if (been_here(syms, left, len, mem, rem)) 410275793eaSopenharmony_ci return; 411275793eaSopenharmony_ci 412275793eaSopenharmony_ci // we need to use at least this many bit patterns so that the code won't be 413275793eaSopenharmony_ci // incomplete at the next length (more bit patterns than symbols) 414275793eaSopenharmony_ci int least = (left << 1) - syms; 415275793eaSopenharmony_ci if (least < 0) 416275793eaSopenharmony_ci least = 0; 417275793eaSopenharmony_ci 418275793eaSopenharmony_ci // we can use at most this many bit patterns, lest there not be enough 419275793eaSopenharmony_ci // available for the remaining symbols at the maximum length (if there were 420275793eaSopenharmony_ci // no limit to the code length, this would become: most = left - 1) 421275793eaSopenharmony_ci int most = (((code_t)left << (g.max - len)) - syms) / 422275793eaSopenharmony_ci (((code_t)1 << (g.max - len)) - 1); 423275793eaSopenharmony_ci 424275793eaSopenharmony_ci // occupy least table spaces, creating new sub-tables as needed 425275793eaSopenharmony_ci int use = least; 426275793eaSopenharmony_ci while (rem < use) { 427275793eaSopenharmony_ci use -= rem; 428275793eaSopenharmony_ci rem = 1 << (len - g.root); 429275793eaSopenharmony_ci mem += rem; 430275793eaSopenharmony_ci } 431275793eaSopenharmony_ci rem -= use; 432275793eaSopenharmony_ci 433275793eaSopenharmony_ci // examine codes from here, updating table space as we go 434275793eaSopenharmony_ci for (use = least; use <= most; use++) { 435275793eaSopenharmony_ci g.code[len] = use; 436275793eaSopenharmony_ci examine(syms - use, (left - use) << 1, len + 1, 437275793eaSopenharmony_ci mem + (rem ? 1 << (len - g.root) : 0), rem << 1); 438275793eaSopenharmony_ci if (rem == 0) { 439275793eaSopenharmony_ci rem = 1 << (len - g.root); 440275793eaSopenharmony_ci mem += rem; 441275793eaSopenharmony_ci } 442275793eaSopenharmony_ci rem--; 443275793eaSopenharmony_ci } 444275793eaSopenharmony_ci 445275793eaSopenharmony_ci // remove entries as we drop back down in the recursion 446275793eaSopenharmony_ci g.code[len] = 0; 447275793eaSopenharmony_ci} 448275793eaSopenharmony_ci 449275793eaSopenharmony_ci// Look at all sub-codes starting with root + 1 bits. Look at only the valid 450275793eaSopenharmony_ci// intermediate code states (syms, left, len). For each completed code, 451275793eaSopenharmony_ci// calculate the amount of memory required by inflate to build the decoding 452275793eaSopenharmony_ci// tables. Find the maximum amount of memory required and show the codes that 453275793eaSopenharmony_ci// require that maximum. 454275793eaSopenharmony_cilocal void enough(int syms) { 455275793eaSopenharmony_ci // clear code 456275793eaSopenharmony_ci for (int n = 0; n <= g.max; n++) 457275793eaSopenharmony_ci g.code[n] = 0; 458275793eaSopenharmony_ci 459275793eaSopenharmony_ci // look at all (root + 1) bit and longer codes 460275793eaSopenharmony_ci string_clear(&g.out); // empty saved results 461275793eaSopenharmony_ci g.large = 1 << g.root; // base table 462275793eaSopenharmony_ci if (g.root < g.max) // otherwise, there's only a base table 463275793eaSopenharmony_ci for (int n = 3; n <= syms; n++) 464275793eaSopenharmony_ci for (int left = 2; left < n; left += 2) { 465275793eaSopenharmony_ci // look at all reachable (root + 1) bit nodes, and the 466275793eaSopenharmony_ci // resulting codes (complete at root + 2 or more) 467275793eaSopenharmony_ci size_t index = map(n, left, g.root + 1); 468275793eaSopenharmony_ci if (g.root + 1 < g.max && g.num[index]) // reachable node 469275793eaSopenharmony_ci examine(n, left, g.root + 1, 1 << g.root, 0); 470275793eaSopenharmony_ci 471275793eaSopenharmony_ci // also look at root bit codes with completions at root + 1 472275793eaSopenharmony_ci // bits (not saved in num, since complete), just in case 473275793eaSopenharmony_ci if (g.num[index - 1] && n <= left << 1) 474275793eaSopenharmony_ci examine((n - left) << 1, (n - left) << 1, g.root + 1, 475275793eaSopenharmony_ci 1 << g.root, 0); 476275793eaSopenharmony_ci } 477275793eaSopenharmony_ci 478275793eaSopenharmony_ci // done 479275793eaSopenharmony_ci printf("maximum of %d table entries for root = %d\n", g.large, g.root); 480275793eaSopenharmony_ci fputs(g.out.str, stdout); 481275793eaSopenharmony_ci} 482275793eaSopenharmony_ci 483275793eaSopenharmony_ci// Examine and show the total number of possible prefix codes for a given 484275793eaSopenharmony_ci// maximum number of symbols, initial root table size, and maximum code length 485275793eaSopenharmony_ci// in bits -- those are the command arguments in that order. The default values 486275793eaSopenharmony_ci// are 286, 9, and 15 respectively, for the deflate literal/length code. The 487275793eaSopenharmony_ci// possible codes are counted for each number of coded symbols from two to the 488275793eaSopenharmony_ci// maximum. The counts for each of those and the total number of codes are 489275793eaSopenharmony_ci// shown. The maximum number of inflate table entries is then calculated across 490275793eaSopenharmony_ci// all possible codes. Each new maximum number of table entries and the 491275793eaSopenharmony_ci// associated sub-code (starting at root + 1 == 10 bits) is shown. 492275793eaSopenharmony_ci// 493275793eaSopenharmony_ci// To count and examine prefix codes that are not length-limited, provide a 494275793eaSopenharmony_ci// maximum length equal to the number of symbols minus one. 495275793eaSopenharmony_ci// 496275793eaSopenharmony_ci// For the deflate literal/length code, use "enough". For the deflate distance 497275793eaSopenharmony_ci// code, use "enough 30 6". 498275793eaSopenharmony_ciint main(int argc, char **argv) { 499275793eaSopenharmony_ci // set up globals for cleanup() 500275793eaSopenharmony_ci g.code = NULL; 501275793eaSopenharmony_ci g.num = NULL; 502275793eaSopenharmony_ci g.done = NULL; 503275793eaSopenharmony_ci string_init(&g.out); 504275793eaSopenharmony_ci 505275793eaSopenharmony_ci // get arguments -- default to the deflate literal/length code 506275793eaSopenharmony_ci int syms = 286; 507275793eaSopenharmony_ci g.root = 9; 508275793eaSopenharmony_ci g.max = 15; 509275793eaSopenharmony_ci if (argc > 1) { 510275793eaSopenharmony_ci syms = atoi(argv[1]); 511275793eaSopenharmony_ci if (argc > 2) { 512275793eaSopenharmony_ci g.root = atoi(argv[2]); 513275793eaSopenharmony_ci if (argc > 3) 514275793eaSopenharmony_ci g.max = atoi(argv[3]); 515275793eaSopenharmony_ci } 516275793eaSopenharmony_ci } 517275793eaSopenharmony_ci if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) { 518275793eaSopenharmony_ci fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", 519275793eaSopenharmony_ci stderr); 520275793eaSopenharmony_ci return 1; 521275793eaSopenharmony_ci } 522275793eaSopenharmony_ci 523275793eaSopenharmony_ci // if not restricting the code length, the longest is syms - 1 524275793eaSopenharmony_ci if (g.max > syms - 1) 525275793eaSopenharmony_ci g.max = syms - 1; 526275793eaSopenharmony_ci 527275793eaSopenharmony_ci // determine the number of bits in a code_t 528275793eaSopenharmony_ci int bits = 0; 529275793eaSopenharmony_ci for (code_t word = 1; word; word <<= 1) 530275793eaSopenharmony_ci bits++; 531275793eaSopenharmony_ci 532275793eaSopenharmony_ci // make sure that the calculation of most will not overflow 533275793eaSopenharmony_ci if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { 534275793eaSopenharmony_ci fputs("abort: code length too long for internal types\n", stderr); 535275793eaSopenharmony_ci return 1; 536275793eaSopenharmony_ci } 537275793eaSopenharmony_ci 538275793eaSopenharmony_ci // reject impossible code requests 539275793eaSopenharmony_ci if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { 540275793eaSopenharmony_ci fprintf(stderr, "%d symbols cannot be coded in %d bits\n", 541275793eaSopenharmony_ci syms, g.max); 542275793eaSopenharmony_ci return 1; 543275793eaSopenharmony_ci } 544275793eaSopenharmony_ci 545275793eaSopenharmony_ci // allocate code vector 546275793eaSopenharmony_ci g.code = calloc(g.max + 1, sizeof(int)); 547275793eaSopenharmony_ci assert(g.code != NULL && "out of memory"); 548275793eaSopenharmony_ci 549275793eaSopenharmony_ci // determine size of saved results array, checking for overflows, 550275793eaSopenharmony_ci // allocate and clear the array (set all to zero with calloc()) 551275793eaSopenharmony_ci if (syms == 2) // iff max == 1 552275793eaSopenharmony_ci g.num = NULL; // won't be saving any results 553275793eaSopenharmony_ci else { 554275793eaSopenharmony_ci g.size = syms >> 1; 555275793eaSopenharmony_ci int n = (syms - 1) >> 1; 556275793eaSopenharmony_ci assert(g.size <= (size_t)-1 / n && "overflow"); 557275793eaSopenharmony_ci g.size *= n; 558275793eaSopenharmony_ci n = g.max - 1; 559275793eaSopenharmony_ci assert(g.size <= (size_t)-1 / n && "overflow"); 560275793eaSopenharmony_ci g.size *= n; 561275793eaSopenharmony_ci g.num = calloc(g.size, sizeof(big_t)); 562275793eaSopenharmony_ci assert(g.num != NULL && "out of memory"); 563275793eaSopenharmony_ci } 564275793eaSopenharmony_ci 565275793eaSopenharmony_ci // count possible codes for all numbers of symbols, add up counts 566275793eaSopenharmony_ci big_t sum = 0; 567275793eaSopenharmony_ci for (int n = 2; n <= syms; n++) { 568275793eaSopenharmony_ci big_t got = count(n, 2, 1); 569275793eaSopenharmony_ci sum += got; 570275793eaSopenharmony_ci assert(got != (big_t)-1 && sum >= got && "overflow"); 571275793eaSopenharmony_ci } 572275793eaSopenharmony_ci printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); 573275793eaSopenharmony_ci if (g.max < syms - 1) 574275793eaSopenharmony_ci printf(" (%d-bit length limit)\n", g.max); 575275793eaSopenharmony_ci else 576275793eaSopenharmony_ci puts(" (no length limit)"); 577275793eaSopenharmony_ci 578275793eaSopenharmony_ci // allocate and clear done array for been_here() 579275793eaSopenharmony_ci if (syms == 2) 580275793eaSopenharmony_ci g.done = NULL; 581275793eaSopenharmony_ci else { 582275793eaSopenharmony_ci g.done = calloc(g.size, sizeof(struct tab)); 583275793eaSopenharmony_ci assert(g.done != NULL && "out of memory"); 584275793eaSopenharmony_ci } 585275793eaSopenharmony_ci 586275793eaSopenharmony_ci // find and show maximum inflate table usage 587275793eaSopenharmony_ci if (g.root > g.max) // reduce root to max length 588275793eaSopenharmony_ci g.root = g.max; 589275793eaSopenharmony_ci if ((code_t)syms < ((code_t)1 << (g.root + 1))) 590275793eaSopenharmony_ci enough(syms); 591275793eaSopenharmony_ci else 592275793eaSopenharmony_ci fputs("cannot handle minimum code lengths > root", stderr); 593275793eaSopenharmony_ci 594275793eaSopenharmony_ci // done 595275793eaSopenharmony_ci cleanup(); 596275793eaSopenharmony_ci return 0; 597275793eaSopenharmony_ci} 598