1cb93a386Sopenharmony_ci/* inftrees.c -- generate Huffman trees for efficient decoding
2cb93a386Sopenharmony_ci * Copyright (C) 1995-2017 Mark Adler
3cb93a386Sopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h
4cb93a386Sopenharmony_ci */
5cb93a386Sopenharmony_ci
6cb93a386Sopenharmony_ci#include "zutil.h"
7cb93a386Sopenharmony_ci#include "inftrees.h"
8cb93a386Sopenharmony_ci
9cb93a386Sopenharmony_ci#define MAXBITS 15
10cb93a386Sopenharmony_ci
11cb93a386Sopenharmony_ciconst char inflate_copyright[] =
12cb93a386Sopenharmony_ci   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
13cb93a386Sopenharmony_ci/*
14cb93a386Sopenharmony_ci  If you use the zlib library in a product, an acknowledgment is welcome
15cb93a386Sopenharmony_ci  in the documentation of your product. If for some reason you cannot
16cb93a386Sopenharmony_ci  include such an acknowledgment, I would appreciate that you keep this
17cb93a386Sopenharmony_ci  copyright string in the executable of your product.
18cb93a386Sopenharmony_ci */
19cb93a386Sopenharmony_ci
20cb93a386Sopenharmony_ci/*
21cb93a386Sopenharmony_ci   Build a set of tables to decode the provided canonical Huffman code.
22cb93a386Sopenharmony_ci   The code lengths are lens[0..codes-1].  The result starts at *table,
23cb93a386Sopenharmony_ci   whose indices are 0..2^bits-1.  work is a writable array of at least
24cb93a386Sopenharmony_ci   lens shorts, which is used as a work area.  type is the type of code
25cb93a386Sopenharmony_ci   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26cb93a386Sopenharmony_ci   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27cb93a386Sopenharmony_ci   on return points to the next available entry's address.  bits is the
28cb93a386Sopenharmony_ci   requested root table index bits, and on return it is the actual root
29cb93a386Sopenharmony_ci   table index bits.  It will differ if the request is greater than the
30cb93a386Sopenharmony_ci   longest code or if it is less than the shortest code.
31cb93a386Sopenharmony_ci */
32cb93a386Sopenharmony_ciint ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
33cb93a386Sopenharmony_cicodetype type;
34cb93a386Sopenharmony_ciunsigned short FAR *lens;
35cb93a386Sopenharmony_ciunsigned codes;
36cb93a386Sopenharmony_cicode FAR * FAR *table;
37cb93a386Sopenharmony_ciunsigned FAR *bits;
38cb93a386Sopenharmony_ciunsigned short FAR *work;
39cb93a386Sopenharmony_ci{
40cb93a386Sopenharmony_ci    unsigned len;               /* a code's length in bits */
41cb93a386Sopenharmony_ci    unsigned sym;               /* index of code symbols */
42cb93a386Sopenharmony_ci    unsigned min, max;          /* minimum and maximum code lengths */
43cb93a386Sopenharmony_ci    unsigned root;              /* number of index bits for root table */
44cb93a386Sopenharmony_ci    unsigned curr;              /* number of index bits for current table */
45cb93a386Sopenharmony_ci    unsigned drop;              /* code bits to drop for sub-table */
46cb93a386Sopenharmony_ci    int left;                   /* number of prefix codes available */
47cb93a386Sopenharmony_ci    unsigned used;              /* code entries in table used */
48cb93a386Sopenharmony_ci    unsigned huff;              /* Huffman code */
49cb93a386Sopenharmony_ci    unsigned incr;              /* for incrementing code, index */
50cb93a386Sopenharmony_ci    unsigned fill;              /* index for replicating entries */
51cb93a386Sopenharmony_ci    unsigned low;               /* low bits for current root entry */
52cb93a386Sopenharmony_ci    unsigned mask;              /* mask for low root bits */
53cb93a386Sopenharmony_ci    code here;                  /* table entry for duplication */
54cb93a386Sopenharmony_ci    code FAR *next;             /* next available space in table */
55cb93a386Sopenharmony_ci    const unsigned short FAR *base;     /* base value table to use */
56cb93a386Sopenharmony_ci    const unsigned short FAR *extra;    /* extra bits table to use */
57cb93a386Sopenharmony_ci    unsigned match;             /* use base and extra for symbol >= match */
58cb93a386Sopenharmony_ci    unsigned short count[MAXBITS+1];    /* number of codes of each length */
59cb93a386Sopenharmony_ci    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
60cb93a386Sopenharmony_ci    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
61cb93a386Sopenharmony_ci        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
62cb93a386Sopenharmony_ci        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
63cb93a386Sopenharmony_ci    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
64cb93a386Sopenharmony_ci        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
65cb93a386Sopenharmony_ci        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
66cb93a386Sopenharmony_ci    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
67cb93a386Sopenharmony_ci        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
68cb93a386Sopenharmony_ci        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
69cb93a386Sopenharmony_ci        8193, 12289, 16385, 24577, 0, 0};
70cb93a386Sopenharmony_ci    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
71cb93a386Sopenharmony_ci        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
72cb93a386Sopenharmony_ci        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
73cb93a386Sopenharmony_ci        28, 28, 29, 29, 64, 64};
74cb93a386Sopenharmony_ci
75cb93a386Sopenharmony_ci    /*
76cb93a386Sopenharmony_ci       Process a set of code lengths to create a canonical Huffman code.  The
77cb93a386Sopenharmony_ci       code lengths are lens[0..codes-1].  Each length corresponds to the
78cb93a386Sopenharmony_ci       symbols 0..codes-1.  The Huffman code is generated by first sorting the
79cb93a386Sopenharmony_ci       symbols by length from short to long, and retaining the symbol order
80cb93a386Sopenharmony_ci       for codes with equal lengths.  Then the code starts with all zero bits
81cb93a386Sopenharmony_ci       for the first code of the shortest length, and the codes are integer
82cb93a386Sopenharmony_ci       increments for the same length, and zeros are appended as the length
83cb93a386Sopenharmony_ci       increases.  For the deflate format, these bits are stored backwards
84cb93a386Sopenharmony_ci       from their more natural integer increment ordering, and so when the
85cb93a386Sopenharmony_ci       decoding tables are built in the large loop below, the integer codes
86cb93a386Sopenharmony_ci       are incremented backwards.
87cb93a386Sopenharmony_ci
88cb93a386Sopenharmony_ci       This routine assumes, but does not check, that all of the entries in
89cb93a386Sopenharmony_ci       lens[] are in the range 0..MAXBITS.  The caller must assure this.
90cb93a386Sopenharmony_ci       1..MAXBITS is interpreted as that code length.  zero means that that
91cb93a386Sopenharmony_ci       symbol does not occur in this code.
92cb93a386Sopenharmony_ci
93cb93a386Sopenharmony_ci       The codes are sorted by computing a count of codes for each length,
94cb93a386Sopenharmony_ci       creating from that a table of starting indices for each length in the
95cb93a386Sopenharmony_ci       sorted table, and then entering the symbols in order in the sorted
96cb93a386Sopenharmony_ci       table.  The sorted table is work[], with that space being provided by
97cb93a386Sopenharmony_ci       the caller.
98cb93a386Sopenharmony_ci
99cb93a386Sopenharmony_ci       The length counts are used for other purposes as well, i.e. finding
100cb93a386Sopenharmony_ci       the minimum and maximum length codes, determining if there are any
101cb93a386Sopenharmony_ci       codes at all, checking for a valid set of lengths, and looking ahead
102cb93a386Sopenharmony_ci       at length counts to determine sub-table sizes when building the
103cb93a386Sopenharmony_ci       decoding tables.
104cb93a386Sopenharmony_ci     */
105cb93a386Sopenharmony_ci
106cb93a386Sopenharmony_ci    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
107cb93a386Sopenharmony_ci    for (len = 0; len <= MAXBITS; len++)
108cb93a386Sopenharmony_ci        count[len] = 0;
109cb93a386Sopenharmony_ci    for (sym = 0; sym < codes; sym++)
110cb93a386Sopenharmony_ci        count[lens[sym]]++;
111cb93a386Sopenharmony_ci
112cb93a386Sopenharmony_ci    /* bound code lengths, force root to be within code lengths */
113cb93a386Sopenharmony_ci    root = *bits;
114cb93a386Sopenharmony_ci    for (max = MAXBITS; max >= 1; max--)
115cb93a386Sopenharmony_ci        if (count[max] != 0) break;
116cb93a386Sopenharmony_ci    if (root > max) root = max;
117cb93a386Sopenharmony_ci    if (max == 0) {                     /* no symbols to code at all */
118cb93a386Sopenharmony_ci        here.op = (unsigned char)64;    /* invalid code marker */
119cb93a386Sopenharmony_ci        here.bits = (unsigned char)1;
120cb93a386Sopenharmony_ci        here.val = (unsigned short)0;
121cb93a386Sopenharmony_ci        *(*table)++ = here;             /* make a table to force an error */
122cb93a386Sopenharmony_ci        *(*table)++ = here;
123cb93a386Sopenharmony_ci        *bits = 1;
124cb93a386Sopenharmony_ci        return 0;     /* no symbols, but wait for decoding to report error */
125cb93a386Sopenharmony_ci    }
126cb93a386Sopenharmony_ci    for (min = 1; min < max; min++)
127cb93a386Sopenharmony_ci        if (count[min] != 0) break;
128cb93a386Sopenharmony_ci    if (root < min) root = min;
129cb93a386Sopenharmony_ci
130cb93a386Sopenharmony_ci    /* check for an over-subscribed or incomplete set of lengths */
131cb93a386Sopenharmony_ci    left = 1;
132cb93a386Sopenharmony_ci    for (len = 1; len <= MAXBITS; len++) {
133cb93a386Sopenharmony_ci        left <<= 1;
134cb93a386Sopenharmony_ci        left -= count[len];
135cb93a386Sopenharmony_ci        if (left < 0) return -1;        /* over-subscribed */
136cb93a386Sopenharmony_ci    }
137cb93a386Sopenharmony_ci    if (left > 0 && (type == CODES || max != 1))
138cb93a386Sopenharmony_ci        return -1;                      /* incomplete set */
139cb93a386Sopenharmony_ci
140cb93a386Sopenharmony_ci    /* generate offsets into symbol table for each length for sorting */
141cb93a386Sopenharmony_ci    offs[1] = 0;
142cb93a386Sopenharmony_ci    for (len = 1; len < MAXBITS; len++)
143cb93a386Sopenharmony_ci        offs[len + 1] = offs[len] + count[len];
144cb93a386Sopenharmony_ci
145cb93a386Sopenharmony_ci    /* sort symbols by length, by symbol order within each length */
146cb93a386Sopenharmony_ci    for (sym = 0; sym < codes; sym++)
147cb93a386Sopenharmony_ci        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
148cb93a386Sopenharmony_ci
149cb93a386Sopenharmony_ci    /*
150cb93a386Sopenharmony_ci       Create and fill in decoding tables.  In this loop, the table being
151cb93a386Sopenharmony_ci       filled is at next and has curr index bits.  The code being used is huff
152cb93a386Sopenharmony_ci       with length len.  That code is converted to an index by dropping drop
153cb93a386Sopenharmony_ci       bits off of the bottom.  For codes where len is less than drop + curr,
154cb93a386Sopenharmony_ci       those top drop + curr - len bits are incremented through all values to
155cb93a386Sopenharmony_ci       fill the table with replicated entries.
156cb93a386Sopenharmony_ci
157cb93a386Sopenharmony_ci       root is the number of index bits for the root table.  When len exceeds
158cb93a386Sopenharmony_ci       root, sub-tables are created pointed to by the root entry with an index
159cb93a386Sopenharmony_ci       of the low root bits of huff.  This is saved in low to check for when a
160cb93a386Sopenharmony_ci       new sub-table should be started.  drop is zero when the root table is
161cb93a386Sopenharmony_ci       being filled, and drop is root when sub-tables are being filled.
162cb93a386Sopenharmony_ci
163cb93a386Sopenharmony_ci       When a new sub-table is needed, it is necessary to look ahead in the
164cb93a386Sopenharmony_ci       code lengths to determine what size sub-table is needed.  The length
165cb93a386Sopenharmony_ci       counts are used for this, and so count[] is decremented as codes are
166cb93a386Sopenharmony_ci       entered in the tables.
167cb93a386Sopenharmony_ci
168cb93a386Sopenharmony_ci       used keeps track of how many table entries have been allocated from the
169cb93a386Sopenharmony_ci       provided *table space.  It is checked for LENS and DIST tables against
170cb93a386Sopenharmony_ci       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
171cb93a386Sopenharmony_ci       the initial root table size constants.  See the comments in inftrees.h
172cb93a386Sopenharmony_ci       for more information.
173cb93a386Sopenharmony_ci
174cb93a386Sopenharmony_ci       sym increments through all symbols, and the loop terminates when
175cb93a386Sopenharmony_ci       all codes of length max, i.e. all codes, have been processed.  This
176cb93a386Sopenharmony_ci       routine permits incomplete codes, so another loop after this one fills
177cb93a386Sopenharmony_ci       in the rest of the decoding tables with invalid code markers.
178cb93a386Sopenharmony_ci     */
179cb93a386Sopenharmony_ci
180cb93a386Sopenharmony_ci    /* set up for code type */
181cb93a386Sopenharmony_ci    switch (type) {
182cb93a386Sopenharmony_ci    case CODES:
183cb93a386Sopenharmony_ci        base = extra = work;    /* dummy value--not used */
184cb93a386Sopenharmony_ci        match = 20;
185cb93a386Sopenharmony_ci        break;
186cb93a386Sopenharmony_ci    case LENS:
187cb93a386Sopenharmony_ci        base = lbase;
188cb93a386Sopenharmony_ci        extra = lext;
189cb93a386Sopenharmony_ci        match = 257;
190cb93a386Sopenharmony_ci        break;
191cb93a386Sopenharmony_ci    default:    /* DISTS */
192cb93a386Sopenharmony_ci        base = dbase;
193cb93a386Sopenharmony_ci        extra = dext;
194cb93a386Sopenharmony_ci        match = 0;
195cb93a386Sopenharmony_ci    }
196cb93a386Sopenharmony_ci
197cb93a386Sopenharmony_ci    /* initialize state for loop */
198cb93a386Sopenharmony_ci    huff = 0;                   /* starting code */
199cb93a386Sopenharmony_ci    sym = 0;                    /* starting code symbol */
200cb93a386Sopenharmony_ci    len = min;                  /* starting code length */
201cb93a386Sopenharmony_ci    next = *table;              /* current table to fill in */
202cb93a386Sopenharmony_ci    curr = root;                /* current table index bits */
203cb93a386Sopenharmony_ci    drop = 0;                   /* current bits to drop from code for index */
204cb93a386Sopenharmony_ci    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
205cb93a386Sopenharmony_ci    used = 1U << root;          /* use root table entries */
206cb93a386Sopenharmony_ci    mask = used - 1;            /* mask for comparing low */
207cb93a386Sopenharmony_ci
208cb93a386Sopenharmony_ci    /* check available table space */
209cb93a386Sopenharmony_ci    if ((type == LENS && used > ENOUGH_LENS) ||
210cb93a386Sopenharmony_ci        (type == DISTS && used > ENOUGH_DISTS))
211cb93a386Sopenharmony_ci        return 1;
212cb93a386Sopenharmony_ci
213cb93a386Sopenharmony_ci    /* process all codes and make table entries */
214cb93a386Sopenharmony_ci    for (;;) {
215cb93a386Sopenharmony_ci        /* create table entry */
216cb93a386Sopenharmony_ci        here.bits = (unsigned char)(len - drop);
217cb93a386Sopenharmony_ci        if (work[sym] + 1U < match) {
218cb93a386Sopenharmony_ci            here.op = (unsigned char)0;
219cb93a386Sopenharmony_ci            here.val = work[sym];
220cb93a386Sopenharmony_ci        }
221cb93a386Sopenharmony_ci        else if (work[sym] >= match) {
222cb93a386Sopenharmony_ci            here.op = (unsigned char)(extra[work[sym] - match]);
223cb93a386Sopenharmony_ci            here.val = base[work[sym] - match];
224cb93a386Sopenharmony_ci        }
225cb93a386Sopenharmony_ci        else {
226cb93a386Sopenharmony_ci            here.op = (unsigned char)(32 + 64);         /* end of block */
227cb93a386Sopenharmony_ci            here.val = 0;
228cb93a386Sopenharmony_ci        }
229cb93a386Sopenharmony_ci
230cb93a386Sopenharmony_ci        /* replicate for those indices with low len bits equal to huff */
231cb93a386Sopenharmony_ci        incr = 1U << (len - drop);
232cb93a386Sopenharmony_ci        fill = 1U << curr;
233cb93a386Sopenharmony_ci        min = fill;                 /* save offset to next table */
234cb93a386Sopenharmony_ci        do {
235cb93a386Sopenharmony_ci            fill -= incr;
236cb93a386Sopenharmony_ci            next[(huff >> drop) + fill] = here;
237cb93a386Sopenharmony_ci        } while (fill != 0);
238cb93a386Sopenharmony_ci
239cb93a386Sopenharmony_ci        /* backwards increment the len-bit code huff */
240cb93a386Sopenharmony_ci        incr = 1U << (len - 1);
241cb93a386Sopenharmony_ci        while (huff & incr)
242cb93a386Sopenharmony_ci            incr >>= 1;
243cb93a386Sopenharmony_ci        if (incr != 0) {
244cb93a386Sopenharmony_ci            huff &= incr - 1;
245cb93a386Sopenharmony_ci            huff += incr;
246cb93a386Sopenharmony_ci        }
247cb93a386Sopenharmony_ci        else
248cb93a386Sopenharmony_ci            huff = 0;
249cb93a386Sopenharmony_ci
250cb93a386Sopenharmony_ci        /* go to next symbol, update count, len */
251cb93a386Sopenharmony_ci        sym++;
252cb93a386Sopenharmony_ci        if (--(count[len]) == 0) {
253cb93a386Sopenharmony_ci            if (len == max) break;
254cb93a386Sopenharmony_ci            len = lens[work[sym]];
255cb93a386Sopenharmony_ci        }
256cb93a386Sopenharmony_ci
257cb93a386Sopenharmony_ci        /* create new sub-table if needed */
258cb93a386Sopenharmony_ci        if (len > root && (huff & mask) != low) {
259cb93a386Sopenharmony_ci            /* if first time, transition to sub-tables */
260cb93a386Sopenharmony_ci            if (drop == 0)
261cb93a386Sopenharmony_ci                drop = root;
262cb93a386Sopenharmony_ci
263cb93a386Sopenharmony_ci            /* increment past last table */
264cb93a386Sopenharmony_ci            next += min;            /* here min is 1 << curr */
265cb93a386Sopenharmony_ci
266cb93a386Sopenharmony_ci            /* determine length of next table */
267cb93a386Sopenharmony_ci            curr = len - drop;
268cb93a386Sopenharmony_ci            left = (int)(1 << curr);
269cb93a386Sopenharmony_ci            while (curr + drop < max) {
270cb93a386Sopenharmony_ci                left -= count[curr + drop];
271cb93a386Sopenharmony_ci                if (left <= 0) break;
272cb93a386Sopenharmony_ci                curr++;
273cb93a386Sopenharmony_ci                left <<= 1;
274cb93a386Sopenharmony_ci            }
275cb93a386Sopenharmony_ci
276cb93a386Sopenharmony_ci            /* check for enough space */
277cb93a386Sopenharmony_ci            used += 1U << curr;
278cb93a386Sopenharmony_ci            if ((type == LENS && used > ENOUGH_LENS) ||
279cb93a386Sopenharmony_ci                (type == DISTS && used > ENOUGH_DISTS))
280cb93a386Sopenharmony_ci                return 1;
281cb93a386Sopenharmony_ci
282cb93a386Sopenharmony_ci            /* point entry in root table to sub-table */
283cb93a386Sopenharmony_ci            low = huff & mask;
284cb93a386Sopenharmony_ci            (*table)[low].op = (unsigned char)curr;
285cb93a386Sopenharmony_ci            (*table)[low].bits = (unsigned char)root;
286cb93a386Sopenharmony_ci            (*table)[low].val = (unsigned short)(next - *table);
287cb93a386Sopenharmony_ci        }
288cb93a386Sopenharmony_ci    }
289cb93a386Sopenharmony_ci
290cb93a386Sopenharmony_ci    /* fill in remaining table entry if code is incomplete (guaranteed to have
291cb93a386Sopenharmony_ci       at most one remaining entry, since if the code is incomplete, the
292cb93a386Sopenharmony_ci       maximum code length that was allowed to get this far is one bit) */
293cb93a386Sopenharmony_ci    if (huff != 0) {
294cb93a386Sopenharmony_ci        here.op = (unsigned char)64;            /* invalid code marker */
295cb93a386Sopenharmony_ci        here.bits = (unsigned char)(len - drop);
296cb93a386Sopenharmony_ci        here.val = (unsigned short)0;
297cb93a386Sopenharmony_ci        next[huff] = here;
298cb93a386Sopenharmony_ci    }
299cb93a386Sopenharmony_ci
300cb93a386Sopenharmony_ci    /* set return parameters */
301cb93a386Sopenharmony_ci    *table += used;
302cb93a386Sopenharmony_ci    *bits = root;
303cb93a386Sopenharmony_ci    return 0;
304cb93a386Sopenharmony_ci}
305