1275793eaSopenharmony_ci/* inftrees.c -- generate Huffman trees for efficient decoding
2275793eaSopenharmony_ci * Copyright (C) 1995-2024 Mark Adler
3275793eaSopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h
4275793eaSopenharmony_ci */
5275793eaSopenharmony_ci
6275793eaSopenharmony_ci#include "zutil.h"
7275793eaSopenharmony_ci#include "inftrees.h"
8275793eaSopenharmony_ci
9275793eaSopenharmony_ci#define MAXBITS 15
10275793eaSopenharmony_ci
11275793eaSopenharmony_ciconst char inflate_copyright[] =
12275793eaSopenharmony_ci   " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
13275793eaSopenharmony_ci/*
14275793eaSopenharmony_ci  If you use the zlib library in a product, an acknowledgment is welcome
15275793eaSopenharmony_ci  in the documentation of your product. If for some reason you cannot
16275793eaSopenharmony_ci  include such an acknowledgment, I would appreciate that you keep this
17275793eaSopenharmony_ci  copyright string in the executable of your product.
18275793eaSopenharmony_ci */
19275793eaSopenharmony_ci
20275793eaSopenharmony_ci/*
21275793eaSopenharmony_ci   Build a set of tables to decode the provided canonical Huffman code.
22275793eaSopenharmony_ci   The code lengths are lens[0..codes-1].  The result starts at *table,
23275793eaSopenharmony_ci   whose indices are 0..2^bits-1.  work is a writable array of at least
24275793eaSopenharmony_ci   lens shorts, which is used as a work area.  type is the type of code
25275793eaSopenharmony_ci   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26275793eaSopenharmony_ci   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27275793eaSopenharmony_ci   on return points to the next available entry's address.  bits is the
28275793eaSopenharmony_ci   requested root table index bits, and on return it is the actual root
29275793eaSopenharmony_ci   table index bits.  It will differ if the request is greater than the
30275793eaSopenharmony_ci   longest code or if it is less than the shortest code.
31275793eaSopenharmony_ci */
32275793eaSopenharmony_ciint ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
33275793eaSopenharmony_ci                                unsigned codes, code FAR * FAR *table,
34275793eaSopenharmony_ci                                unsigned FAR *bits, unsigned short FAR *work) {
35275793eaSopenharmony_ci    unsigned len;               /* a code's length in bits */
36275793eaSopenharmony_ci    unsigned sym;               /* index of code symbols */
37275793eaSopenharmony_ci    unsigned min, max;          /* minimum and maximum code lengths */
38275793eaSopenharmony_ci    unsigned root;              /* number of index bits for root table */
39275793eaSopenharmony_ci    unsigned curr;              /* number of index bits for current table */
40275793eaSopenharmony_ci    unsigned drop;              /* code bits to drop for sub-table */
41275793eaSopenharmony_ci    int left;                   /* number of prefix codes available */
42275793eaSopenharmony_ci    unsigned used;              /* code entries in table used */
43275793eaSopenharmony_ci    unsigned huff;              /* Huffman code */
44275793eaSopenharmony_ci    unsigned incr;              /* for incrementing code, index */
45275793eaSopenharmony_ci    unsigned fill;              /* index for replicating entries */
46275793eaSopenharmony_ci    unsigned low;               /* low bits for current root entry */
47275793eaSopenharmony_ci    unsigned mask;              /* mask for low root bits */
48275793eaSopenharmony_ci    code here;                  /* table entry for duplication */
49275793eaSopenharmony_ci    code FAR *next;             /* next available space in table */
50275793eaSopenharmony_ci    const unsigned short FAR *base;     /* base value table to use */
51275793eaSopenharmony_ci    const unsigned short FAR *extra;    /* extra bits table to use */
52275793eaSopenharmony_ci    unsigned match;             /* use base and extra for symbol >= match */
53275793eaSopenharmony_ci    unsigned short count[MAXBITS+1];    /* number of codes of each length */
54275793eaSopenharmony_ci    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
55275793eaSopenharmony_ci    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
56275793eaSopenharmony_ci        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
57275793eaSopenharmony_ci        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
58275793eaSopenharmony_ci    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
59275793eaSopenharmony_ci        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
60275793eaSopenharmony_ci        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
61275793eaSopenharmony_ci    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
62275793eaSopenharmony_ci        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
63275793eaSopenharmony_ci        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
64275793eaSopenharmony_ci        8193, 12289, 16385, 24577, 0, 0};
65275793eaSopenharmony_ci    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
66275793eaSopenharmony_ci        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
67275793eaSopenharmony_ci        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
68275793eaSopenharmony_ci        28, 28, 29, 29, 64, 64};
69275793eaSopenharmony_ci
70275793eaSopenharmony_ci    /*
71275793eaSopenharmony_ci       Process a set of code lengths to create a canonical Huffman code.  The
72275793eaSopenharmony_ci       code lengths are lens[0..codes-1].  Each length corresponds to the
73275793eaSopenharmony_ci       symbols 0..codes-1.  The Huffman code is generated by first sorting the
74275793eaSopenharmony_ci       symbols by length from short to long, and retaining the symbol order
75275793eaSopenharmony_ci       for codes with equal lengths.  Then the code starts with all zero bits
76275793eaSopenharmony_ci       for the first code of the shortest length, and the codes are integer
77275793eaSopenharmony_ci       increments for the same length, and zeros are appended as the length
78275793eaSopenharmony_ci       increases.  For the deflate format, these bits are stored backwards
79275793eaSopenharmony_ci       from their more natural integer increment ordering, and so when the
80275793eaSopenharmony_ci       decoding tables are built in the large loop below, the integer codes
81275793eaSopenharmony_ci       are incremented backwards.
82275793eaSopenharmony_ci
83275793eaSopenharmony_ci       This routine assumes, but does not check, that all of the entries in
84275793eaSopenharmony_ci       lens[] are in the range 0..MAXBITS.  The caller must assure this.
85275793eaSopenharmony_ci       1..MAXBITS is interpreted as that code length.  zero means that that
86275793eaSopenharmony_ci       symbol does not occur in this code.
87275793eaSopenharmony_ci
88275793eaSopenharmony_ci       The codes are sorted by computing a count of codes for each length,
89275793eaSopenharmony_ci       creating from that a table of starting indices for each length in the
90275793eaSopenharmony_ci       sorted table, and then entering the symbols in order in the sorted
91275793eaSopenharmony_ci       table.  The sorted table is work[], with that space being provided by
92275793eaSopenharmony_ci       the caller.
93275793eaSopenharmony_ci
94275793eaSopenharmony_ci       The length counts are used for other purposes as well, i.e. finding
95275793eaSopenharmony_ci       the minimum and maximum length codes, determining if there are any
96275793eaSopenharmony_ci       codes at all, checking for a valid set of lengths, and looking ahead
97275793eaSopenharmony_ci       at length counts to determine sub-table sizes when building the
98275793eaSopenharmony_ci       decoding tables.
99275793eaSopenharmony_ci     */
100275793eaSopenharmony_ci
101275793eaSopenharmony_ci    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
102275793eaSopenharmony_ci    for (len = 0; len <= MAXBITS; len++)
103275793eaSopenharmony_ci        count[len] = 0;
104275793eaSopenharmony_ci    for (sym = 0; sym < codes; sym++)
105275793eaSopenharmony_ci        count[lens[sym]]++;
106275793eaSopenharmony_ci
107275793eaSopenharmony_ci    /* bound code lengths, force root to be within code lengths */
108275793eaSopenharmony_ci    root = *bits;
109275793eaSopenharmony_ci    for (max = MAXBITS; max >= 1; max--)
110275793eaSopenharmony_ci        if (count[max] != 0) break;
111275793eaSopenharmony_ci    if (root > max) root = max;
112275793eaSopenharmony_ci    if (max == 0) {                     /* no symbols to code at all */
113275793eaSopenharmony_ci        here.op = (unsigned char)64;    /* invalid code marker */
114275793eaSopenharmony_ci        here.bits = (unsigned char)1;
115275793eaSopenharmony_ci        here.val = (unsigned short)0;
116275793eaSopenharmony_ci        *(*table)++ = here;             /* make a table to force an error */
117275793eaSopenharmony_ci        *(*table)++ = here;
118275793eaSopenharmony_ci        *bits = 1;
119275793eaSopenharmony_ci        return 0;     /* no symbols, but wait for decoding to report error */
120275793eaSopenharmony_ci    }
121275793eaSopenharmony_ci    for (min = 1; min < max; min++)
122275793eaSopenharmony_ci        if (count[min] != 0) break;
123275793eaSopenharmony_ci    if (root < min) root = min;
124275793eaSopenharmony_ci
125275793eaSopenharmony_ci    /* check for an over-subscribed or incomplete set of lengths */
126275793eaSopenharmony_ci    left = 1;
127275793eaSopenharmony_ci    for (len = 1; len <= MAXBITS; len++) {
128275793eaSopenharmony_ci        left <<= 1;
129275793eaSopenharmony_ci        left -= count[len];
130275793eaSopenharmony_ci        if (left < 0) return -1;        /* over-subscribed */
131275793eaSopenharmony_ci    }
132275793eaSopenharmony_ci    if (left > 0 && (type == CODES || max != 1))
133275793eaSopenharmony_ci        return -1;                      /* incomplete set */
134275793eaSopenharmony_ci
135275793eaSopenharmony_ci    /* generate offsets into symbol table for each length for sorting */
136275793eaSopenharmony_ci    offs[1] = 0;
137275793eaSopenharmony_ci    for (len = 1; len < MAXBITS; len++)
138275793eaSopenharmony_ci        offs[len + 1] = offs[len] + count[len];
139275793eaSopenharmony_ci
140275793eaSopenharmony_ci    /* sort symbols by length, by symbol order within each length */
141275793eaSopenharmony_ci    for (sym = 0; sym < codes; sym++)
142275793eaSopenharmony_ci        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
143275793eaSopenharmony_ci
144275793eaSopenharmony_ci    /*
145275793eaSopenharmony_ci       Create and fill in decoding tables.  In this loop, the table being
146275793eaSopenharmony_ci       filled is at next and has curr index bits.  The code being used is huff
147275793eaSopenharmony_ci       with length len.  That code is converted to an index by dropping drop
148275793eaSopenharmony_ci       bits off of the bottom.  For codes where len is less than drop + curr,
149275793eaSopenharmony_ci       those top drop + curr - len bits are incremented through all values to
150275793eaSopenharmony_ci       fill the table with replicated entries.
151275793eaSopenharmony_ci
152275793eaSopenharmony_ci       root is the number of index bits for the root table.  When len exceeds
153275793eaSopenharmony_ci       root, sub-tables are created pointed to by the root entry with an index
154275793eaSopenharmony_ci       of the low root bits of huff.  This is saved in low to check for when a
155275793eaSopenharmony_ci       new sub-table should be started.  drop is zero when the root table is
156275793eaSopenharmony_ci       being filled, and drop is root when sub-tables are being filled.
157275793eaSopenharmony_ci
158275793eaSopenharmony_ci       When a new sub-table is needed, it is necessary to look ahead in the
159275793eaSopenharmony_ci       code lengths to determine what size sub-table is needed.  The length
160275793eaSopenharmony_ci       counts are used for this, and so count[] is decremented as codes are
161275793eaSopenharmony_ci       entered in the tables.
162275793eaSopenharmony_ci
163275793eaSopenharmony_ci       used keeps track of how many table entries have been allocated from the
164275793eaSopenharmony_ci       provided *table space.  It is checked for LENS and DIST tables against
165275793eaSopenharmony_ci       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
166275793eaSopenharmony_ci       the initial root table size constants.  See the comments in inftrees.h
167275793eaSopenharmony_ci       for more information.
168275793eaSopenharmony_ci
169275793eaSopenharmony_ci       sym increments through all symbols, and the loop terminates when
170275793eaSopenharmony_ci       all codes of length max, i.e. all codes, have been processed.  This
171275793eaSopenharmony_ci       routine permits incomplete codes, so another loop after this one fills
172275793eaSopenharmony_ci       in the rest of the decoding tables with invalid code markers.
173275793eaSopenharmony_ci     */
174275793eaSopenharmony_ci
175275793eaSopenharmony_ci    /* set up for code type */
176275793eaSopenharmony_ci    switch (type) {
177275793eaSopenharmony_ci    case CODES:
178275793eaSopenharmony_ci        base = extra = work;    /* dummy value--not used */
179275793eaSopenharmony_ci        match = 20;
180275793eaSopenharmony_ci        break;
181275793eaSopenharmony_ci    case LENS:
182275793eaSopenharmony_ci        base = lbase;
183275793eaSopenharmony_ci        extra = lext;
184275793eaSopenharmony_ci        match = 257;
185275793eaSopenharmony_ci        break;
186275793eaSopenharmony_ci    default:    /* DISTS */
187275793eaSopenharmony_ci        base = dbase;
188275793eaSopenharmony_ci        extra = dext;
189275793eaSopenharmony_ci        match = 0;
190275793eaSopenharmony_ci    }
191275793eaSopenharmony_ci
192275793eaSopenharmony_ci    /* initialize state for loop */
193275793eaSopenharmony_ci    huff = 0;                   /* starting code */
194275793eaSopenharmony_ci    sym = 0;                    /* starting code symbol */
195275793eaSopenharmony_ci    len = min;                  /* starting code length */
196275793eaSopenharmony_ci    next = *table;              /* current table to fill in */
197275793eaSopenharmony_ci    curr = root;                /* current table index bits */
198275793eaSopenharmony_ci    drop = 0;                   /* current bits to drop from code for index */
199275793eaSopenharmony_ci    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
200275793eaSopenharmony_ci    used = 1U << root;          /* use root table entries */
201275793eaSopenharmony_ci    mask = used - 1;            /* mask for comparing low */
202275793eaSopenharmony_ci
203275793eaSopenharmony_ci    /* check available table space */
204275793eaSopenharmony_ci    if ((type == LENS && used > ENOUGH_LENS) ||
205275793eaSopenharmony_ci        (type == DISTS && used > ENOUGH_DISTS))
206275793eaSopenharmony_ci        return 1;
207275793eaSopenharmony_ci
208275793eaSopenharmony_ci    /* process all codes and make table entries */
209275793eaSopenharmony_ci    for (;;) {
210275793eaSopenharmony_ci        /* create table entry */
211275793eaSopenharmony_ci        here.bits = (unsigned char)(len - drop);
212275793eaSopenharmony_ci        if (work[sym] + 1U < match) {
213275793eaSopenharmony_ci            here.op = (unsigned char)0;
214275793eaSopenharmony_ci            here.val = work[sym];
215275793eaSopenharmony_ci        }
216275793eaSopenharmony_ci        else if (work[sym] >= match) {
217275793eaSopenharmony_ci            here.op = (unsigned char)(extra[work[sym] - match]);
218275793eaSopenharmony_ci            here.val = base[work[sym] - match];
219275793eaSopenharmony_ci        }
220275793eaSopenharmony_ci        else {
221275793eaSopenharmony_ci            here.op = (unsigned char)(32 + 64);         /* end of block */
222275793eaSopenharmony_ci            here.val = 0;
223275793eaSopenharmony_ci        }
224275793eaSopenharmony_ci
225275793eaSopenharmony_ci        /* replicate for those indices with low len bits equal to huff */
226275793eaSopenharmony_ci        incr = 1U << (len - drop);
227275793eaSopenharmony_ci        fill = 1U << curr;
228275793eaSopenharmony_ci        min = fill;                 /* save offset to next table */
229275793eaSopenharmony_ci        do {
230275793eaSopenharmony_ci            fill -= incr;
231275793eaSopenharmony_ci            next[(huff >> drop) + fill] = here;
232275793eaSopenharmony_ci        } while (fill != 0);
233275793eaSopenharmony_ci
234275793eaSopenharmony_ci        /* backwards increment the len-bit code huff */
235275793eaSopenharmony_ci        incr = 1U << (len - 1);
236275793eaSopenharmony_ci        while (huff & incr)
237275793eaSopenharmony_ci            incr >>= 1;
238275793eaSopenharmony_ci        if (incr != 0) {
239275793eaSopenharmony_ci            huff &= incr - 1;
240275793eaSopenharmony_ci            huff += incr;
241275793eaSopenharmony_ci        }
242275793eaSopenharmony_ci        else
243275793eaSopenharmony_ci            huff = 0;
244275793eaSopenharmony_ci
245275793eaSopenharmony_ci        /* go to next symbol, update count, len */
246275793eaSopenharmony_ci        sym++;
247275793eaSopenharmony_ci        if (--(count[len]) == 0) {
248275793eaSopenharmony_ci            if (len == max) break;
249275793eaSopenharmony_ci            len = lens[work[sym]];
250275793eaSopenharmony_ci        }
251275793eaSopenharmony_ci
252275793eaSopenharmony_ci        /* create new sub-table if needed */
253275793eaSopenharmony_ci        if (len > root && (huff & mask) != low) {
254275793eaSopenharmony_ci            /* if first time, transition to sub-tables */
255275793eaSopenharmony_ci            if (drop == 0)
256275793eaSopenharmony_ci                drop = root;
257275793eaSopenharmony_ci
258275793eaSopenharmony_ci            /* increment past last table */
259275793eaSopenharmony_ci            next += min;            /* here min is 1 << curr */
260275793eaSopenharmony_ci
261275793eaSopenharmony_ci            /* determine length of next table */
262275793eaSopenharmony_ci            curr = len - drop;
263275793eaSopenharmony_ci            left = (int)(1 << curr);
264275793eaSopenharmony_ci            while (curr + drop < max) {
265275793eaSopenharmony_ci                left -= count[curr + drop];
266275793eaSopenharmony_ci                if (left <= 0) break;
267275793eaSopenharmony_ci                curr++;
268275793eaSopenharmony_ci                left <<= 1;
269275793eaSopenharmony_ci            }
270275793eaSopenharmony_ci
271275793eaSopenharmony_ci            /* check for enough space */
272275793eaSopenharmony_ci            used += 1U << curr;
273275793eaSopenharmony_ci            if ((type == LENS && used > ENOUGH_LENS) ||
274275793eaSopenharmony_ci                (type == DISTS && used > ENOUGH_DISTS))
275275793eaSopenharmony_ci                return 1;
276275793eaSopenharmony_ci
277275793eaSopenharmony_ci            /* point entry in root table to sub-table */
278275793eaSopenharmony_ci            low = huff & mask;
279275793eaSopenharmony_ci            (*table)[low].op = (unsigned char)curr;
280275793eaSopenharmony_ci            (*table)[low].bits = (unsigned char)root;
281275793eaSopenharmony_ci            (*table)[low].val = (unsigned short)(next - *table);
282275793eaSopenharmony_ci        }
283275793eaSopenharmony_ci    }
284275793eaSopenharmony_ci
285275793eaSopenharmony_ci    /* fill in remaining table entry if code is incomplete (guaranteed to have
286275793eaSopenharmony_ci       at most one remaining entry, since if the code is incomplete, the
287275793eaSopenharmony_ci       maximum code length that was allowed to get this far is one bit) */
288275793eaSopenharmony_ci    if (huff != 0) {
289275793eaSopenharmony_ci        here.op = (unsigned char)64;            /* invalid code marker */
290275793eaSopenharmony_ci        here.bits = (unsigned char)(len - drop);
291275793eaSopenharmony_ci        here.val = (unsigned short)0;
292275793eaSopenharmony_ci        next[huff] = here;
293275793eaSopenharmony_ci    }
294275793eaSopenharmony_ci
295275793eaSopenharmony_ci    /* set return parameters */
296275793eaSopenharmony_ci    *table += used;
297275793eaSopenharmony_ci    *bits = root;
298275793eaSopenharmony_ci    return 0;
299275793eaSopenharmony_ci}
300