1275793eaSopenharmony_ci/* inftree9.c -- generate Huffman trees for efficient decoding
2275793eaSopenharmony_ci * Copyright (C) 1995-2024 Mark Adler
3275793eaSopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h
4275793eaSopenharmony_ci */
5275793eaSopenharmony_ci
6275793eaSopenharmony_ci#include "zutil.h"
7275793eaSopenharmony_ci#include "inftree9.h"
8275793eaSopenharmony_ci
9275793eaSopenharmony_ci#define MAXBITS 15
10275793eaSopenharmony_ci
11275793eaSopenharmony_ciconst char inflate9_copyright[] =
12275793eaSopenharmony_ci   " inflate9 1.3.1 Copyright 1995-2024 Mark Adler ";
13275793eaSopenharmony_ci/*
14275793eaSopenharmony_ci  If you use the zlib library in a product, an acknowledgment is welcome
15275793eaSopenharmony_ci  in the documentation of your product. If for some reason you cannot
16275793eaSopenharmony_ci  include such an acknowledgment, I would appreciate that you keep this
17275793eaSopenharmony_ci  copyright string in the executable of your product.
18275793eaSopenharmony_ci */
19275793eaSopenharmony_ci
20275793eaSopenharmony_ci/*
21275793eaSopenharmony_ci   Build a set of tables to decode the provided canonical Huffman code.
22275793eaSopenharmony_ci   The code lengths are lens[0..codes-1].  The result starts at *table,
23275793eaSopenharmony_ci   whose indices are 0..2^bits-1.  work is a writable array of at least
24275793eaSopenharmony_ci   lens shorts, which is used as a work area.  type is the type of code
25275793eaSopenharmony_ci   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26275793eaSopenharmony_ci   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27275793eaSopenharmony_ci   on return points to the next available entry's address.  bits is the
28275793eaSopenharmony_ci   requested root table index bits, and on return it is the actual root
29275793eaSopenharmony_ci   table index bits.  It will differ if the request is greater than the
30275793eaSopenharmony_ci   longest code or if it is less than the shortest code.
31275793eaSopenharmony_ci */
32275793eaSopenharmony_ciint inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes,
33275793eaSopenharmony_ci                   code FAR * FAR *table, unsigned FAR *bits,
34275793eaSopenharmony_ci                   unsigned short FAR *work) {
35275793eaSopenharmony_ci    unsigned len;               /* a code's length in bits */
36275793eaSopenharmony_ci    unsigned sym;               /* index of code symbols */
37275793eaSopenharmony_ci    unsigned min, max;          /* minimum and maximum code lengths */
38275793eaSopenharmony_ci    unsigned root;              /* number of index bits for root table */
39275793eaSopenharmony_ci    unsigned curr;              /* number of index bits for current table */
40275793eaSopenharmony_ci    unsigned drop;              /* code bits to drop for sub-table */
41275793eaSopenharmony_ci    int left;                   /* number of prefix codes available */
42275793eaSopenharmony_ci    unsigned used;              /* code entries in table used */
43275793eaSopenharmony_ci    unsigned huff;              /* Huffman code */
44275793eaSopenharmony_ci    unsigned incr;              /* for incrementing code, index */
45275793eaSopenharmony_ci    unsigned fill;              /* index for replicating entries */
46275793eaSopenharmony_ci    unsigned low;               /* low bits for current root entry */
47275793eaSopenharmony_ci    unsigned mask;              /* mask for low root bits */
48275793eaSopenharmony_ci    code this;                  /* table entry for duplication */
49275793eaSopenharmony_ci    code FAR *next;             /* next available space in table */
50275793eaSopenharmony_ci    const unsigned short FAR *base;     /* base value table to use */
51275793eaSopenharmony_ci    const unsigned short FAR *extra;    /* extra bits table to use */
52275793eaSopenharmony_ci    int end;                    /* use base and extra for symbol > end */
53275793eaSopenharmony_ci    unsigned short count[MAXBITS+1];    /* number of codes of each length */
54275793eaSopenharmony_ci    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
55275793eaSopenharmony_ci    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
56275793eaSopenharmony_ci        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17,
57275793eaSopenharmony_ci        19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
58275793eaSopenharmony_ci        131, 163, 195, 227, 3, 0, 0};
59275793eaSopenharmony_ci    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
60275793eaSopenharmony_ci        128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
61275793eaSopenharmony_ci        130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132,
62275793eaSopenharmony_ci        133, 133, 133, 133, 144, 203, 77};
63275793eaSopenharmony_ci    static const unsigned short dbase[32] = { /* Distance codes 0..31 base */
64275793eaSopenharmony_ci        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49,
65275793eaSopenharmony_ci        65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073,
66275793eaSopenharmony_ci        4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153};
67275793eaSopenharmony_ci    static const unsigned short dext[32] = { /* Distance codes 0..31 extra */
68275793eaSopenharmony_ci        128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132,
69275793eaSopenharmony_ci        133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138,
70275793eaSopenharmony_ci        139, 139, 140, 140, 141, 141, 142, 142};
71275793eaSopenharmony_ci
72275793eaSopenharmony_ci    /*
73275793eaSopenharmony_ci       Process a set of code lengths to create a canonical Huffman code.  The
74275793eaSopenharmony_ci       code lengths are lens[0..codes-1].  Each length corresponds to the
75275793eaSopenharmony_ci       symbols 0..codes-1.  The Huffman code is generated by first sorting the
76275793eaSopenharmony_ci       symbols by length from short to long, and retaining the symbol order
77275793eaSopenharmony_ci       for codes with equal lengths.  Then the code starts with all zero bits
78275793eaSopenharmony_ci       for the first code of the shortest length, and the codes are integer
79275793eaSopenharmony_ci       increments for the same length, and zeros are appended as the length
80275793eaSopenharmony_ci       increases.  For the deflate format, these bits are stored backwards
81275793eaSopenharmony_ci       from their more natural integer increment ordering, and so when the
82275793eaSopenharmony_ci       decoding tables are built in the large loop below, the integer codes
83275793eaSopenharmony_ci       are incremented backwards.
84275793eaSopenharmony_ci
85275793eaSopenharmony_ci       This routine assumes, but does not check, that all of the entries in
86275793eaSopenharmony_ci       lens[] are in the range 0..MAXBITS.  The caller must assure this.
87275793eaSopenharmony_ci       1..MAXBITS is interpreted as that code length.  zero means that that
88275793eaSopenharmony_ci       symbol does not occur in this code.
89275793eaSopenharmony_ci
90275793eaSopenharmony_ci       The codes are sorted by computing a count of codes for each length,
91275793eaSopenharmony_ci       creating from that a table of starting indices for each length in the
92275793eaSopenharmony_ci       sorted table, and then entering the symbols in order in the sorted
93275793eaSopenharmony_ci       table.  The sorted table is work[], with that space being provided by
94275793eaSopenharmony_ci       the caller.
95275793eaSopenharmony_ci
96275793eaSopenharmony_ci       The length counts are used for other purposes as well, i.e. finding
97275793eaSopenharmony_ci       the minimum and maximum length codes, determining if there are any
98275793eaSopenharmony_ci       codes at all, checking for a valid set of lengths, and looking ahead
99275793eaSopenharmony_ci       at length counts to determine sub-table sizes when building the
100275793eaSopenharmony_ci       decoding tables.
101275793eaSopenharmony_ci     */
102275793eaSopenharmony_ci
103275793eaSopenharmony_ci    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
104275793eaSopenharmony_ci    for (len = 0; len <= MAXBITS; len++)
105275793eaSopenharmony_ci        count[len] = 0;
106275793eaSopenharmony_ci    for (sym = 0; sym < codes; sym++)
107275793eaSopenharmony_ci        count[lens[sym]]++;
108275793eaSopenharmony_ci
109275793eaSopenharmony_ci    /* bound code lengths, force root to be within code lengths */
110275793eaSopenharmony_ci    root = *bits;
111275793eaSopenharmony_ci    for (max = MAXBITS; max >= 1; max--)
112275793eaSopenharmony_ci        if (count[max] != 0) break;
113275793eaSopenharmony_ci    if (root > max) root = max;
114275793eaSopenharmony_ci    if (max == 0) return -1;            /* no codes! */
115275793eaSopenharmony_ci    for (min = 1; min <= MAXBITS; min++)
116275793eaSopenharmony_ci        if (count[min] != 0) break;
117275793eaSopenharmony_ci    if (root < min) root = min;
118275793eaSopenharmony_ci
119275793eaSopenharmony_ci    /* check for an over-subscribed or incomplete set of lengths */
120275793eaSopenharmony_ci    left = 1;
121275793eaSopenharmony_ci    for (len = 1; len <= MAXBITS; len++) {
122275793eaSopenharmony_ci        left <<= 1;
123275793eaSopenharmony_ci        left -= count[len];
124275793eaSopenharmony_ci        if (left < 0) return -1;        /* over-subscribed */
125275793eaSopenharmony_ci    }
126275793eaSopenharmony_ci    if (left > 0 && (type == CODES || max != 1))
127275793eaSopenharmony_ci        return -1;                      /* incomplete set */
128275793eaSopenharmony_ci
129275793eaSopenharmony_ci    /* generate offsets into symbol table for each length for sorting */
130275793eaSopenharmony_ci    offs[1] = 0;
131275793eaSopenharmony_ci    for (len = 1; len < MAXBITS; len++)
132275793eaSopenharmony_ci        offs[len + 1] = offs[len] + count[len];
133275793eaSopenharmony_ci
134275793eaSopenharmony_ci    /* sort symbols by length, by symbol order within each length */
135275793eaSopenharmony_ci    for (sym = 0; sym < codes; sym++)
136275793eaSopenharmony_ci        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
137275793eaSopenharmony_ci
138275793eaSopenharmony_ci    /*
139275793eaSopenharmony_ci       Create and fill in decoding tables.  In this loop, the table being
140275793eaSopenharmony_ci       filled is at next and has curr index bits.  The code being used is huff
141275793eaSopenharmony_ci       with length len.  That code is converted to an index by dropping drop
142275793eaSopenharmony_ci       bits off of the bottom.  For codes where len is less than drop + curr,
143275793eaSopenharmony_ci       those top drop + curr - len bits are incremented through all values to
144275793eaSopenharmony_ci       fill the table with replicated entries.
145275793eaSopenharmony_ci
146275793eaSopenharmony_ci       root is the number of index bits for the root table.  When len exceeds
147275793eaSopenharmony_ci       root, sub-tables are created pointed to by the root entry with an index
148275793eaSopenharmony_ci       of the low root bits of huff.  This is saved in low to check for when a
149275793eaSopenharmony_ci       new sub-table should be started.  drop is zero when the root table is
150275793eaSopenharmony_ci       being filled, and drop is root when sub-tables are being filled.
151275793eaSopenharmony_ci
152275793eaSopenharmony_ci       When a new sub-table is needed, it is necessary to look ahead in the
153275793eaSopenharmony_ci       code lengths to determine what size sub-table is needed.  The length
154275793eaSopenharmony_ci       counts are used for this, and so count[] is decremented as codes are
155275793eaSopenharmony_ci       entered in the tables.
156275793eaSopenharmony_ci
157275793eaSopenharmony_ci       used keeps track of how many table entries have been allocated from the
158275793eaSopenharmony_ci       provided *table space.  It is checked for LENS and DIST tables against
159275793eaSopenharmony_ci       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
160275793eaSopenharmony_ci       the initial root table size constants.  See the comments in inftree9.h
161275793eaSopenharmony_ci       for more information.
162275793eaSopenharmony_ci
163275793eaSopenharmony_ci       sym increments through all symbols, and the loop terminates when
164275793eaSopenharmony_ci       all codes of length max, i.e. all codes, have been processed.  This
165275793eaSopenharmony_ci       routine permits incomplete codes, so another loop after this one fills
166275793eaSopenharmony_ci       in the rest of the decoding tables with invalid code markers.
167275793eaSopenharmony_ci     */
168275793eaSopenharmony_ci
169275793eaSopenharmony_ci    /* set up for code type */
170275793eaSopenharmony_ci    switch (type) {
171275793eaSopenharmony_ci    case CODES:
172275793eaSopenharmony_ci        base = extra = work;    /* dummy value--not used */
173275793eaSopenharmony_ci        end = 19;
174275793eaSopenharmony_ci        break;
175275793eaSopenharmony_ci    case LENS:
176275793eaSopenharmony_ci        base = lbase;
177275793eaSopenharmony_ci        base -= 257;
178275793eaSopenharmony_ci        extra = lext;
179275793eaSopenharmony_ci        extra -= 257;
180275793eaSopenharmony_ci        end = 256;
181275793eaSopenharmony_ci        break;
182275793eaSopenharmony_ci    default:            /* DISTS */
183275793eaSopenharmony_ci        base = dbase;
184275793eaSopenharmony_ci        extra = dext;
185275793eaSopenharmony_ci        end = -1;
186275793eaSopenharmony_ci    }
187275793eaSopenharmony_ci
188275793eaSopenharmony_ci    /* initialize state for loop */
189275793eaSopenharmony_ci    huff = 0;                   /* starting code */
190275793eaSopenharmony_ci    sym = 0;                    /* starting code symbol */
191275793eaSopenharmony_ci    len = min;                  /* starting code length */
192275793eaSopenharmony_ci    next = *table;              /* current table to fill in */
193275793eaSopenharmony_ci    curr = root;                /* current table index bits */
194275793eaSopenharmony_ci    drop = 0;                   /* current bits to drop from code for index */
195275793eaSopenharmony_ci    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
196275793eaSopenharmony_ci    used = 1U << root;          /* use root table entries */
197275793eaSopenharmony_ci    mask = used - 1;            /* mask for comparing low */
198275793eaSopenharmony_ci
199275793eaSopenharmony_ci    /* check available table space */
200275793eaSopenharmony_ci    if ((type == LENS && used >= ENOUGH_LENS) ||
201275793eaSopenharmony_ci        (type == DISTS && used >= ENOUGH_DISTS))
202275793eaSopenharmony_ci        return 1;
203275793eaSopenharmony_ci
204275793eaSopenharmony_ci    /* process all codes and make table entries */
205275793eaSopenharmony_ci    for (;;) {
206275793eaSopenharmony_ci        /* create table entry */
207275793eaSopenharmony_ci        this.bits = (unsigned char)(len - drop);
208275793eaSopenharmony_ci        if ((int)(work[sym]) < end) {
209275793eaSopenharmony_ci            this.op = (unsigned char)0;
210275793eaSopenharmony_ci            this.val = work[sym];
211275793eaSopenharmony_ci        }
212275793eaSopenharmony_ci        else if ((int)(work[sym]) > end) {
213275793eaSopenharmony_ci            this.op = (unsigned char)(extra[work[sym]]);
214275793eaSopenharmony_ci            this.val = base[work[sym]];
215275793eaSopenharmony_ci        }
216275793eaSopenharmony_ci        else {
217275793eaSopenharmony_ci            this.op = (unsigned char)(32 + 64);         /* end of block */
218275793eaSopenharmony_ci            this.val = 0;
219275793eaSopenharmony_ci        }
220275793eaSopenharmony_ci
221275793eaSopenharmony_ci        /* replicate for those indices with low len bits equal to huff */
222275793eaSopenharmony_ci        incr = 1U << (len - drop);
223275793eaSopenharmony_ci        fill = 1U << curr;
224275793eaSopenharmony_ci        do {
225275793eaSopenharmony_ci            fill -= incr;
226275793eaSopenharmony_ci            next[(huff >> drop) + fill] = this;
227275793eaSopenharmony_ci        } while (fill != 0);
228275793eaSopenharmony_ci
229275793eaSopenharmony_ci        /* backwards increment the len-bit code huff */
230275793eaSopenharmony_ci        incr = 1U << (len - 1);
231275793eaSopenharmony_ci        while (huff & incr)
232275793eaSopenharmony_ci            incr >>= 1;
233275793eaSopenharmony_ci        if (incr != 0) {
234275793eaSopenharmony_ci            huff &= incr - 1;
235275793eaSopenharmony_ci            huff += incr;
236275793eaSopenharmony_ci        }
237275793eaSopenharmony_ci        else
238275793eaSopenharmony_ci            huff = 0;
239275793eaSopenharmony_ci
240275793eaSopenharmony_ci        /* go to next symbol, update count, len */
241275793eaSopenharmony_ci        sym++;
242275793eaSopenharmony_ci        if (--(count[len]) == 0) {
243275793eaSopenharmony_ci            if (len == max) break;
244275793eaSopenharmony_ci            len = lens[work[sym]];
245275793eaSopenharmony_ci        }
246275793eaSopenharmony_ci
247275793eaSopenharmony_ci        /* create new sub-table if needed */
248275793eaSopenharmony_ci        if (len > root && (huff & mask) != low) {
249275793eaSopenharmony_ci            /* if first time, transition to sub-tables */
250275793eaSopenharmony_ci            if (drop == 0)
251275793eaSopenharmony_ci                drop = root;
252275793eaSopenharmony_ci
253275793eaSopenharmony_ci            /* increment past last table */
254275793eaSopenharmony_ci            next += 1U << curr;
255275793eaSopenharmony_ci
256275793eaSopenharmony_ci            /* determine length of next table */
257275793eaSopenharmony_ci            curr = len - drop;
258275793eaSopenharmony_ci            left = (int)(1 << curr);
259275793eaSopenharmony_ci            while (curr + drop < max) {
260275793eaSopenharmony_ci                left -= count[curr + drop];
261275793eaSopenharmony_ci                if (left <= 0) break;
262275793eaSopenharmony_ci                curr++;
263275793eaSopenharmony_ci                left <<= 1;
264275793eaSopenharmony_ci            }
265275793eaSopenharmony_ci
266275793eaSopenharmony_ci            /* check for enough space */
267275793eaSopenharmony_ci            used += 1U << curr;
268275793eaSopenharmony_ci            if ((type == LENS && used >= ENOUGH_LENS) ||
269275793eaSopenharmony_ci                (type == DISTS && used >= ENOUGH_DISTS))
270275793eaSopenharmony_ci                return 1;
271275793eaSopenharmony_ci
272275793eaSopenharmony_ci            /* point entry in root table to sub-table */
273275793eaSopenharmony_ci            low = huff & mask;
274275793eaSopenharmony_ci            (*table)[low].op = (unsigned char)curr;
275275793eaSopenharmony_ci            (*table)[low].bits = (unsigned char)root;
276275793eaSopenharmony_ci            (*table)[low].val = (unsigned short)(next - *table);
277275793eaSopenharmony_ci        }
278275793eaSopenharmony_ci    }
279275793eaSopenharmony_ci
280275793eaSopenharmony_ci    /*
281275793eaSopenharmony_ci       Fill in rest of table for incomplete codes.  This loop is similar to the
282275793eaSopenharmony_ci       loop above in incrementing huff for table indices.  It is assumed that
283275793eaSopenharmony_ci       len is equal to curr + drop, so there is no loop needed to increment
284275793eaSopenharmony_ci       through high index bits.  When the current sub-table is filled, the loop
285275793eaSopenharmony_ci       drops back to the root table to fill in any remaining entries there.
286275793eaSopenharmony_ci     */
287275793eaSopenharmony_ci    this.op = (unsigned char)64;                /* invalid code marker */
288275793eaSopenharmony_ci    this.bits = (unsigned char)(len - drop);
289275793eaSopenharmony_ci    this.val = (unsigned short)0;
290275793eaSopenharmony_ci    while (huff != 0) {
291275793eaSopenharmony_ci        /* when done with sub-table, drop back to root table */
292275793eaSopenharmony_ci        if (drop != 0 && (huff & mask) != low) {
293275793eaSopenharmony_ci            drop = 0;
294275793eaSopenharmony_ci            len = root;
295275793eaSopenharmony_ci            next = *table;
296275793eaSopenharmony_ci            curr = root;
297275793eaSopenharmony_ci            this.bits = (unsigned char)len;
298275793eaSopenharmony_ci        }
299275793eaSopenharmony_ci
300275793eaSopenharmony_ci        /* put invalid code marker in table */
301275793eaSopenharmony_ci        next[huff >> drop] = this;
302275793eaSopenharmony_ci
303275793eaSopenharmony_ci        /* backwards increment the len-bit code huff */
304275793eaSopenharmony_ci        incr = 1U << (len - 1);
305275793eaSopenharmony_ci        while (huff & incr)
306275793eaSopenharmony_ci            incr >>= 1;
307275793eaSopenharmony_ci        if (incr != 0) {
308275793eaSopenharmony_ci            huff &= incr - 1;
309275793eaSopenharmony_ci            huff += incr;
310275793eaSopenharmony_ci        }
311275793eaSopenharmony_ci        else
312275793eaSopenharmony_ci            huff = 0;
313275793eaSopenharmony_ci    }
314275793eaSopenharmony_ci
315275793eaSopenharmony_ci    /* set return parameters */
316275793eaSopenharmony_ci    *table += used;
317275793eaSopenharmony_ci    *bits = root;
318275793eaSopenharmony_ci    return 0;
319275793eaSopenharmony_ci}
320