1/* inffast_chunk.c -- fast decoding
2 * Copyright (C) 1995-2017 Mark Adler
3 * Copyright 2023 The Chromium Authors
4 * For conditions of distribution and use, see copyright notice in zlib.h
5 */
6
7#include "zutil.h"
8#include "inftrees.h"
9#include "inflate.h"
10#include "contrib/optimizations/inffast_chunk.h"
11#include "contrib/optimizations/chunkcopy.h"
12
13#ifdef ASMINF
14#  pragma message("Assembler code may have bugs -- use at your own risk")
15#else
16
17/*
18   Decode literal, length, and distance codes and write out the resulting
19   literal and match bytes until either not enough input or output is
20   available, an end-of-block is encountered, or a data error is encountered.
21   When large enough input and output buffers are supplied to inflate(), for
22   example, a 16K input buffer and a 64K output buffer, more than 95% of the
23   inflate() execution time is spent in this routine.
24
25   Entry assumptions:
26
27        state->mode == LEN
28        strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes)
29        strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes)
30        start >= strm->avail_out
31        state->bits < 8
32        (state->hold >> state->bits) == 0
33        strm->next_out[0..strm->avail_out] does not overlap with
34              strm->next_in[0..strm->avail_in]
35        strm->state->window is allocated with an additional
36              CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
37
38   On return, state->mode is one of:
39
40        LEN -- ran out of enough output space or enough available input
41        TYPE -- reached end of block code, inflate() to interpret next block
42        BAD -- error in block data
43
44   Notes:
45
46    INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes
47
48    - The maximum input bits used by a length/distance pair is 15 bits for the
49      length code, 5 bits for the length extra, 15 bits for the distance code,
50      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
51      Therefore if strm->avail_in >= 6, then there is enough input to avoid
52      checking for available input while decoding.
53
54    - The wide input data reading option reads 64 input bits at a time. Thus,
55      if strm->avail_in >= 8, then there is enough input to avoid checking for
56      available input while decoding. Reading consumes the input with:
57
58          hold |= read64le(in) << bits;
59          in += 6;
60          bits += 48;
61
62      reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
63      up, worst case) and 6 bytes is enough to decode as noted above. At exit,
64      hold &= (1U << bits) - 1 drops excess input to keep the invariant:
65
66          (state->hold >> state->bits) == 0
67
68    INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes
69
70    - The maximum bytes that a single length/distance pair can output is 258
71      bytes, which is the maximum length that can be coded.  inflate_fast()
72      requires strm->avail_out >= 260 for each loop to avoid checking for
73      available output space while decoding.
74 */
75void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) {
76    struct inflate_state FAR *state;
77    z_const unsigned char FAR *in;      /* local strm->next_in */
78    z_const unsigned char FAR *last;    /* have enough input while in < last */
79    unsigned char FAR *out;     /* local strm->next_out */
80    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
81    unsigned char FAR *end;     /* while out < end, enough space available */
82    unsigned char FAR *limit;   /* safety limit for chunky copies */
83#ifdef INFLATE_STRICT
84    unsigned dmax;              /* maximum distance from zlib header */
85#endif
86    unsigned wsize;             /* window size or zero if not using window */
87    unsigned whave;             /* valid bytes in the window */
88    unsigned wnext;             /* window write index */
89    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
90    inflate_holder_t hold;      /* local strm->hold */
91    unsigned bits;              /* local strm->bits */
92    code const FAR *lcode;      /* local strm->lencode */
93    code const FAR *dcode;      /* local strm->distcode */
94    unsigned lmask;             /* mask for first level of length codes */
95    unsigned dmask;             /* mask for first level of distance codes */
96    code const *here;           /* retrieved table entry */
97    unsigned op;                /* code bits, operation, extra bits, or */
98                                /*  window position, window bytes to copy */
99    unsigned len;               /* match length, unused bytes */
100    unsigned dist;              /* match distance */
101    unsigned char FAR *from;    /* where to copy match from */
102
103    /* copy state to local variables */
104    state = (struct inflate_state FAR *)strm->state;
105    in = strm->next_in;
106    last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1));
107    out = strm->next_out;
108    beg = out - (start - strm->avail_out);
109    end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1));
110    limit = out + strm->avail_out;
111#ifdef INFLATE_STRICT
112    dmax = state->dmax;
113#endif
114    wsize = state->wsize;
115    whave = state->whave;
116    wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
117    window = state->window;
118    hold = state->hold;
119    bits = state->bits;
120    lcode = state->lencode;
121    dcode = state->distcode;
122    lmask = (1U << state->lenbits) - 1;
123    dmask = (1U << state->distbits) - 1;
124
125#ifdef INFLATE_CHUNK_READ_64LE
126#define REFILL() do { \
127        Assert(bits < 64, "### Too many bits in inflate_fast."); \
128        hold |= read64le(in) << bits; \
129        in += 7; \
130        in -= bits >> 3; \
131        bits |= 56; \
132    } while (0)
133#endif
134
135    /* decode literals and length/distances until end-of-block or not enough
136       input data or output space */
137    do {
138#ifdef INFLATE_CHUNK_READ_64LE
139        REFILL();
140#else
141        if (bits < 15) {
142            hold += (unsigned long)(*in++) << bits;
143            bits += 8;
144            hold += (unsigned long)(*in++) << bits;
145            bits += 8;
146        }
147#endif
148        here = lcode + (hold & lmask);
149#ifdef INFLATE_CHUNK_READ_64LE
150        if (here->op == 0) {                    /* literal */
151            Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
152                    "inflate:         literal '%c'\n" :
153                    "inflate:         literal 0x%02x\n", here->val));
154            *out++ = (unsigned char)(here->val);
155            hold >>= here->bits;
156            bits -= here->bits;
157            here = lcode + (hold & lmask);
158            if (here->op == 0) {                /* literal */
159                Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
160                        "inflate:    2nd  literal '%c'\n" :
161                        "inflate:    2nd  literal 0x%02x\n", here->val));
162                *out++ = (unsigned char)(here->val);
163                hold >>= here->bits;
164                bits -= here->bits;
165                here = lcode + (hold & lmask);
166            }
167        }
168#endif
169      dolen:
170        op = (unsigned)(here->bits);
171        hold >>= op;
172        bits -= op;
173        op = (unsigned)(here->op);
174        if (op == 0) {                          /* literal */
175            Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
176                    "inflate:         literal '%c'\n" :
177                    "inflate:         literal 0x%02x\n", here->val));
178            *out++ = (unsigned char)(here->val);
179        }
180        else if (op & 16) {                     /* length base */
181            len = (unsigned)(here->val);
182            op &= 15;                           /* number of extra bits */
183            if (op) {
184#ifndef INFLATE_CHUNK_READ_64LE
185                if (bits < op) {
186                    hold += (unsigned long)(*in++) << bits;
187                    bits += 8;
188                }
189#endif
190                len += (unsigned)hold & ((1U << op) - 1);
191                hold >>= op;
192                bits -= op;
193            }
194            Tracevv((stderr, "inflate:         length %u\n", len));
195#ifndef INFLATE_CHUNK_READ_64LE
196            if (bits < 15) {
197                hold += (unsigned long)(*in++) << bits;
198                bits += 8;
199                hold += (unsigned long)(*in++) << bits;
200                bits += 8;
201            }
202#endif
203            here = dcode + (hold & dmask);
204          dodist:
205            op = (unsigned)(here->bits);
206            hold >>= op;
207            bits -= op;
208            op = (unsigned)(here->op);
209            if (op & 16) {                      /* distance base */
210                dist = (unsigned)(here->val);
211                op &= 15;                       /* number of extra bits */
212                /* we have two fast-path loads: 10+10 + 15+5 + 15 = 55,
213                   but we may need to refill here in the worst case */
214                if (bits < op) {
215#ifdef INFLATE_CHUNK_READ_64LE
216                    REFILL();
217#else
218                    hold += (unsigned long)(*in++) << bits;
219                    bits += 8;
220                    if (bits < op) {
221                        hold += (unsigned long)(*in++) << bits;
222                        bits += 8;
223                    }
224#endif
225                }
226                dist += (unsigned)hold & ((1U << op) - 1);
227#ifdef INFLATE_STRICT
228                if (dist > dmax) {
229                    strm->msg = (char *)"invalid distance too far back";
230                    state->mode = BAD;
231                    break;
232                }
233#endif
234                hold >>= op;
235                bits -= op;
236                Tracevv((stderr, "inflate:         distance %u\n", dist));
237                op = (unsigned)(out - beg);     /* max distance in output */
238                if (dist > op) {                /* see if copy from window */
239                    op = dist - op;             /* distance back in window */
240                    if (op > whave) {
241                        if (state->sane) {
242                            strm->msg =
243                                (char *)"invalid distance too far back";
244                            state->mode = BAD;
245                            break;
246                        }
247#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
248                        if (len <= op - whave) {
249                            do {
250                                *out++ = 0;
251                            } while (--len);
252                            continue;
253                        }
254                        len -= op - whave;
255                        do {
256                            *out++ = 0;
257                        } while (--op > whave);
258                        if (op == 0) {
259                            from = out - dist;
260                            do {
261                                *out++ = *from++;
262                            } while (--len);
263                            continue;
264                        }
265#endif
266                    }
267                    from = window;
268                    if (wnext >= op) {          /* contiguous in window */
269                        from += wnext - op;
270                    }
271                    else {                      /* wrap around window */
272                        op -= wnext;
273                        from += wsize - op;
274                        if (op < len) {         /* some from end of window */
275                            len -= op;
276                            out = chunkcopy_safe(out, from, op, limit);
277                            from = window;      /* more from start of window */
278                            op = wnext;
279                            /* This (rare) case can create a situation where
280                               the first chunkcopy below must be checked.
281                             */
282                        }
283                    }
284                    if (op < len) {             /* still need some from output */
285                        out = chunkcopy_safe(out, from, op, limit);
286                        len -= op;
287                        /* When dist is small the amount of data that can be
288                           copied from the window is also small, and progress
289                           towards the dangerous end of the output buffer is
290                           also small.  This means that for trivial memsets and
291                           for chunkunroll_relaxed() a safety check is
292                           unnecessary.  However, these conditions may not be
293                           entered at all, and in that case it's possible that
294                           the main copy is near the end.
295                          */
296                        out = chunkunroll_relaxed(out, &dist, &len);
297                        out = chunkcopy_safe_ugly(out, dist, len, limit);
298                    } else {
299                        /* from points to window, so there is no risk of
300                           overlapping pointers requiring memset-like behaviour
301                         */
302                        out = chunkcopy_safe(out, from, len, limit);
303                    }
304                }
305                else {
306                    /* Whole reference is in range of current output.  No
307                       range checks are necessary because we start with room
308                       for at least 258 bytes of output, so unroll and roundoff
309                       operations can write beyond `out+len` so long as they
310                       stay within 258 bytes of `out`.
311                     */
312                    out = chunkcopy_lapped_relaxed(out, dist, len);
313                }
314            }
315            else if ((op & 64) == 0) {          /* 2nd level distance code */
316                here = dcode + here->val + (hold & ((1U << op) - 1));
317                goto dodist;
318            }
319            else {
320                strm->msg = (char *)"invalid distance code";
321                state->mode = BAD;
322                break;
323            }
324        }
325        else if ((op & 64) == 0) {              /* 2nd level length code */
326            here = lcode + here->val + (hold & ((1U << op) - 1));
327            goto dolen;
328        }
329        else if (op & 32) {                     /* end-of-block */
330            Tracevv((stderr, "inflate:         end of block\n"));
331            state->mode = TYPE;
332            break;
333        }
334        else {
335            strm->msg = (char *)"invalid literal/length code";
336            state->mode = BAD;
337            break;
338        }
339    } while (in < last && out < end);
340
341    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
342    len = bits >> 3;
343    in -= len;
344    bits -= len << 3;
345    hold &= (1U << bits) - 1;
346
347    /* update state and return */
348    strm->next_in = in;
349    strm->next_out = out;
350    strm->avail_in = (unsigned)(in < last ?
351        (INFLATE_FAST_MIN_INPUT - 1) + (last - in) :
352        (INFLATE_FAST_MIN_INPUT - 1) - (in - last));
353    strm->avail_out = (unsigned)(out < end ?
354        (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) :
355        (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end));
356    state->hold = hold;
357    state->bits = bits;
358
359    Assert((state->hold >> state->bits) == 0, "invalid input data state");
360}
361
362/*
363   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
364   - Using bit fields for code structure
365   - Different op definition to avoid & for extra bits (do & for table bits)
366   - Three separate decoding do-loops for direct, window, and wnext == 0
367   - Special case for distance > 1 copies to do overlapped load and store copy
368   - Explicit branch predictions (based on measured branch probabilities)
369   - Deferring match copy and interspersed it with decoding subsequent codes
370   - Swapping literal/length else
371   - Swapping window/direct else
372   - Larger unrolled copy loops (three is about right)
373   - Moving len -= 3 statement into middle of loop
374 */
375
376#endif /* !ASMINF */
377