1/* inffast_chunk.c -- fast decoding 2 * Copyright (C) 1995-2017 Mark Adler 3 * Copyright 2023 The Chromium Authors 4 * For conditions of distribution and use, see copyright notice in zlib.h 5 */ 6 7#include "zutil.h" 8#include "inftrees.h" 9#include "inflate.h" 10#include "contrib/optimizations/inffast_chunk.h" 11#include "contrib/optimizations/chunkcopy.h" 12 13#ifdef ASMINF 14# pragma message("Assembler code may have bugs -- use at your own risk") 15#else 16 17/* 18 Decode literal, length, and distance codes and write out the resulting 19 literal and match bytes until either not enough input or output is 20 available, an end-of-block is encountered, or a data error is encountered. 21 When large enough input and output buffers are supplied to inflate(), for 22 example, a 16K input buffer and a 64K output buffer, more than 95% of the 23 inflate() execution time is spent in this routine. 24 25 Entry assumptions: 26 27 state->mode == LEN 28 strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes) 29 strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes) 30 start >= strm->avail_out 31 state->bits < 8 32 (state->hold >> state->bits) == 0 33 strm->next_out[0..strm->avail_out] does not overlap with 34 strm->next_in[0..strm->avail_in] 35 strm->state->window is allocated with an additional 36 CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize 37 38 On return, state->mode is one of: 39 40 LEN -- ran out of enough output space or enough available input 41 TYPE -- reached end of block code, inflate() to interpret next block 42 BAD -- error in block data 43 44 Notes: 45 46 INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes 47 48 - The maximum input bits used by a length/distance pair is 15 bits for the 49 length code, 5 bits for the length extra, 15 bits for the distance code, 50 and 13 bits for the distance extra. This totals 48 bits, or six bytes. 51 Therefore if strm->avail_in >= 6, then there is enough input to avoid 52 checking for available input while decoding. 53 54 - The wide input data reading option reads 64 input bits at a time. Thus, 55 if strm->avail_in >= 8, then there is enough input to avoid checking for 56 available input while decoding. Reading consumes the input with: 57 58 hold |= read64le(in) << bits; 59 in += 6; 60 bits += 48; 61 62 reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded 63 up, worst case) and 6 bytes is enough to decode as noted above. At exit, 64 hold &= (1U << bits) - 1 drops excess input to keep the invariant: 65 66 (state->hold >> state->bits) == 0 67 68 INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes 69 70 - The maximum bytes that a single length/distance pair can output is 258 71 bytes, which is the maximum length that can be coded. inflate_fast() 72 requires strm->avail_out >= 260 for each loop to avoid checking for 73 available output space while decoding. 74 */ 75void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) { 76 struct inflate_state FAR *state; 77 z_const unsigned char FAR *in; /* local strm->next_in */ 78 z_const unsigned char FAR *last; /* have enough input while in < last */ 79 unsigned char FAR *out; /* local strm->next_out */ 80 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 81 unsigned char FAR *end; /* while out < end, enough space available */ 82 unsigned char FAR *limit; /* safety limit for chunky copies */ 83#ifdef INFLATE_STRICT 84 unsigned dmax; /* maximum distance from zlib header */ 85#endif 86 unsigned wsize; /* window size or zero if not using window */ 87 unsigned whave; /* valid bytes in the window */ 88 unsigned wnext; /* window write index */ 89 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 90 inflate_holder_t hold; /* local strm->hold */ 91 unsigned bits; /* local strm->bits */ 92 code const FAR *lcode; /* local strm->lencode */ 93 code const FAR *dcode; /* local strm->distcode */ 94 unsigned lmask; /* mask for first level of length codes */ 95 unsigned dmask; /* mask for first level of distance codes */ 96 code const *here; /* retrieved table entry */ 97 unsigned op; /* code bits, operation, extra bits, or */ 98 /* window position, window bytes to copy */ 99 unsigned len; /* match length, unused bytes */ 100 unsigned dist; /* match distance */ 101 unsigned char FAR *from; /* where to copy match from */ 102 103 /* copy state to local variables */ 104 state = (struct inflate_state FAR *)strm->state; 105 in = strm->next_in; 106 last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1)); 107 out = strm->next_out; 108 beg = out - (start - strm->avail_out); 109 end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1)); 110 limit = out + strm->avail_out; 111#ifdef INFLATE_STRICT 112 dmax = state->dmax; 113#endif 114 wsize = state->wsize; 115 whave = state->whave; 116 wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; 117 window = state->window; 118 hold = state->hold; 119 bits = state->bits; 120 lcode = state->lencode; 121 dcode = state->distcode; 122 lmask = (1U << state->lenbits) - 1; 123 dmask = (1U << state->distbits) - 1; 124 125#ifdef INFLATE_CHUNK_READ_64LE 126#define REFILL() do { \ 127 Assert(bits < 64, "### Too many bits in inflate_fast."); \ 128 hold |= read64le(in) << bits; \ 129 in += 7; \ 130 in -= bits >> 3; \ 131 bits |= 56; \ 132 } while (0) 133#endif 134 135 /* decode literals and length/distances until end-of-block or not enough 136 input data or output space */ 137 do { 138#ifdef INFLATE_CHUNK_READ_64LE 139 REFILL(); 140#else 141 if (bits < 15) { 142 hold += (unsigned long)(*in++) << bits; 143 bits += 8; 144 hold += (unsigned long)(*in++) << bits; 145 bits += 8; 146 } 147#endif 148 here = lcode + (hold & lmask); 149#ifdef INFLATE_CHUNK_READ_64LE 150 if (here->op == 0) { /* literal */ 151 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? 152 "inflate: literal '%c'\n" : 153 "inflate: literal 0x%02x\n", here->val)); 154 *out++ = (unsigned char)(here->val); 155 hold >>= here->bits; 156 bits -= here->bits; 157 here = lcode + (hold & lmask); 158 if (here->op == 0) { /* literal */ 159 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? 160 "inflate: 2nd literal '%c'\n" : 161 "inflate: 2nd literal 0x%02x\n", here->val)); 162 *out++ = (unsigned char)(here->val); 163 hold >>= here->bits; 164 bits -= here->bits; 165 here = lcode + (hold & lmask); 166 } 167 } 168#endif 169 dolen: 170 op = (unsigned)(here->bits); 171 hold >>= op; 172 bits -= op; 173 op = (unsigned)(here->op); 174 if (op == 0) { /* literal */ 175 Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? 176 "inflate: literal '%c'\n" : 177 "inflate: literal 0x%02x\n", here->val)); 178 *out++ = (unsigned char)(here->val); 179 } 180 else if (op & 16) { /* length base */ 181 len = (unsigned)(here->val); 182 op &= 15; /* number of extra bits */ 183 if (op) { 184#ifndef INFLATE_CHUNK_READ_64LE 185 if (bits < op) { 186 hold += (unsigned long)(*in++) << bits; 187 bits += 8; 188 } 189#endif 190 len += (unsigned)hold & ((1U << op) - 1); 191 hold >>= op; 192 bits -= op; 193 } 194 Tracevv((stderr, "inflate: length %u\n", len)); 195#ifndef INFLATE_CHUNK_READ_64LE 196 if (bits < 15) { 197 hold += (unsigned long)(*in++) << bits; 198 bits += 8; 199 hold += (unsigned long)(*in++) << bits; 200 bits += 8; 201 } 202#endif 203 here = dcode + (hold & dmask); 204 dodist: 205 op = (unsigned)(here->bits); 206 hold >>= op; 207 bits -= op; 208 op = (unsigned)(here->op); 209 if (op & 16) { /* distance base */ 210 dist = (unsigned)(here->val); 211 op &= 15; /* number of extra bits */ 212 /* we have two fast-path loads: 10+10 + 15+5 + 15 = 55, 213 but we may need to refill here in the worst case */ 214 if (bits < op) { 215#ifdef INFLATE_CHUNK_READ_64LE 216 REFILL(); 217#else 218 hold += (unsigned long)(*in++) << bits; 219 bits += 8; 220 if (bits < op) { 221 hold += (unsigned long)(*in++) << bits; 222 bits += 8; 223 } 224#endif 225 } 226 dist += (unsigned)hold & ((1U << op) - 1); 227#ifdef INFLATE_STRICT 228 if (dist > dmax) { 229 strm->msg = (char *)"invalid distance too far back"; 230 state->mode = BAD; 231 break; 232 } 233#endif 234 hold >>= op; 235 bits -= op; 236 Tracevv((stderr, "inflate: distance %u\n", dist)); 237 op = (unsigned)(out - beg); /* max distance in output */ 238 if (dist > op) { /* see if copy from window */ 239 op = dist - op; /* distance back in window */ 240 if (op > whave) { 241 if (state->sane) { 242 strm->msg = 243 (char *)"invalid distance too far back"; 244 state->mode = BAD; 245 break; 246 } 247#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 248 if (len <= op - whave) { 249 do { 250 *out++ = 0; 251 } while (--len); 252 continue; 253 } 254 len -= op - whave; 255 do { 256 *out++ = 0; 257 } while (--op > whave); 258 if (op == 0) { 259 from = out - dist; 260 do { 261 *out++ = *from++; 262 } while (--len); 263 continue; 264 } 265#endif 266 } 267 from = window; 268 if (wnext >= op) { /* contiguous in window */ 269 from += wnext - op; 270 } 271 else { /* wrap around window */ 272 op -= wnext; 273 from += wsize - op; 274 if (op < len) { /* some from end of window */ 275 len -= op; 276 out = chunkcopy_safe(out, from, op, limit); 277 from = window; /* more from start of window */ 278 op = wnext; 279 /* This (rare) case can create a situation where 280 the first chunkcopy below must be checked. 281 */ 282 } 283 } 284 if (op < len) { /* still need some from output */ 285 out = chunkcopy_safe(out, from, op, limit); 286 len -= op; 287 /* When dist is small the amount of data that can be 288 copied from the window is also small, and progress 289 towards the dangerous end of the output buffer is 290 also small. This means that for trivial memsets and 291 for chunkunroll_relaxed() a safety check is 292 unnecessary. However, these conditions may not be 293 entered at all, and in that case it's possible that 294 the main copy is near the end. 295 */ 296 out = chunkunroll_relaxed(out, &dist, &len); 297 out = chunkcopy_safe_ugly(out, dist, len, limit); 298 } else { 299 /* from points to window, so there is no risk of 300 overlapping pointers requiring memset-like behaviour 301 */ 302 out = chunkcopy_safe(out, from, len, limit); 303 } 304 } 305 else { 306 /* Whole reference is in range of current output. No 307 range checks are necessary because we start with room 308 for at least 258 bytes of output, so unroll and roundoff 309 operations can write beyond `out+len` so long as they 310 stay within 258 bytes of `out`. 311 */ 312 out = chunkcopy_lapped_relaxed(out, dist, len); 313 } 314 } 315 else if ((op & 64) == 0) { /* 2nd level distance code */ 316 here = dcode + here->val + (hold & ((1U << op) - 1)); 317 goto dodist; 318 } 319 else { 320 strm->msg = (char *)"invalid distance code"; 321 state->mode = BAD; 322 break; 323 } 324 } 325 else if ((op & 64) == 0) { /* 2nd level length code */ 326 here = lcode + here->val + (hold & ((1U << op) - 1)); 327 goto dolen; 328 } 329 else if (op & 32) { /* end-of-block */ 330 Tracevv((stderr, "inflate: end of block\n")); 331 state->mode = TYPE; 332 break; 333 } 334 else { 335 strm->msg = (char *)"invalid literal/length code"; 336 state->mode = BAD; 337 break; 338 } 339 } while (in < last && out < end); 340 341 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 342 len = bits >> 3; 343 in -= len; 344 bits -= len << 3; 345 hold &= (1U << bits) - 1; 346 347 /* update state and return */ 348 strm->next_in = in; 349 strm->next_out = out; 350 strm->avail_in = (unsigned)(in < last ? 351 (INFLATE_FAST_MIN_INPUT - 1) + (last - in) : 352 (INFLATE_FAST_MIN_INPUT - 1) - (in - last)); 353 strm->avail_out = (unsigned)(out < end ? 354 (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) : 355 (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end)); 356 state->hold = hold; 357 state->bits = bits; 358 359 Assert((state->hold >> state->bits) == 0, "invalid input data state"); 360} 361 362/* 363 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 364 - Using bit fields for code structure 365 - Different op definition to avoid & for extra bits (do & for table bits) 366 - Three separate decoding do-loops for direct, window, and wnext == 0 367 - Special case for distance > 1 copies to do overlapped load and store copy 368 - Explicit branch predictions (based on measured branch probabilities) 369 - Deferring match copy and interspersed it with decoding subsequent codes 370 - Swapping literal/length else 371 - Swapping window/direct else 372 - Larger unrolled copy loops (three is about right) 373 - Moving len -= 3 statement into middle of loop 374 */ 375 376#endif /* !ASMINF */ 377