162306a36Sopenharmony_ci/* inffast.c -- fast decoding 262306a36Sopenharmony_ci * Copyright (C) 1995-2004 Mark Adler 362306a36Sopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/zutil.h> 762306a36Sopenharmony_ci#include "inftrees.h" 862306a36Sopenharmony_ci#include "inflate.h" 962306a36Sopenharmony_ci#include "inffast.h" 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#ifndef ASMINF 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ciunion uu { 1462306a36Sopenharmony_ci unsigned short us; 1562306a36Sopenharmony_ci unsigned char b[2]; 1662306a36Sopenharmony_ci}; 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* Endian independent version */ 1962306a36Sopenharmony_cistatic inline unsigned short 2062306a36Sopenharmony_ciget_unaligned16(const unsigned short *p) 2162306a36Sopenharmony_ci{ 2262306a36Sopenharmony_ci union uu mm; 2362306a36Sopenharmony_ci unsigned char *b = (unsigned char *)p; 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci mm.b[0] = b[0]; 2662306a36Sopenharmony_ci mm.b[1] = b[1]; 2762306a36Sopenharmony_ci return mm.us; 2862306a36Sopenharmony_ci} 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci/* 3162306a36Sopenharmony_ci Decode literal, length, and distance codes and write out the resulting 3262306a36Sopenharmony_ci literal and match bytes until either not enough input or output is 3362306a36Sopenharmony_ci available, an end-of-block is encountered, or a data error is encountered. 3462306a36Sopenharmony_ci When large enough input and output buffers are supplied to inflate(), for 3562306a36Sopenharmony_ci example, a 16K input buffer and a 64K output buffer, more than 95% of the 3662306a36Sopenharmony_ci inflate execution time is spent in this routine. 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci Entry assumptions: 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci state->mode == LEN 4162306a36Sopenharmony_ci strm->avail_in >= 6 4262306a36Sopenharmony_ci strm->avail_out >= 258 4362306a36Sopenharmony_ci start >= strm->avail_out 4462306a36Sopenharmony_ci state->bits < 8 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci On return, state->mode is one of: 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci LEN -- ran out of enough output space or enough available input 4962306a36Sopenharmony_ci TYPE -- reached end of block code, inflate() to interpret next block 5062306a36Sopenharmony_ci BAD -- error in block data 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci Notes: 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci - The maximum input bits used by a length/distance pair is 15 bits for the 5562306a36Sopenharmony_ci length code, 5 bits for the length extra, 15 bits for the distance code, 5662306a36Sopenharmony_ci and 13 bits for the distance extra. This totals 48 bits, or six bytes. 5762306a36Sopenharmony_ci Therefore if strm->avail_in >= 6, then there is enough input to avoid 5862306a36Sopenharmony_ci checking for available input while decoding. 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci - The maximum bytes that a single length/distance pair can output is 258 6162306a36Sopenharmony_ci bytes, which is the maximum length that can be coded. inflate_fast() 6262306a36Sopenharmony_ci requires strm->avail_out >= 258 for each loop to avoid checking for 6362306a36Sopenharmony_ci output space. 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci - @start: inflate()'s starting value for strm->avail_out 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_civoid inflate_fast(z_streamp strm, unsigned start) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci struct inflate_state *state; 7062306a36Sopenharmony_ci const unsigned char *in; /* local strm->next_in */ 7162306a36Sopenharmony_ci const unsigned char *last; /* while in < last, enough input available */ 7262306a36Sopenharmony_ci unsigned char *out; /* local strm->next_out */ 7362306a36Sopenharmony_ci unsigned char *beg; /* inflate()'s initial strm->next_out */ 7462306a36Sopenharmony_ci unsigned char *end; /* while out < end, enough space available */ 7562306a36Sopenharmony_ci#ifdef INFLATE_STRICT 7662306a36Sopenharmony_ci unsigned dmax; /* maximum distance from zlib header */ 7762306a36Sopenharmony_ci#endif 7862306a36Sopenharmony_ci unsigned wsize; /* window size or zero if not using window */ 7962306a36Sopenharmony_ci unsigned whave; /* valid bytes in the window */ 8062306a36Sopenharmony_ci unsigned write; /* window write index */ 8162306a36Sopenharmony_ci unsigned char *window; /* allocated sliding window, if wsize != 0 */ 8262306a36Sopenharmony_ci unsigned long hold; /* local strm->hold */ 8362306a36Sopenharmony_ci unsigned bits; /* local strm->bits */ 8462306a36Sopenharmony_ci code const *lcode; /* local strm->lencode */ 8562306a36Sopenharmony_ci code const *dcode; /* local strm->distcode */ 8662306a36Sopenharmony_ci unsigned lmask; /* mask for first level of length codes */ 8762306a36Sopenharmony_ci unsigned dmask; /* mask for first level of distance codes */ 8862306a36Sopenharmony_ci code this; /* retrieved table entry */ 8962306a36Sopenharmony_ci unsigned op; /* code bits, operation, extra bits, or */ 9062306a36Sopenharmony_ci /* window position, window bytes to copy */ 9162306a36Sopenharmony_ci unsigned len; /* match length, unused bytes */ 9262306a36Sopenharmony_ci unsigned dist; /* match distance */ 9362306a36Sopenharmony_ci unsigned char *from; /* where to copy match from */ 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci /* copy state to local variables */ 9662306a36Sopenharmony_ci state = (struct inflate_state *)strm->state; 9762306a36Sopenharmony_ci in = strm->next_in; 9862306a36Sopenharmony_ci last = in + (strm->avail_in - 5); 9962306a36Sopenharmony_ci out = strm->next_out; 10062306a36Sopenharmony_ci beg = out - (start - strm->avail_out); 10162306a36Sopenharmony_ci end = out + (strm->avail_out - 257); 10262306a36Sopenharmony_ci#ifdef INFLATE_STRICT 10362306a36Sopenharmony_ci dmax = state->dmax; 10462306a36Sopenharmony_ci#endif 10562306a36Sopenharmony_ci wsize = state->wsize; 10662306a36Sopenharmony_ci whave = state->whave; 10762306a36Sopenharmony_ci write = state->write; 10862306a36Sopenharmony_ci window = state->window; 10962306a36Sopenharmony_ci hold = state->hold; 11062306a36Sopenharmony_ci bits = state->bits; 11162306a36Sopenharmony_ci lcode = state->lencode; 11262306a36Sopenharmony_ci dcode = state->distcode; 11362306a36Sopenharmony_ci lmask = (1U << state->lenbits) - 1; 11462306a36Sopenharmony_ci dmask = (1U << state->distbits) - 1; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci /* decode literals and length/distances until end-of-block or not enough 11762306a36Sopenharmony_ci input data or output space */ 11862306a36Sopenharmony_ci do { 11962306a36Sopenharmony_ci if (bits < 15) { 12062306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 12162306a36Sopenharmony_ci bits += 8; 12262306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 12362306a36Sopenharmony_ci bits += 8; 12462306a36Sopenharmony_ci } 12562306a36Sopenharmony_ci this = lcode[hold & lmask]; 12662306a36Sopenharmony_ci dolen: 12762306a36Sopenharmony_ci op = (unsigned)(this.bits); 12862306a36Sopenharmony_ci hold >>= op; 12962306a36Sopenharmony_ci bits -= op; 13062306a36Sopenharmony_ci op = (unsigned)(this.op); 13162306a36Sopenharmony_ci if (op == 0) { /* literal */ 13262306a36Sopenharmony_ci *out++ = (unsigned char)(this.val); 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci else if (op & 16) { /* length base */ 13562306a36Sopenharmony_ci len = (unsigned)(this.val); 13662306a36Sopenharmony_ci op &= 15; /* number of extra bits */ 13762306a36Sopenharmony_ci if (op) { 13862306a36Sopenharmony_ci if (bits < op) { 13962306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 14062306a36Sopenharmony_ci bits += 8; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci len += (unsigned)hold & ((1U << op) - 1); 14362306a36Sopenharmony_ci hold >>= op; 14462306a36Sopenharmony_ci bits -= op; 14562306a36Sopenharmony_ci } 14662306a36Sopenharmony_ci if (bits < 15) { 14762306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 14862306a36Sopenharmony_ci bits += 8; 14962306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 15062306a36Sopenharmony_ci bits += 8; 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci this = dcode[hold & dmask]; 15362306a36Sopenharmony_ci dodist: 15462306a36Sopenharmony_ci op = (unsigned)(this.bits); 15562306a36Sopenharmony_ci hold >>= op; 15662306a36Sopenharmony_ci bits -= op; 15762306a36Sopenharmony_ci op = (unsigned)(this.op); 15862306a36Sopenharmony_ci if (op & 16) { /* distance base */ 15962306a36Sopenharmony_ci dist = (unsigned)(this.val); 16062306a36Sopenharmony_ci op &= 15; /* number of extra bits */ 16162306a36Sopenharmony_ci if (bits < op) { 16262306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 16362306a36Sopenharmony_ci bits += 8; 16462306a36Sopenharmony_ci if (bits < op) { 16562306a36Sopenharmony_ci hold += (unsigned long)(*in++) << bits; 16662306a36Sopenharmony_ci bits += 8; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci } 16962306a36Sopenharmony_ci dist += (unsigned)hold & ((1U << op) - 1); 17062306a36Sopenharmony_ci#ifdef INFLATE_STRICT 17162306a36Sopenharmony_ci if (dist > dmax) { 17262306a36Sopenharmony_ci strm->msg = (char *)"invalid distance too far back"; 17362306a36Sopenharmony_ci state->mode = BAD; 17462306a36Sopenharmony_ci break; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci#endif 17762306a36Sopenharmony_ci hold >>= op; 17862306a36Sopenharmony_ci bits -= op; 17962306a36Sopenharmony_ci op = (unsigned)(out - beg); /* max distance in output */ 18062306a36Sopenharmony_ci if (dist > op) { /* see if copy from window */ 18162306a36Sopenharmony_ci op = dist - op; /* distance back in window */ 18262306a36Sopenharmony_ci if (op > whave) { 18362306a36Sopenharmony_ci strm->msg = (char *)"invalid distance too far back"; 18462306a36Sopenharmony_ci state->mode = BAD; 18562306a36Sopenharmony_ci break; 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci from = window; 18862306a36Sopenharmony_ci if (write == 0) { /* very common case */ 18962306a36Sopenharmony_ci from += wsize - op; 19062306a36Sopenharmony_ci if (op < len) { /* some from window */ 19162306a36Sopenharmony_ci len -= op; 19262306a36Sopenharmony_ci do { 19362306a36Sopenharmony_ci *out++ = *from++; 19462306a36Sopenharmony_ci } while (--op); 19562306a36Sopenharmony_ci from = out - dist; /* rest from output */ 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci else if (write < op) { /* wrap around window */ 19962306a36Sopenharmony_ci from += wsize + write - op; 20062306a36Sopenharmony_ci op -= write; 20162306a36Sopenharmony_ci if (op < len) { /* some from end of window */ 20262306a36Sopenharmony_ci len -= op; 20362306a36Sopenharmony_ci do { 20462306a36Sopenharmony_ci *out++ = *from++; 20562306a36Sopenharmony_ci } while (--op); 20662306a36Sopenharmony_ci from = window; 20762306a36Sopenharmony_ci if (write < len) { /* some from start of window */ 20862306a36Sopenharmony_ci op = write; 20962306a36Sopenharmony_ci len -= op; 21062306a36Sopenharmony_ci do { 21162306a36Sopenharmony_ci *out++ = *from++; 21262306a36Sopenharmony_ci } while (--op); 21362306a36Sopenharmony_ci from = out - dist; /* rest from output */ 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci else { /* contiguous in window */ 21862306a36Sopenharmony_ci from += write - op; 21962306a36Sopenharmony_ci if (op < len) { /* some from window */ 22062306a36Sopenharmony_ci len -= op; 22162306a36Sopenharmony_ci do { 22262306a36Sopenharmony_ci *out++ = *from++; 22362306a36Sopenharmony_ci } while (--op); 22462306a36Sopenharmony_ci from = out - dist; /* rest from output */ 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci while (len > 2) { 22862306a36Sopenharmony_ci *out++ = *from++; 22962306a36Sopenharmony_ci *out++ = *from++; 23062306a36Sopenharmony_ci *out++ = *from++; 23162306a36Sopenharmony_ci len -= 3; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci if (len) { 23462306a36Sopenharmony_ci *out++ = *from++; 23562306a36Sopenharmony_ci if (len > 1) 23662306a36Sopenharmony_ci *out++ = *from++; 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci else { 24062306a36Sopenharmony_ci unsigned short *sout; 24162306a36Sopenharmony_ci unsigned long loops; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci from = out - dist; /* copy direct from output */ 24462306a36Sopenharmony_ci /* minimum length is three */ 24562306a36Sopenharmony_ci /* Align out addr */ 24662306a36Sopenharmony_ci if (!((long)(out - 1) & 1)) { 24762306a36Sopenharmony_ci *out++ = *from++; 24862306a36Sopenharmony_ci len--; 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci sout = (unsigned short *)(out); 25162306a36Sopenharmony_ci if (dist > 2) { 25262306a36Sopenharmony_ci unsigned short *sfrom; 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci sfrom = (unsigned short *)(from); 25562306a36Sopenharmony_ci loops = len >> 1; 25662306a36Sopenharmony_ci do { 25762306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 25862306a36Sopenharmony_ci *sout++ = *sfrom++; 25962306a36Sopenharmony_ci else 26062306a36Sopenharmony_ci *sout++ = get_unaligned16(sfrom++); 26162306a36Sopenharmony_ci } while (--loops); 26262306a36Sopenharmony_ci out = (unsigned char *)sout; 26362306a36Sopenharmony_ci from = (unsigned char *)sfrom; 26462306a36Sopenharmony_ci } else { /* dist == 1 or dist == 2 */ 26562306a36Sopenharmony_ci unsigned short pat16; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci pat16 = *(sout-1); 26862306a36Sopenharmony_ci if (dist == 1) { 26962306a36Sopenharmony_ci union uu mm; 27062306a36Sopenharmony_ci /* copy one char pattern to both bytes */ 27162306a36Sopenharmony_ci mm.us = pat16; 27262306a36Sopenharmony_ci mm.b[0] = mm.b[1]; 27362306a36Sopenharmony_ci pat16 = mm.us; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci loops = len >> 1; 27662306a36Sopenharmony_ci do 27762306a36Sopenharmony_ci *sout++ = pat16; 27862306a36Sopenharmony_ci while (--loops); 27962306a36Sopenharmony_ci out = (unsigned char *)sout; 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci if (len & 1) 28262306a36Sopenharmony_ci *out++ = *from++; 28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci else if ((op & 64) == 0) { /* 2nd level distance code */ 28662306a36Sopenharmony_ci this = dcode[this.val + (hold & ((1U << op) - 1))]; 28762306a36Sopenharmony_ci goto dodist; 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci else { 29062306a36Sopenharmony_ci strm->msg = (char *)"invalid distance code"; 29162306a36Sopenharmony_ci state->mode = BAD; 29262306a36Sopenharmony_ci break; 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci } 29562306a36Sopenharmony_ci else if ((op & 64) == 0) { /* 2nd level length code */ 29662306a36Sopenharmony_ci this = lcode[this.val + (hold & ((1U << op) - 1))]; 29762306a36Sopenharmony_ci goto dolen; 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci else if (op & 32) { /* end-of-block */ 30062306a36Sopenharmony_ci state->mode = TYPE; 30162306a36Sopenharmony_ci break; 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci else { 30462306a36Sopenharmony_ci strm->msg = (char *)"invalid literal/length code"; 30562306a36Sopenharmony_ci state->mode = BAD; 30662306a36Sopenharmony_ci break; 30762306a36Sopenharmony_ci } 30862306a36Sopenharmony_ci } while (in < last && out < end); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 31162306a36Sopenharmony_ci len = bits >> 3; 31262306a36Sopenharmony_ci in -= len; 31362306a36Sopenharmony_ci bits -= len << 3; 31462306a36Sopenharmony_ci hold &= (1U << bits) - 1; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* update state and return */ 31762306a36Sopenharmony_ci strm->next_in = in; 31862306a36Sopenharmony_ci strm->next_out = out; 31962306a36Sopenharmony_ci strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 32062306a36Sopenharmony_ci strm->avail_out = (unsigned)(out < end ? 32162306a36Sopenharmony_ci 257 + (end - out) : 257 - (out - end)); 32262306a36Sopenharmony_ci state->hold = hold; 32362306a36Sopenharmony_ci state->bits = bits; 32462306a36Sopenharmony_ci return; 32562306a36Sopenharmony_ci} 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci/* 32862306a36Sopenharmony_ci inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 32962306a36Sopenharmony_ci - Using bit fields for code structure 33062306a36Sopenharmony_ci - Different op definition to avoid & for extra bits (do & for table bits) 33162306a36Sopenharmony_ci - Three separate decoding do-loops for direct, window, and write == 0 33262306a36Sopenharmony_ci - Special case for distance > 1 copies to do overlapped load and store copy 33362306a36Sopenharmony_ci - Explicit branch predictions (based on measured branch probabilities) 33462306a36Sopenharmony_ci - Deferring match copy and interspersed it with decoding subsequent codes 33562306a36Sopenharmony_ci - Swapping literal/length else 33662306a36Sopenharmony_ci - Swapping window/direct else 33762306a36Sopenharmony_ci - Larger unrolled copy loops (three is about right) 33862306a36Sopenharmony_ci - Moving len -= 3 statement into middle of loop 33962306a36Sopenharmony_ci */ 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci#endif /* !ASMINF */ 342