10f66f451Sopenharmony_ci/* deflate.c - deflate/inflate code for gzip and friends 20f66f451Sopenharmony_ci * 30f66f451Sopenharmony_ci * Copyright 2014 Rob Landley <rob@landley.net> 40f66f451Sopenharmony_ci * 50f66f451Sopenharmony_ci * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip) 60f66f451Sopenharmony_ci * LSB 4.1 has gzip, gunzip, and zcat 70f66f451Sopenharmony_ci * 80f66f451Sopenharmony_ci * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout 90f66f451Sopenharmony_ci */ 100f66f451Sopenharmony_ci 110f66f451Sopenharmony_ci#include "toys.h" 120f66f451Sopenharmony_ci 130f66f451Sopenharmony_cistruct deflate { 140f66f451Sopenharmony_ci // Huffman codes: base offset and extra bits tables (length and distance) 150f66f451Sopenharmony_ci char lenbits[29], distbits[30]; 160f66f451Sopenharmony_ci unsigned short lenbase[29], distbase[30]; 170f66f451Sopenharmony_ci void *fixdisthuff, *fixlithuff; 180f66f451Sopenharmony_ci 190f66f451Sopenharmony_ci // CRC 200f66f451Sopenharmony_ci void (*crcfunc)(struct deflate *dd, char *data, unsigned len); 210f66f451Sopenharmony_ci unsigned crctable[256], crc; 220f66f451Sopenharmony_ci 230f66f451Sopenharmony_ci 240f66f451Sopenharmony_ci // Tables only used for deflation 250f66f451Sopenharmony_ci unsigned short *hashhead, *hashchain; 260f66f451Sopenharmony_ci 270f66f451Sopenharmony_ci // Compressed data buffer (extra space malloced at end) 280f66f451Sopenharmony_ci unsigned pos, len; 290f66f451Sopenharmony_ci int infd, outfd; 300f66f451Sopenharmony_ci char data[]; 310f66f451Sopenharmony_ci}; 320f66f451Sopenharmony_ci 330f66f451Sopenharmony_ci// little endian bit buffer 340f66f451Sopenharmony_cistruct bitbuf { 350f66f451Sopenharmony_ci int fd, bitpos, len, max; 360f66f451Sopenharmony_ci char buf[]; 370f66f451Sopenharmony_ci}; 380f66f451Sopenharmony_ci 390f66f451Sopenharmony_ci// malloc a struct bitbuf 400f66f451Sopenharmony_cistruct bitbuf *bitbuf_init(int fd, int size) 410f66f451Sopenharmony_ci{ 420f66f451Sopenharmony_ci struct bitbuf *bb = xzalloc(sizeof(struct bitbuf)+size); 430f66f451Sopenharmony_ci 440f66f451Sopenharmony_ci bb->max = size; 450f66f451Sopenharmony_ci bb->fd = fd; 460f66f451Sopenharmony_ci 470f66f451Sopenharmony_ci return bb; 480f66f451Sopenharmony_ci} 490f66f451Sopenharmony_ci 500f66f451Sopenharmony_ci// Advance bitpos without the overhead of recording bits 510f66f451Sopenharmony_ci// Loads more data when input buffer empty 520f66f451Sopenharmony_ci// call with 0 to just load data, returns 0 at EOF 530f66f451Sopenharmony_ciint bitbuf_skip(struct bitbuf *bb, int bits) 540f66f451Sopenharmony_ci{ 550f66f451Sopenharmony_ci int pos = bb->bitpos + bits + (bits<0), len; 560f66f451Sopenharmony_ci 570f66f451Sopenharmony_ci while (pos >= (len = bb->len<<3)) { 580f66f451Sopenharmony_ci pos -= len; 590f66f451Sopenharmony_ci if (1 > (bb->len = read(bb->fd, bb->buf, bb->max))) { 600f66f451Sopenharmony_ci if (!bb->len && !bits) break; 610f66f451Sopenharmony_ci error_exit("inflate EOF"); 620f66f451Sopenharmony_ci } 630f66f451Sopenharmony_ci } 640f66f451Sopenharmony_ci bb->bitpos = pos; 650f66f451Sopenharmony_ci 660f66f451Sopenharmony_ci return pos<len; 670f66f451Sopenharmony_ci} 680f66f451Sopenharmony_ci 690f66f451Sopenharmony_ci// Optimized single bit inlined version 700f66f451Sopenharmony_cistatic inline int bitbuf_bit(struct bitbuf *bb) 710f66f451Sopenharmony_ci{ 720f66f451Sopenharmony_ci int bufpos = bb->bitpos>>3; 730f66f451Sopenharmony_ci 740f66f451Sopenharmony_ci if (bufpos == bb->len) { 750f66f451Sopenharmony_ci bitbuf_skip(bb, -1); 760f66f451Sopenharmony_ci bufpos = 0; 770f66f451Sopenharmony_ci } 780f66f451Sopenharmony_ci 790f66f451Sopenharmony_ci return (bb->buf[bufpos]>>(bb->bitpos++&7))&1; 800f66f451Sopenharmony_ci} 810f66f451Sopenharmony_ci 820f66f451Sopenharmony_ci// Fetch the next X bits from the bitbuf, little endian 830f66f451Sopenharmony_ciunsigned bitbuf_get(struct bitbuf *bb, int bits) 840f66f451Sopenharmony_ci{ 850f66f451Sopenharmony_ci int result = 0, offset = 0; 860f66f451Sopenharmony_ci 870f66f451Sopenharmony_ci while (bits) { 880f66f451Sopenharmony_ci int click = bb->bitpos >> 3, blow, blen; 890f66f451Sopenharmony_ci 900f66f451Sopenharmony_ci // Load more data if buffer empty 910f66f451Sopenharmony_ci if (click == bb->len) { 920f66f451Sopenharmony_ci bitbuf_skip(bb, -1); 930f66f451Sopenharmony_ci click = 0; 940f66f451Sopenharmony_ci } 950f66f451Sopenharmony_ci 960f66f451Sopenharmony_ci // grab bits from next byte 970f66f451Sopenharmony_ci blow = bb->bitpos & 7; 980f66f451Sopenharmony_ci blen = 8-blow; 990f66f451Sopenharmony_ci if (blen > bits) blen = bits; 1000f66f451Sopenharmony_ci result |= ((bb->buf[click] >> blow) & ((1<<blen)-1)) << offset; 1010f66f451Sopenharmony_ci offset += blen; 1020f66f451Sopenharmony_ci bits -= blen; 1030f66f451Sopenharmony_ci bb->bitpos += blen; 1040f66f451Sopenharmony_ci } 1050f66f451Sopenharmony_ci 1060f66f451Sopenharmony_ci return result; 1070f66f451Sopenharmony_ci} 1080f66f451Sopenharmony_ci 1090f66f451Sopenharmony_civoid bitbuf_flush(struct bitbuf *bb) 1100f66f451Sopenharmony_ci{ 1110f66f451Sopenharmony_ci if (!bb->bitpos) return; 1120f66f451Sopenharmony_ci 1130f66f451Sopenharmony_ci xwrite(bb->fd, bb->buf, (bb->bitpos+7)>>3); 1140f66f451Sopenharmony_ci memset(bb->buf, 0, bb->max); 1150f66f451Sopenharmony_ci bb->bitpos = 0; 1160f66f451Sopenharmony_ci} 1170f66f451Sopenharmony_ci 1180f66f451Sopenharmony_civoid bitbuf_put(struct bitbuf *bb, int data, int len) 1190f66f451Sopenharmony_ci{ 1200f66f451Sopenharmony_ci while (len) { 1210f66f451Sopenharmony_ci int click = bb->bitpos >> 3, blow, blen; 1220f66f451Sopenharmony_ci 1230f66f451Sopenharmony_ci // Flush buffer if necessary 1240f66f451Sopenharmony_ci if (click == bb->max) { 1250f66f451Sopenharmony_ci bitbuf_flush(bb); 1260f66f451Sopenharmony_ci click = 0; 1270f66f451Sopenharmony_ci } 1280f66f451Sopenharmony_ci blow = bb->bitpos & 7; 1290f66f451Sopenharmony_ci blen = 8-blow; 1300f66f451Sopenharmony_ci if (blen > len) blen = len; 1310f66f451Sopenharmony_ci bb->buf[click] |= data << blow; 1320f66f451Sopenharmony_ci bb->bitpos += blen; 1330f66f451Sopenharmony_ci data >>= blen; 1340f66f451Sopenharmony_ci len -= blen; 1350f66f451Sopenharmony_ci } 1360f66f451Sopenharmony_ci} 1370f66f451Sopenharmony_ci 1380f66f451Sopenharmony_cistatic void output_byte(struct deflate *dd, char sym) 1390f66f451Sopenharmony_ci{ 1400f66f451Sopenharmony_ci int pos = dd->pos++ & 32767; 1410f66f451Sopenharmony_ci 1420f66f451Sopenharmony_ci dd->data[pos] = sym; 1430f66f451Sopenharmony_ci 1440f66f451Sopenharmony_ci if (pos == 32767) { 1450f66f451Sopenharmony_ci xwrite(dd->outfd, dd->data, 32768); 1460f66f451Sopenharmony_ci if (dd->crcfunc) dd->crcfunc(dd, dd->data, 32768); 1470f66f451Sopenharmony_ci } 1480f66f451Sopenharmony_ci} 1490f66f451Sopenharmony_ci 1500f66f451Sopenharmony_ci// Huffman coding uses bits to traverse a binary tree to a leaf node, 1510f66f451Sopenharmony_ci// By placing frequently occurring symbols at shorter paths, frequently 1520f66f451Sopenharmony_ci// used symbols may be represented in fewer bits than uncommon symbols. 1530f66f451Sopenharmony_ci// (length[0] isn't used but code's clearer if it's there.) 1540f66f451Sopenharmony_ci 1550f66f451Sopenharmony_cistruct huff { 1560f66f451Sopenharmony_ci unsigned short length[16]; // How many symbols have this bit length? 1570f66f451Sopenharmony_ci unsigned short symbol[288]; // sorted by bit length, then ascending order 1580f66f451Sopenharmony_ci}; 1590f66f451Sopenharmony_ci 1600f66f451Sopenharmony_ci// Create simple huffman tree from array of bit lengths. 1610f66f451Sopenharmony_ci 1620f66f451Sopenharmony_ci// The symbols in the huffman trees are sorted (first by bit length 1630f66f451Sopenharmony_ci// of the code to reach them, then by symbol number). This means that given 1640f66f451Sopenharmony_ci// the bit length of each symbol, we can construct a unique tree. 1650f66f451Sopenharmony_cistatic void len2huff(struct huff *huff, char bitlen[], int len) 1660f66f451Sopenharmony_ci{ 1670f66f451Sopenharmony_ci int offset[16]; 1680f66f451Sopenharmony_ci int i; 1690f66f451Sopenharmony_ci 1700f66f451Sopenharmony_ci // Count number of codes at each bit length 1710f66f451Sopenharmony_ci memset(huff, 0, sizeof(struct huff)); 1720f66f451Sopenharmony_ci for (i = 0; i<len; i++) huff->length[bitlen[i]]++; 1730f66f451Sopenharmony_ci 1740f66f451Sopenharmony_ci // Sort symbols by bit length, then symbol. Get list of starting positions 1750f66f451Sopenharmony_ci // for each group, then write each symbol to next position within its group. 1760f66f451Sopenharmony_ci *huff->length = *offset = 0; 1770f66f451Sopenharmony_ci for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1]; 1780f66f451Sopenharmony_ci for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i; 1790f66f451Sopenharmony_ci} 1800f66f451Sopenharmony_ci 1810f66f451Sopenharmony_ci// Fetch and decode next huffman coded symbol from bitbuf. 1820f66f451Sopenharmony_ci// This takes advantage of the sorting to navigate the tree as an array: 1830f66f451Sopenharmony_ci// each time we fetch a bit we have all the codes at that bit level in 1840f66f451Sopenharmony_ci// order with no gaps. 1850f66f451Sopenharmony_cistatic unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) 1860f66f451Sopenharmony_ci{ 1870f66f451Sopenharmony_ci unsigned short *length = huff->length; 1880f66f451Sopenharmony_ci int start = 0, offset = 0; 1890f66f451Sopenharmony_ci 1900f66f451Sopenharmony_ci // Traverse through the bit lengths until our code is in this range 1910f66f451Sopenharmony_ci for (;;) { 1920f66f451Sopenharmony_ci offset = (offset << 1) | bitbuf_bit(bb); 1930f66f451Sopenharmony_ci start += *++length; 1940f66f451Sopenharmony_ci if ((offset -= *length) < 0) break; 1950f66f451Sopenharmony_ci if ((length - huff->length) & 16) error_exit("bad symbol"); 1960f66f451Sopenharmony_ci } 1970f66f451Sopenharmony_ci 1980f66f451Sopenharmony_ci return huff->symbol[start + offset]; 1990f66f451Sopenharmony_ci} 2000f66f451Sopenharmony_ci 2010f66f451Sopenharmony_ci// Decompress deflated data from bitbuf to dd->outfd. 2020f66f451Sopenharmony_cistatic void inflate(struct deflate *dd, struct bitbuf *bb) 2030f66f451Sopenharmony_ci{ 2040f66f451Sopenharmony_ci dd->crc = ~0; 2050f66f451Sopenharmony_ci 2060f66f451Sopenharmony_ci // repeat until spanked 2070f66f451Sopenharmony_ci for (;;) { 2080f66f451Sopenharmony_ci int final, type; 2090f66f451Sopenharmony_ci 2100f66f451Sopenharmony_ci final = bitbuf_get(bb, 1); 2110f66f451Sopenharmony_ci type = bitbuf_get(bb, 2); 2120f66f451Sopenharmony_ci 2130f66f451Sopenharmony_ci if (type == 3) error_exit("bad type"); 2140f66f451Sopenharmony_ci 2150f66f451Sopenharmony_ci // Uncompressed block? 2160f66f451Sopenharmony_ci if (!type) { 2170f66f451Sopenharmony_ci int len, nlen; 2180f66f451Sopenharmony_ci 2190f66f451Sopenharmony_ci // Align to byte, read length 2200f66f451Sopenharmony_ci bitbuf_skip(bb, (8-bb->bitpos)&7); 2210f66f451Sopenharmony_ci len = bitbuf_get(bb, 16); 2220f66f451Sopenharmony_ci nlen = bitbuf_get(bb, 16); 2230f66f451Sopenharmony_ci if (len != (0xffff & ~nlen)) error_exit("bad len"); 2240f66f451Sopenharmony_ci 2250f66f451Sopenharmony_ci // Dump literal output data 2260f66f451Sopenharmony_ci while (len) { 2270f66f451Sopenharmony_ci int pos = bb->bitpos >> 3, bblen = bb->len - pos; 2280f66f451Sopenharmony_ci char *p = bb->buf+pos; 2290f66f451Sopenharmony_ci 2300f66f451Sopenharmony_ci // dump bytes until done or end of current bitbuf contents 2310f66f451Sopenharmony_ci if (bblen > len) bblen = len; 2320f66f451Sopenharmony_ci pos = bblen; 2330f66f451Sopenharmony_ci while (pos--) output_byte(dd, *(p++)); 2340f66f451Sopenharmony_ci bitbuf_skip(bb, bblen << 3); 2350f66f451Sopenharmony_ci len -= bblen; 2360f66f451Sopenharmony_ci } 2370f66f451Sopenharmony_ci 2380f66f451Sopenharmony_ci // Compressed block 2390f66f451Sopenharmony_ci } else { 2400f66f451Sopenharmony_ci struct huff *disthuff, *lithuff; 2410f66f451Sopenharmony_ci 2420f66f451Sopenharmony_ci // Dynamic huffman codes? 2430f66f451Sopenharmony_ci if (type == 2) { 2440f66f451Sopenharmony_ci struct huff *h2 = ((struct huff *)libbuf)+1; 2450f66f451Sopenharmony_ci int i, litlen, distlen, hufflen; 2460f66f451Sopenharmony_ci char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b" 2470f66f451Sopenharmony_ci "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits; 2480f66f451Sopenharmony_ci 2490f66f451Sopenharmony_ci // The huffman trees are stored as a series of bit lengths 2500f66f451Sopenharmony_ci litlen = bitbuf_get(bb, 5)+257; // max 288 2510f66f451Sopenharmony_ci distlen = bitbuf_get(bb, 5)+1; // max 32 2520f66f451Sopenharmony_ci hufflen = bitbuf_get(bb, 4)+4; // max 19 2530f66f451Sopenharmony_ci 2540f66f451Sopenharmony_ci // The literal and distance codes are themselves compressed, in 2550f66f451Sopenharmony_ci // a complicated way: an array of bit lengths (hufflen many 2560f66f451Sopenharmony_ci // entries, each 3 bits) is used to fill out an array of 19 entries 2570f66f451Sopenharmony_ci // in a magic order, leaving the rest 0. Then make a tree out of it: 2580f66f451Sopenharmony_ci memset(bits = libbuf+1, 0, 19); 2590f66f451Sopenharmony_ci for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3); 2600f66f451Sopenharmony_ci len2huff(h2, bits, 19); 2610f66f451Sopenharmony_ci 2620f66f451Sopenharmony_ci // Use that tree to read in the literal and distance bit lengths 2630f66f451Sopenharmony_ci for (i = 0; i < litlen + distlen;) { 2640f66f451Sopenharmony_ci int sym = huff_and_puff(bb, h2); 2650f66f451Sopenharmony_ci 2660f66f451Sopenharmony_ci // 0-15 are literals, 16 = repeat previous code 3-6 times, 2670f66f451Sopenharmony_ci // 17 = 3-10 zeroes (3 bit), 18 = 11-138 zeroes (7 bit) 2680f66f451Sopenharmony_ci if (sym < 16) bits[i++] = sym; 2690f66f451Sopenharmony_ci else { 2700f66f451Sopenharmony_ci int len = sym & 2; 2710f66f451Sopenharmony_ci 2720f66f451Sopenharmony_ci len = bitbuf_get(bb, sym-14+len+(len>>1)) + 3 + (len<<2); 2730f66f451Sopenharmony_ci memset(bits+i, bits[i-1] * !(sym&3), len); 2740f66f451Sopenharmony_ci i += len; 2750f66f451Sopenharmony_ci } 2760f66f451Sopenharmony_ci } 2770f66f451Sopenharmony_ci if (i > litlen+distlen) error_exit("bad tree"); 2780f66f451Sopenharmony_ci 2790f66f451Sopenharmony_ci len2huff(lithuff = h2, bits, litlen); 2800f66f451Sopenharmony_ci len2huff(disthuff = ((struct huff *)libbuf)+2, bits+litlen, distlen); 2810f66f451Sopenharmony_ci 2820f66f451Sopenharmony_ci // Static huffman codes 2830f66f451Sopenharmony_ci } else { 2840f66f451Sopenharmony_ci lithuff = dd->fixlithuff; 2850f66f451Sopenharmony_ci disthuff = dd->fixdisthuff; 2860f66f451Sopenharmony_ci } 2870f66f451Sopenharmony_ci 2880f66f451Sopenharmony_ci // Use huffman tables to decode block of compressed symbols 2890f66f451Sopenharmony_ci for (;;) { 2900f66f451Sopenharmony_ci int sym = huff_and_puff(bb, lithuff); 2910f66f451Sopenharmony_ci 2920f66f451Sopenharmony_ci // Literal? 2930f66f451Sopenharmony_ci if (sym < 256) output_byte(dd, sym); 2940f66f451Sopenharmony_ci 2950f66f451Sopenharmony_ci // Copy range? 2960f66f451Sopenharmony_ci else if (sym > 256) { 2970f66f451Sopenharmony_ci int len, dist; 2980f66f451Sopenharmony_ci 2990f66f451Sopenharmony_ci sym -= 257; 3000f66f451Sopenharmony_ci len = dd->lenbase[sym] + bitbuf_get(bb, dd->lenbits[sym]); 3010f66f451Sopenharmony_ci sym = huff_and_puff(bb, disthuff); 3020f66f451Sopenharmony_ci dist = dd->distbase[sym] + bitbuf_get(bb, dd->distbits[sym]); 3030f66f451Sopenharmony_ci sym = dd->pos & 32767; 3040f66f451Sopenharmony_ci 3050f66f451Sopenharmony_ci while (len--) output_byte(dd, dd->data[(dd->pos-dist) & 32767]); 3060f66f451Sopenharmony_ci 3070f66f451Sopenharmony_ci // End of block 3080f66f451Sopenharmony_ci } else break; 3090f66f451Sopenharmony_ci } 3100f66f451Sopenharmony_ci } 3110f66f451Sopenharmony_ci 3120f66f451Sopenharmony_ci // Was that the last block? 3130f66f451Sopenharmony_ci if (final) break; 3140f66f451Sopenharmony_ci } 3150f66f451Sopenharmony_ci 3160f66f451Sopenharmony_ci if (dd->pos & 32767) { 3170f66f451Sopenharmony_ci xwrite(dd->outfd, dd->data, dd->pos&32767); 3180f66f451Sopenharmony_ci if (dd->crcfunc) dd->crcfunc(dd, dd->data, dd->pos&32767); 3190f66f451Sopenharmony_ci } 3200f66f451Sopenharmony_ci} 3210f66f451Sopenharmony_ci 3220f66f451Sopenharmony_ci// Deflate from dd->infd to bitbuf 3230f66f451Sopenharmony_ci// For deflate, dd->len = input read, dd->pos = input consumed 3240f66f451Sopenharmony_cistatic void deflate(struct deflate *dd, struct bitbuf *bb) 3250f66f451Sopenharmony_ci{ 3260f66f451Sopenharmony_ci char *data = dd->data; 3270f66f451Sopenharmony_ci int len, final = 0; 3280f66f451Sopenharmony_ci 3290f66f451Sopenharmony_ci dd->crc = ~0; 3300f66f451Sopenharmony_ci 3310f66f451Sopenharmony_ci while (!final) { 3320f66f451Sopenharmony_ci // Read next half-window of data if we haven't hit EOF yet. 3330f66f451Sopenharmony_ci len = readall(dd->infd, data+(dd->len&32768), 32768); 3340f66f451Sopenharmony_ci if (len < 0) perror_exit("read"); // todo: add filename 3350f66f451Sopenharmony_ci if (len != 32768) final++; 3360f66f451Sopenharmony_ci if (dd->crcfunc) dd->crcfunc(dd, data+(dd->len&32768), len); 3370f66f451Sopenharmony_ci // dd->len += len; crcfunc advances len TODO 3380f66f451Sopenharmony_ci 3390f66f451Sopenharmony_ci // store block as literal 3400f66f451Sopenharmony_ci bitbuf_put(bb, final, 1); 3410f66f451Sopenharmony_ci bitbuf_put(bb, 0, 1); 3420f66f451Sopenharmony_ci 3430f66f451Sopenharmony_ci bitbuf_put(bb, 0, (8-bb->bitpos)&7); 3440f66f451Sopenharmony_ci bitbuf_put(bb, len, 16); 3450f66f451Sopenharmony_ci bitbuf_put(bb, 0xffff & ~len, 16); 3460f66f451Sopenharmony_ci 3470f66f451Sopenharmony_ci // repeat until spanked 3480f66f451Sopenharmony_ci while (dd->pos != dd->len) { 3490f66f451Sopenharmony_ci unsigned pos = dd->pos&65535; 3500f66f451Sopenharmony_ci 3510f66f451Sopenharmony_ci bitbuf_put(bb, data[pos], 8); 3520f66f451Sopenharmony_ci 3530f66f451Sopenharmony_ci // need to refill buffer? 3540f66f451Sopenharmony_ci if (!(32767 & ++dd->pos) && !final) break; 3550f66f451Sopenharmony_ci } 3560f66f451Sopenharmony_ci } 3570f66f451Sopenharmony_ci bitbuf_flush(bb); 3580f66f451Sopenharmony_ci} 3590f66f451Sopenharmony_ci 3600f66f451Sopenharmony_ci// Allocate memory for deflate/inflate. 3610f66f451Sopenharmony_cistatic struct deflate *init_deflate(int compress) 3620f66f451Sopenharmony_ci{ 3630f66f451Sopenharmony_ci int i, n = 1; 3640f66f451Sopenharmony_ci struct deflate *dd = xmalloc(sizeof(struct deflate)+32768*(compress ? 4 : 1)); 3650f66f451Sopenharmony_ci 3660f66f451Sopenharmony_ci memset(dd, 0, sizeof(struct deflate)); 3670f66f451Sopenharmony_ci // decompress needs 32k history, compress adds 64k hashhead and 32k hashchain 3680f66f451Sopenharmony_ci if (compress) { 3690f66f451Sopenharmony_ci dd->hashhead = (unsigned short *)(dd->data+65536); 3700f66f451Sopenharmony_ci dd->hashchain = (unsigned short *)(dd->data+65536+32768); 3710f66f451Sopenharmony_ci } 3720f66f451Sopenharmony_ci 3730f66f451Sopenharmony_ci // Calculate lenbits, lenbase, distbits, distbase 3740f66f451Sopenharmony_ci *dd->lenbase = 3; 3750f66f451Sopenharmony_ci for (i = 0; i<sizeof(dd->lenbits)-1; i++) { 3760f66f451Sopenharmony_ci if (i>4) { 3770f66f451Sopenharmony_ci if (!(i&3)) { 3780f66f451Sopenharmony_ci dd->lenbits[i]++; 3790f66f451Sopenharmony_ci n <<= 1; 3800f66f451Sopenharmony_ci } 3810f66f451Sopenharmony_ci if (i == 27) n--; 3820f66f451Sopenharmony_ci else dd->lenbits[i+1] = dd->lenbits[i]; 3830f66f451Sopenharmony_ci } 3840f66f451Sopenharmony_ci dd->lenbase[i+1] = n + dd->lenbase[i]; 3850f66f451Sopenharmony_ci } 3860f66f451Sopenharmony_ci n = 0; 3870f66f451Sopenharmony_ci for (i = 0; i<sizeof(dd->distbits); i++) { 3880f66f451Sopenharmony_ci dd->distbase[i] = 1<<n; 3890f66f451Sopenharmony_ci if (i) dd->distbase[i] += dd->distbase[i-1]; 3900f66f451Sopenharmony_ci if (i>3 && !(i&1)) n++; 3910f66f451Sopenharmony_ci dd->distbits[i] = n; 3920f66f451Sopenharmony_ci } 3930f66f451Sopenharmony_ci 3940f66f451Sopenharmony_ci// TODO layout and lifetime of this? 3950f66f451Sopenharmony_ci // Init fixed huffman tables 3960f66f451Sopenharmony_ci for (i=0; i<288; i++) libbuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279); 3970f66f451Sopenharmony_ci len2huff(dd->fixlithuff = ((struct huff *)libbuf)+3, libbuf, 288); 3980f66f451Sopenharmony_ci memset(libbuf, 5, 30); 3990f66f451Sopenharmony_ci len2huff(dd->fixdisthuff = ((struct huff *)libbuf)+4, libbuf, 30); 4000f66f451Sopenharmony_ci 4010f66f451Sopenharmony_ci return dd; 4020f66f451Sopenharmony_ci} 4030f66f451Sopenharmony_ci 4040f66f451Sopenharmony_ci// Return true/false whether we consumed a gzip header. 4050f66f451Sopenharmony_cistatic int is_gzip(struct bitbuf *bb) 4060f66f451Sopenharmony_ci{ 4070f66f451Sopenharmony_ci int flags; 4080f66f451Sopenharmony_ci 4090f66f451Sopenharmony_ci // Confirm signature 4100f66f451Sopenharmony_ci if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31) 4110f66f451Sopenharmony_ci return 0; 4120f66f451Sopenharmony_ci bitbuf_skip(bb, 6*8); 4130f66f451Sopenharmony_ci 4140f66f451Sopenharmony_ci // Skip extra, name, comment, header CRC fields 4150f66f451Sopenharmony_ci if (flags & 4) bitbuf_skip(bb, 16); 4160f66f451Sopenharmony_ci if (flags & 8) while (bitbuf_get(bb, 8)); 4170f66f451Sopenharmony_ci if (flags & 16) while (bitbuf_get(bb, 8)); 4180f66f451Sopenharmony_ci if (flags & 2) bitbuf_skip(bb, 16); 4190f66f451Sopenharmony_ci 4200f66f451Sopenharmony_ci return 1; 4210f66f451Sopenharmony_ci} 4220f66f451Sopenharmony_ci 4230f66f451Sopenharmony_civoid gzip_crc(struct deflate *dd, char *data, unsigned len) 4240f66f451Sopenharmony_ci{ 4250f66f451Sopenharmony_ci int i; 4260f66f451Sopenharmony_ci unsigned crc, *crc_table = dd->crctable; 4270f66f451Sopenharmony_ci 4280f66f451Sopenharmony_ci crc = dd->crc; 4290f66f451Sopenharmony_ci for (i = 0; i<len; i++) crc = crc_table[(crc^data[i])&0xff] ^ (crc>>8); 4300f66f451Sopenharmony_ci dd->crc = crc; 4310f66f451Sopenharmony_ci dd->len += len; 4320f66f451Sopenharmony_ci} 4330f66f451Sopenharmony_ci 4340f66f451Sopenharmony_ci/* 4350f66f451Sopenharmony_ci// Start with crc = 1, or pass in last crc to append more data 4360f66f451Sopenharmony_ciunsigned adler32(char *buf, unsigned len, unsigned crc) 4370f66f451Sopenharmony_ci{ 4380f66f451Sopenharmony_ci unsigned aa = crc&((1<<16)-1), bb = crc>>16; 4390f66f451Sopenharmony_ci while (len--) { 4400f66f451Sopenharmony_ci aa = (aa+*buf)%65521; 4410f66f451Sopenharmony_ci bb = (bb+aa)%65521; 4420f66f451Sopenharmony_ci } 4430f66f451Sopenharmony_ci return (bb<16)+aa; 4440f66f451Sopenharmony_ci} 4450f66f451Sopenharmony_ci*/ 4460f66f451Sopenharmony_ci 4470f66f451Sopenharmony_cilong long gzip_fd(int infd, int outfd) 4480f66f451Sopenharmony_ci{ 4490f66f451Sopenharmony_ci struct bitbuf *bb = bitbuf_init(outfd, 4096); 4500f66f451Sopenharmony_ci struct deflate *dd = init_deflate(1); 4510f66f451Sopenharmony_ci long long rc; 4520f66f451Sopenharmony_ci 4530f66f451Sopenharmony_ci // Header from RFC 1952 section 2.2: 4540f66f451Sopenharmony_ci // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none), 4550f66f451Sopenharmony_ci // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression), 4560f66f451Sopenharmony_ci // Operating System (FF=unknown) 4570f66f451Sopenharmony_ci 4580f66f451Sopenharmony_ci dd->infd = infd; 4590f66f451Sopenharmony_ci xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10); 4600f66f451Sopenharmony_ci 4610f66f451Sopenharmony_ci // Little endian crc table 4620f66f451Sopenharmony_ci crc_init(dd->crctable, 1); 4630f66f451Sopenharmony_ci dd->crcfunc = gzip_crc; 4640f66f451Sopenharmony_ci 4650f66f451Sopenharmony_ci deflate(dd, bb); 4660f66f451Sopenharmony_ci 4670f66f451Sopenharmony_ci // tail: crc32, len32 4680f66f451Sopenharmony_ci 4690f66f451Sopenharmony_ci bitbuf_put(bb, 0, (8-bb->bitpos)&7); 4700f66f451Sopenharmony_ci bitbuf_put(bb, ~dd->crc, 32); 4710f66f451Sopenharmony_ci bitbuf_put(bb, dd->len, 32); 4720f66f451Sopenharmony_ci rc = dd->len; 4730f66f451Sopenharmony_ci 4740f66f451Sopenharmony_ci bitbuf_flush(bb); 4750f66f451Sopenharmony_ci free(bb); 4760f66f451Sopenharmony_ci free(dd); 4770f66f451Sopenharmony_ci 4780f66f451Sopenharmony_ci return rc; 4790f66f451Sopenharmony_ci} 4800f66f451Sopenharmony_ci 4810f66f451Sopenharmony_cilong long gunzip_fd(int infd, int outfd) 4820f66f451Sopenharmony_ci{ 4830f66f451Sopenharmony_ci struct bitbuf *bb = bitbuf_init(infd, 4096); 4840f66f451Sopenharmony_ci struct deflate *dd = init_deflate(0); 4850f66f451Sopenharmony_ci long long rc = 0; 4860f66f451Sopenharmony_ci 4870f66f451Sopenharmony_ci // Little endian crc table 4880f66f451Sopenharmony_ci crc_init(dd->crctable, 1); 4890f66f451Sopenharmony_ci dd->crcfunc = gzip_crc; 4900f66f451Sopenharmony_ci dd->outfd = outfd; 4910f66f451Sopenharmony_ci 4920f66f451Sopenharmony_ci do { 4930f66f451Sopenharmony_ci if (!is_gzip(bb)) error_exit("not gzip"); 4940f66f451Sopenharmony_ci 4950f66f451Sopenharmony_ci inflate(dd, bb); 4960f66f451Sopenharmony_ci 4970f66f451Sopenharmony_ci // tail: crc32, len32 4980f66f451Sopenharmony_ci bitbuf_skip(bb, (8-bb->bitpos)&7); 4990f66f451Sopenharmony_ci if (~dd->crc != bitbuf_get(bb, 32) || dd->len != bitbuf_get(bb, 32)) 5000f66f451Sopenharmony_ci error_exit("bad crc"); 5010f66f451Sopenharmony_ci rc += dd->len; 5020f66f451Sopenharmony_ci 5030f66f451Sopenharmony_ci bitbuf_skip(bb, (8-bb->bitpos)&7); 5040f66f451Sopenharmony_ci dd->pos = dd->len = 0; 5050f66f451Sopenharmony_ci } while (bitbuf_skip(bb, 0)); 5060f66f451Sopenharmony_ci free(bb); 5070f66f451Sopenharmony_ci free(dd); 5080f66f451Sopenharmony_ci 5090f66f451Sopenharmony_ci return rc; 5100f66f451Sopenharmony_ci} 511