10f66f451Sopenharmony_ci/* deflate.c - deflate/inflate code for gzip and friends
20f66f451Sopenharmony_ci *
30f66f451Sopenharmony_ci * Copyright 2014 Rob Landley <rob@landley.net>
40f66f451Sopenharmony_ci *
50f66f451Sopenharmony_ci * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip)
60f66f451Sopenharmony_ci * LSB 4.1 has gzip, gunzip, and zcat
70f66f451Sopenharmony_ci *
80f66f451Sopenharmony_ci * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout
90f66f451Sopenharmony_ci */
100f66f451Sopenharmony_ci
110f66f451Sopenharmony_ci#include "toys.h"
120f66f451Sopenharmony_ci
130f66f451Sopenharmony_cistruct deflate {
140f66f451Sopenharmony_ci  // Huffman codes: base offset and extra bits tables (length and distance)
150f66f451Sopenharmony_ci  char lenbits[29], distbits[30];
160f66f451Sopenharmony_ci  unsigned short lenbase[29], distbase[30];
170f66f451Sopenharmony_ci  void *fixdisthuff, *fixlithuff;
180f66f451Sopenharmony_ci
190f66f451Sopenharmony_ci  // CRC
200f66f451Sopenharmony_ci  void (*crcfunc)(struct deflate *dd, char *data, unsigned len);
210f66f451Sopenharmony_ci  unsigned crctable[256], crc;
220f66f451Sopenharmony_ci
230f66f451Sopenharmony_ci
240f66f451Sopenharmony_ci  // Tables only used for deflation
250f66f451Sopenharmony_ci  unsigned short *hashhead, *hashchain;
260f66f451Sopenharmony_ci
270f66f451Sopenharmony_ci  // Compressed data buffer (extra space malloced at end)
280f66f451Sopenharmony_ci  unsigned pos, len;
290f66f451Sopenharmony_ci  int infd, outfd;
300f66f451Sopenharmony_ci  char data[];
310f66f451Sopenharmony_ci};
320f66f451Sopenharmony_ci
330f66f451Sopenharmony_ci// little endian bit buffer
340f66f451Sopenharmony_cistruct bitbuf {
350f66f451Sopenharmony_ci  int fd, bitpos, len, max;
360f66f451Sopenharmony_ci  char buf[];
370f66f451Sopenharmony_ci};
380f66f451Sopenharmony_ci
390f66f451Sopenharmony_ci// malloc a struct bitbuf
400f66f451Sopenharmony_cistruct bitbuf *bitbuf_init(int fd, int size)
410f66f451Sopenharmony_ci{
420f66f451Sopenharmony_ci  struct bitbuf *bb = xzalloc(sizeof(struct bitbuf)+size);
430f66f451Sopenharmony_ci
440f66f451Sopenharmony_ci  bb->max = size;
450f66f451Sopenharmony_ci  bb->fd = fd;
460f66f451Sopenharmony_ci
470f66f451Sopenharmony_ci  return bb;
480f66f451Sopenharmony_ci}
490f66f451Sopenharmony_ci
500f66f451Sopenharmony_ci// Advance bitpos without the overhead of recording bits
510f66f451Sopenharmony_ci// Loads more data when input buffer empty
520f66f451Sopenharmony_ci// call with 0 to just load data, returns 0 at EOF
530f66f451Sopenharmony_ciint bitbuf_skip(struct bitbuf *bb, int bits)
540f66f451Sopenharmony_ci{
550f66f451Sopenharmony_ci  int pos = bb->bitpos + bits + (bits<0), len;
560f66f451Sopenharmony_ci
570f66f451Sopenharmony_ci  while (pos >= (len = bb->len<<3)) {
580f66f451Sopenharmony_ci    pos -= len;
590f66f451Sopenharmony_ci    if (1 > (bb->len = read(bb->fd, bb->buf, bb->max))) {
600f66f451Sopenharmony_ci      if (!bb->len && !bits) break;
610f66f451Sopenharmony_ci      error_exit("inflate EOF");
620f66f451Sopenharmony_ci    }
630f66f451Sopenharmony_ci  }
640f66f451Sopenharmony_ci  bb->bitpos = pos;
650f66f451Sopenharmony_ci
660f66f451Sopenharmony_ci  return pos<len;
670f66f451Sopenharmony_ci}
680f66f451Sopenharmony_ci
690f66f451Sopenharmony_ci// Optimized single bit inlined version
700f66f451Sopenharmony_cistatic inline int bitbuf_bit(struct bitbuf *bb)
710f66f451Sopenharmony_ci{
720f66f451Sopenharmony_ci  int bufpos = bb->bitpos>>3;
730f66f451Sopenharmony_ci
740f66f451Sopenharmony_ci  if (bufpos == bb->len) {
750f66f451Sopenharmony_ci    bitbuf_skip(bb, -1);
760f66f451Sopenharmony_ci    bufpos = 0;
770f66f451Sopenharmony_ci  }
780f66f451Sopenharmony_ci
790f66f451Sopenharmony_ci  return (bb->buf[bufpos]>>(bb->bitpos++&7))&1;
800f66f451Sopenharmony_ci}
810f66f451Sopenharmony_ci
820f66f451Sopenharmony_ci// Fetch the next X bits from the bitbuf, little endian
830f66f451Sopenharmony_ciunsigned bitbuf_get(struct bitbuf *bb, int bits)
840f66f451Sopenharmony_ci{
850f66f451Sopenharmony_ci  int result = 0, offset = 0;
860f66f451Sopenharmony_ci
870f66f451Sopenharmony_ci  while (bits) {
880f66f451Sopenharmony_ci    int click = bb->bitpos >> 3, blow, blen;
890f66f451Sopenharmony_ci
900f66f451Sopenharmony_ci    // Load more data if buffer empty
910f66f451Sopenharmony_ci    if (click == bb->len) {
920f66f451Sopenharmony_ci      bitbuf_skip(bb, -1);
930f66f451Sopenharmony_ci      click = 0;
940f66f451Sopenharmony_ci    }
950f66f451Sopenharmony_ci
960f66f451Sopenharmony_ci    // grab bits from next byte
970f66f451Sopenharmony_ci    blow = bb->bitpos & 7;
980f66f451Sopenharmony_ci    blen = 8-blow;
990f66f451Sopenharmony_ci    if (blen > bits) blen = bits;
1000f66f451Sopenharmony_ci    result |= ((bb->buf[click] >> blow) & ((1<<blen)-1)) << offset;
1010f66f451Sopenharmony_ci    offset += blen;
1020f66f451Sopenharmony_ci    bits -= blen;
1030f66f451Sopenharmony_ci    bb->bitpos += blen;
1040f66f451Sopenharmony_ci  }
1050f66f451Sopenharmony_ci
1060f66f451Sopenharmony_ci  return result;
1070f66f451Sopenharmony_ci}
1080f66f451Sopenharmony_ci
1090f66f451Sopenharmony_civoid bitbuf_flush(struct bitbuf *bb)
1100f66f451Sopenharmony_ci{
1110f66f451Sopenharmony_ci  if (!bb->bitpos) return;
1120f66f451Sopenharmony_ci
1130f66f451Sopenharmony_ci  xwrite(bb->fd, bb->buf, (bb->bitpos+7)>>3);
1140f66f451Sopenharmony_ci  memset(bb->buf, 0, bb->max);
1150f66f451Sopenharmony_ci  bb->bitpos = 0;
1160f66f451Sopenharmony_ci}
1170f66f451Sopenharmony_ci
1180f66f451Sopenharmony_civoid bitbuf_put(struct bitbuf *bb, int data, int len)
1190f66f451Sopenharmony_ci{
1200f66f451Sopenharmony_ci  while (len) {
1210f66f451Sopenharmony_ci    int click = bb->bitpos >> 3, blow, blen;
1220f66f451Sopenharmony_ci
1230f66f451Sopenharmony_ci    // Flush buffer if necessary
1240f66f451Sopenharmony_ci    if (click == bb->max) {
1250f66f451Sopenharmony_ci      bitbuf_flush(bb);
1260f66f451Sopenharmony_ci      click = 0;
1270f66f451Sopenharmony_ci    }
1280f66f451Sopenharmony_ci    blow = bb->bitpos & 7;
1290f66f451Sopenharmony_ci    blen = 8-blow;
1300f66f451Sopenharmony_ci    if (blen > len) blen = len;
1310f66f451Sopenharmony_ci    bb->buf[click] |= data << blow;
1320f66f451Sopenharmony_ci    bb->bitpos += blen;
1330f66f451Sopenharmony_ci    data >>= blen;
1340f66f451Sopenharmony_ci    len -= blen;
1350f66f451Sopenharmony_ci  }
1360f66f451Sopenharmony_ci}
1370f66f451Sopenharmony_ci
1380f66f451Sopenharmony_cistatic void output_byte(struct deflate *dd, char sym)
1390f66f451Sopenharmony_ci{
1400f66f451Sopenharmony_ci  int pos = dd->pos++ & 32767;
1410f66f451Sopenharmony_ci
1420f66f451Sopenharmony_ci  dd->data[pos] = sym;
1430f66f451Sopenharmony_ci
1440f66f451Sopenharmony_ci  if (pos == 32767) {
1450f66f451Sopenharmony_ci    xwrite(dd->outfd, dd->data, 32768);
1460f66f451Sopenharmony_ci    if (dd->crcfunc) dd->crcfunc(dd, dd->data, 32768);
1470f66f451Sopenharmony_ci  }
1480f66f451Sopenharmony_ci}
1490f66f451Sopenharmony_ci
1500f66f451Sopenharmony_ci// Huffman coding uses bits to traverse a binary tree to a leaf node,
1510f66f451Sopenharmony_ci// By placing frequently occurring symbols at shorter paths, frequently
1520f66f451Sopenharmony_ci// used symbols may be represented in fewer bits than uncommon symbols.
1530f66f451Sopenharmony_ci// (length[0] isn't used but code's clearer if it's there.)
1540f66f451Sopenharmony_ci
1550f66f451Sopenharmony_cistruct huff {
1560f66f451Sopenharmony_ci  unsigned short length[16];  // How many symbols have this bit length?
1570f66f451Sopenharmony_ci  unsigned short symbol[288]; // sorted by bit length, then ascending order
1580f66f451Sopenharmony_ci};
1590f66f451Sopenharmony_ci
1600f66f451Sopenharmony_ci// Create simple huffman tree from array of bit lengths.
1610f66f451Sopenharmony_ci
1620f66f451Sopenharmony_ci// The symbols in the huffman trees are sorted (first by bit length
1630f66f451Sopenharmony_ci// of the code to reach them, then by symbol number). This means that given
1640f66f451Sopenharmony_ci// the bit length of each symbol, we can construct a unique tree.
1650f66f451Sopenharmony_cistatic void len2huff(struct huff *huff, char bitlen[], int len)
1660f66f451Sopenharmony_ci{
1670f66f451Sopenharmony_ci  int offset[16];
1680f66f451Sopenharmony_ci  int i;
1690f66f451Sopenharmony_ci
1700f66f451Sopenharmony_ci  // Count number of codes at each bit length
1710f66f451Sopenharmony_ci  memset(huff, 0, sizeof(struct huff));
1720f66f451Sopenharmony_ci  for (i = 0; i<len; i++) huff->length[bitlen[i]]++;
1730f66f451Sopenharmony_ci
1740f66f451Sopenharmony_ci  // Sort symbols by bit length, then symbol. Get list of starting positions
1750f66f451Sopenharmony_ci  // for each group, then write each symbol to next position within its group.
1760f66f451Sopenharmony_ci  *huff->length = *offset = 0;
1770f66f451Sopenharmony_ci  for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1];
1780f66f451Sopenharmony_ci  for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
1790f66f451Sopenharmony_ci}
1800f66f451Sopenharmony_ci
1810f66f451Sopenharmony_ci// Fetch and decode next huffman coded symbol from bitbuf.
1820f66f451Sopenharmony_ci// This takes advantage of the sorting to navigate the tree as an array:
1830f66f451Sopenharmony_ci// each time we fetch a bit we have all the codes at that bit level in
1840f66f451Sopenharmony_ci// order with no gaps.
1850f66f451Sopenharmony_cistatic unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff)
1860f66f451Sopenharmony_ci{
1870f66f451Sopenharmony_ci  unsigned short *length = huff->length;
1880f66f451Sopenharmony_ci  int start = 0, offset = 0;
1890f66f451Sopenharmony_ci
1900f66f451Sopenharmony_ci  // Traverse through the bit lengths until our code is in this range
1910f66f451Sopenharmony_ci  for (;;) {
1920f66f451Sopenharmony_ci    offset = (offset << 1) | bitbuf_bit(bb);
1930f66f451Sopenharmony_ci    start += *++length;
1940f66f451Sopenharmony_ci    if ((offset -= *length) < 0) break;
1950f66f451Sopenharmony_ci    if ((length - huff->length) & 16) error_exit("bad symbol");
1960f66f451Sopenharmony_ci  }
1970f66f451Sopenharmony_ci
1980f66f451Sopenharmony_ci  return huff->symbol[start + offset];
1990f66f451Sopenharmony_ci}
2000f66f451Sopenharmony_ci
2010f66f451Sopenharmony_ci// Decompress deflated data from bitbuf to dd->outfd.
2020f66f451Sopenharmony_cistatic void inflate(struct deflate *dd, struct bitbuf *bb)
2030f66f451Sopenharmony_ci{
2040f66f451Sopenharmony_ci  dd->crc = ~0;
2050f66f451Sopenharmony_ci
2060f66f451Sopenharmony_ci  // repeat until spanked
2070f66f451Sopenharmony_ci  for (;;) {
2080f66f451Sopenharmony_ci    int final, type;
2090f66f451Sopenharmony_ci
2100f66f451Sopenharmony_ci    final = bitbuf_get(bb, 1);
2110f66f451Sopenharmony_ci    type = bitbuf_get(bb, 2);
2120f66f451Sopenharmony_ci
2130f66f451Sopenharmony_ci    if (type == 3) error_exit("bad type");
2140f66f451Sopenharmony_ci
2150f66f451Sopenharmony_ci    // Uncompressed block?
2160f66f451Sopenharmony_ci    if (!type) {
2170f66f451Sopenharmony_ci      int len, nlen;
2180f66f451Sopenharmony_ci
2190f66f451Sopenharmony_ci      // Align to byte, read length
2200f66f451Sopenharmony_ci      bitbuf_skip(bb, (8-bb->bitpos)&7);
2210f66f451Sopenharmony_ci      len = bitbuf_get(bb, 16);
2220f66f451Sopenharmony_ci      nlen = bitbuf_get(bb, 16);
2230f66f451Sopenharmony_ci      if (len != (0xffff & ~nlen)) error_exit("bad len");
2240f66f451Sopenharmony_ci
2250f66f451Sopenharmony_ci      // Dump literal output data
2260f66f451Sopenharmony_ci      while (len) {
2270f66f451Sopenharmony_ci        int pos = bb->bitpos >> 3, bblen = bb->len - pos;
2280f66f451Sopenharmony_ci        char *p = bb->buf+pos;
2290f66f451Sopenharmony_ci
2300f66f451Sopenharmony_ci        // dump bytes until done or end of current bitbuf contents
2310f66f451Sopenharmony_ci        if (bblen > len) bblen = len;
2320f66f451Sopenharmony_ci        pos = bblen;
2330f66f451Sopenharmony_ci        while (pos--) output_byte(dd, *(p++));
2340f66f451Sopenharmony_ci        bitbuf_skip(bb, bblen << 3);
2350f66f451Sopenharmony_ci        len -= bblen;
2360f66f451Sopenharmony_ci      }
2370f66f451Sopenharmony_ci
2380f66f451Sopenharmony_ci    // Compressed block
2390f66f451Sopenharmony_ci    } else {
2400f66f451Sopenharmony_ci      struct huff *disthuff, *lithuff;
2410f66f451Sopenharmony_ci
2420f66f451Sopenharmony_ci      // Dynamic huffman codes?
2430f66f451Sopenharmony_ci      if (type == 2) {
2440f66f451Sopenharmony_ci        struct huff *h2 = ((struct huff *)libbuf)+1;
2450f66f451Sopenharmony_ci        int i, litlen, distlen, hufflen;
2460f66f451Sopenharmony_ci        char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b"
2470f66f451Sopenharmony_ci                              "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits;
2480f66f451Sopenharmony_ci
2490f66f451Sopenharmony_ci        // The huffman trees are stored as a series of bit lengths
2500f66f451Sopenharmony_ci        litlen = bitbuf_get(bb, 5)+257;  // max 288
2510f66f451Sopenharmony_ci        distlen = bitbuf_get(bb, 5)+1;   // max 32
2520f66f451Sopenharmony_ci        hufflen = bitbuf_get(bb, 4)+4;   // max 19
2530f66f451Sopenharmony_ci
2540f66f451Sopenharmony_ci        // The literal and distance codes are themselves compressed, in
2550f66f451Sopenharmony_ci        // a complicated way: an array of bit lengths (hufflen many
2560f66f451Sopenharmony_ci        // entries, each 3 bits) is used to fill out an array of 19 entries
2570f66f451Sopenharmony_ci        // in a magic order, leaving the rest 0. Then make a tree out of it:
2580f66f451Sopenharmony_ci        memset(bits = libbuf+1, 0, 19);
2590f66f451Sopenharmony_ci        for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3);
2600f66f451Sopenharmony_ci        len2huff(h2, bits, 19);
2610f66f451Sopenharmony_ci
2620f66f451Sopenharmony_ci        // Use that tree to read in the literal and distance bit lengths
2630f66f451Sopenharmony_ci        for (i = 0; i < litlen + distlen;) {
2640f66f451Sopenharmony_ci          int sym = huff_and_puff(bb, h2);
2650f66f451Sopenharmony_ci
2660f66f451Sopenharmony_ci          // 0-15 are literals, 16 = repeat previous code 3-6 times,
2670f66f451Sopenharmony_ci          // 17 = 3-10 zeroes (3 bit), 18 = 11-138 zeroes (7 bit)
2680f66f451Sopenharmony_ci          if (sym < 16) bits[i++] = sym;
2690f66f451Sopenharmony_ci          else {
2700f66f451Sopenharmony_ci            int len = sym & 2;
2710f66f451Sopenharmony_ci
2720f66f451Sopenharmony_ci            len = bitbuf_get(bb, sym-14+len+(len>>1)) + 3 + (len<<2);
2730f66f451Sopenharmony_ci            memset(bits+i, bits[i-1] * !(sym&3), len);
2740f66f451Sopenharmony_ci            i += len;
2750f66f451Sopenharmony_ci          }
2760f66f451Sopenharmony_ci        }
2770f66f451Sopenharmony_ci        if (i > litlen+distlen) error_exit("bad tree");
2780f66f451Sopenharmony_ci
2790f66f451Sopenharmony_ci        len2huff(lithuff = h2, bits, litlen);
2800f66f451Sopenharmony_ci        len2huff(disthuff = ((struct huff *)libbuf)+2, bits+litlen, distlen);
2810f66f451Sopenharmony_ci
2820f66f451Sopenharmony_ci      // Static huffman codes
2830f66f451Sopenharmony_ci      } else {
2840f66f451Sopenharmony_ci        lithuff = dd->fixlithuff;
2850f66f451Sopenharmony_ci        disthuff = dd->fixdisthuff;
2860f66f451Sopenharmony_ci      }
2870f66f451Sopenharmony_ci
2880f66f451Sopenharmony_ci      // Use huffman tables to decode block of compressed symbols
2890f66f451Sopenharmony_ci      for (;;) {
2900f66f451Sopenharmony_ci        int sym = huff_and_puff(bb, lithuff);
2910f66f451Sopenharmony_ci
2920f66f451Sopenharmony_ci        // Literal?
2930f66f451Sopenharmony_ci        if (sym < 256) output_byte(dd, sym);
2940f66f451Sopenharmony_ci
2950f66f451Sopenharmony_ci        // Copy range?
2960f66f451Sopenharmony_ci        else if (sym > 256) {
2970f66f451Sopenharmony_ci          int len, dist;
2980f66f451Sopenharmony_ci
2990f66f451Sopenharmony_ci          sym -= 257;
3000f66f451Sopenharmony_ci          len = dd->lenbase[sym] + bitbuf_get(bb, dd->lenbits[sym]);
3010f66f451Sopenharmony_ci          sym = huff_and_puff(bb, disthuff);
3020f66f451Sopenharmony_ci          dist = dd->distbase[sym] + bitbuf_get(bb, dd->distbits[sym]);
3030f66f451Sopenharmony_ci          sym = dd->pos & 32767;
3040f66f451Sopenharmony_ci
3050f66f451Sopenharmony_ci          while (len--) output_byte(dd, dd->data[(dd->pos-dist) & 32767]);
3060f66f451Sopenharmony_ci
3070f66f451Sopenharmony_ci        // End of block
3080f66f451Sopenharmony_ci        } else break;
3090f66f451Sopenharmony_ci      }
3100f66f451Sopenharmony_ci    }
3110f66f451Sopenharmony_ci
3120f66f451Sopenharmony_ci    // Was that the last block?
3130f66f451Sopenharmony_ci    if (final) break;
3140f66f451Sopenharmony_ci  }
3150f66f451Sopenharmony_ci
3160f66f451Sopenharmony_ci  if (dd->pos & 32767) {
3170f66f451Sopenharmony_ci    xwrite(dd->outfd, dd->data, dd->pos&32767);
3180f66f451Sopenharmony_ci    if (dd->crcfunc) dd->crcfunc(dd, dd->data, dd->pos&32767);
3190f66f451Sopenharmony_ci  }
3200f66f451Sopenharmony_ci}
3210f66f451Sopenharmony_ci
3220f66f451Sopenharmony_ci// Deflate from dd->infd to bitbuf
3230f66f451Sopenharmony_ci// For deflate, dd->len = input read, dd->pos = input consumed
3240f66f451Sopenharmony_cistatic void deflate(struct deflate *dd, struct bitbuf *bb)
3250f66f451Sopenharmony_ci{
3260f66f451Sopenharmony_ci  char *data = dd->data;
3270f66f451Sopenharmony_ci  int len, final = 0;
3280f66f451Sopenharmony_ci
3290f66f451Sopenharmony_ci  dd->crc = ~0;
3300f66f451Sopenharmony_ci
3310f66f451Sopenharmony_ci  while (!final) {
3320f66f451Sopenharmony_ci    // Read next half-window of data if we haven't hit EOF yet.
3330f66f451Sopenharmony_ci    len = readall(dd->infd, data+(dd->len&32768), 32768);
3340f66f451Sopenharmony_ci    if (len < 0) perror_exit("read"); // todo: add filename
3350f66f451Sopenharmony_ci    if (len != 32768) final++;
3360f66f451Sopenharmony_ci    if (dd->crcfunc) dd->crcfunc(dd, data+(dd->len&32768), len);
3370f66f451Sopenharmony_ci    // dd->len += len;  crcfunc advances len TODO
3380f66f451Sopenharmony_ci
3390f66f451Sopenharmony_ci    // store block as literal
3400f66f451Sopenharmony_ci    bitbuf_put(bb, final, 1);
3410f66f451Sopenharmony_ci    bitbuf_put(bb, 0, 1);
3420f66f451Sopenharmony_ci
3430f66f451Sopenharmony_ci    bitbuf_put(bb, 0, (8-bb->bitpos)&7);
3440f66f451Sopenharmony_ci    bitbuf_put(bb, len, 16);
3450f66f451Sopenharmony_ci    bitbuf_put(bb, 0xffff & ~len, 16);
3460f66f451Sopenharmony_ci
3470f66f451Sopenharmony_ci    // repeat until spanked
3480f66f451Sopenharmony_ci    while (dd->pos != dd->len) {
3490f66f451Sopenharmony_ci      unsigned pos = dd->pos&65535;
3500f66f451Sopenharmony_ci
3510f66f451Sopenharmony_ci      bitbuf_put(bb, data[pos], 8);
3520f66f451Sopenharmony_ci
3530f66f451Sopenharmony_ci      // need to refill buffer?
3540f66f451Sopenharmony_ci      if (!(32767 & ++dd->pos) && !final) break;
3550f66f451Sopenharmony_ci    }
3560f66f451Sopenharmony_ci  }
3570f66f451Sopenharmony_ci  bitbuf_flush(bb);
3580f66f451Sopenharmony_ci}
3590f66f451Sopenharmony_ci
3600f66f451Sopenharmony_ci// Allocate memory for deflate/inflate.
3610f66f451Sopenharmony_cistatic struct deflate *init_deflate(int compress)
3620f66f451Sopenharmony_ci{
3630f66f451Sopenharmony_ci  int i, n = 1;
3640f66f451Sopenharmony_ci  struct deflate *dd = xmalloc(sizeof(struct deflate)+32768*(compress ? 4 : 1));
3650f66f451Sopenharmony_ci
3660f66f451Sopenharmony_ci  memset(dd, 0, sizeof(struct deflate));
3670f66f451Sopenharmony_ci  // decompress needs 32k history, compress adds 64k hashhead and 32k hashchain
3680f66f451Sopenharmony_ci  if (compress) {
3690f66f451Sopenharmony_ci    dd->hashhead = (unsigned short *)(dd->data+65536);
3700f66f451Sopenharmony_ci    dd->hashchain = (unsigned short *)(dd->data+65536+32768);
3710f66f451Sopenharmony_ci  }
3720f66f451Sopenharmony_ci
3730f66f451Sopenharmony_ci  // Calculate lenbits, lenbase, distbits, distbase
3740f66f451Sopenharmony_ci  *dd->lenbase = 3;
3750f66f451Sopenharmony_ci  for (i = 0; i<sizeof(dd->lenbits)-1; i++) {
3760f66f451Sopenharmony_ci    if (i>4) {
3770f66f451Sopenharmony_ci      if (!(i&3)) {
3780f66f451Sopenharmony_ci        dd->lenbits[i]++;
3790f66f451Sopenharmony_ci        n <<= 1;
3800f66f451Sopenharmony_ci      }
3810f66f451Sopenharmony_ci      if (i == 27) n--;
3820f66f451Sopenharmony_ci      else dd->lenbits[i+1] = dd->lenbits[i];
3830f66f451Sopenharmony_ci    }
3840f66f451Sopenharmony_ci    dd->lenbase[i+1] = n + dd->lenbase[i];
3850f66f451Sopenharmony_ci  }
3860f66f451Sopenharmony_ci  n = 0;
3870f66f451Sopenharmony_ci  for (i = 0; i<sizeof(dd->distbits); i++) {
3880f66f451Sopenharmony_ci    dd->distbase[i] = 1<<n;
3890f66f451Sopenharmony_ci    if (i) dd->distbase[i] += dd->distbase[i-1];
3900f66f451Sopenharmony_ci    if (i>3 && !(i&1)) n++;
3910f66f451Sopenharmony_ci    dd->distbits[i] = n;
3920f66f451Sopenharmony_ci  }
3930f66f451Sopenharmony_ci
3940f66f451Sopenharmony_ci// TODO layout and lifetime of this?
3950f66f451Sopenharmony_ci  // Init fixed huffman tables
3960f66f451Sopenharmony_ci  for (i=0; i<288; i++) libbuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279);
3970f66f451Sopenharmony_ci  len2huff(dd->fixlithuff = ((struct huff *)libbuf)+3, libbuf, 288);
3980f66f451Sopenharmony_ci  memset(libbuf, 5, 30);
3990f66f451Sopenharmony_ci  len2huff(dd->fixdisthuff = ((struct huff *)libbuf)+4, libbuf, 30);
4000f66f451Sopenharmony_ci
4010f66f451Sopenharmony_ci  return dd;
4020f66f451Sopenharmony_ci}
4030f66f451Sopenharmony_ci
4040f66f451Sopenharmony_ci// Return true/false whether we consumed a gzip header.
4050f66f451Sopenharmony_cistatic int is_gzip(struct bitbuf *bb)
4060f66f451Sopenharmony_ci{
4070f66f451Sopenharmony_ci  int flags;
4080f66f451Sopenharmony_ci
4090f66f451Sopenharmony_ci  // Confirm signature
4100f66f451Sopenharmony_ci  if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31)
4110f66f451Sopenharmony_ci    return 0;
4120f66f451Sopenharmony_ci  bitbuf_skip(bb, 6*8);
4130f66f451Sopenharmony_ci
4140f66f451Sopenharmony_ci  // Skip extra, name, comment, header CRC fields
4150f66f451Sopenharmony_ci  if (flags & 4) bitbuf_skip(bb, 16);
4160f66f451Sopenharmony_ci  if (flags & 8) while (bitbuf_get(bb, 8));
4170f66f451Sopenharmony_ci  if (flags & 16) while (bitbuf_get(bb, 8));
4180f66f451Sopenharmony_ci  if (flags & 2) bitbuf_skip(bb, 16);
4190f66f451Sopenharmony_ci
4200f66f451Sopenharmony_ci  return 1;
4210f66f451Sopenharmony_ci}
4220f66f451Sopenharmony_ci
4230f66f451Sopenharmony_civoid gzip_crc(struct deflate *dd, char *data, unsigned len)
4240f66f451Sopenharmony_ci{
4250f66f451Sopenharmony_ci  int i;
4260f66f451Sopenharmony_ci  unsigned crc, *crc_table = dd->crctable;
4270f66f451Sopenharmony_ci
4280f66f451Sopenharmony_ci  crc = dd->crc;
4290f66f451Sopenharmony_ci  for (i = 0; i<len; i++) crc = crc_table[(crc^data[i])&0xff] ^ (crc>>8);
4300f66f451Sopenharmony_ci  dd->crc = crc;
4310f66f451Sopenharmony_ci  dd->len += len;
4320f66f451Sopenharmony_ci}
4330f66f451Sopenharmony_ci
4340f66f451Sopenharmony_ci/*
4350f66f451Sopenharmony_ci// Start with crc = 1, or pass in last crc to append more data
4360f66f451Sopenharmony_ciunsigned adler32(char *buf, unsigned len, unsigned crc)
4370f66f451Sopenharmony_ci{
4380f66f451Sopenharmony_ci  unsigned aa = crc&((1<<16)-1), bb = crc>>16;
4390f66f451Sopenharmony_ci  while (len--) {
4400f66f451Sopenharmony_ci    aa = (aa+*buf)%65521;
4410f66f451Sopenharmony_ci    bb = (bb+aa)%65521;
4420f66f451Sopenharmony_ci  }
4430f66f451Sopenharmony_ci  return (bb<16)+aa;
4440f66f451Sopenharmony_ci}
4450f66f451Sopenharmony_ci*/
4460f66f451Sopenharmony_ci
4470f66f451Sopenharmony_cilong long gzip_fd(int infd, int outfd)
4480f66f451Sopenharmony_ci{
4490f66f451Sopenharmony_ci  struct bitbuf *bb = bitbuf_init(outfd, 4096);
4500f66f451Sopenharmony_ci  struct deflate *dd = init_deflate(1);
4510f66f451Sopenharmony_ci  long long rc;
4520f66f451Sopenharmony_ci
4530f66f451Sopenharmony_ci  // Header from RFC 1952 section 2.2:
4540f66f451Sopenharmony_ci  // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none),
4550f66f451Sopenharmony_ci  // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression),
4560f66f451Sopenharmony_ci  // Operating System (FF=unknown)
4570f66f451Sopenharmony_ci
4580f66f451Sopenharmony_ci  dd->infd = infd;
4590f66f451Sopenharmony_ci  xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10);
4600f66f451Sopenharmony_ci
4610f66f451Sopenharmony_ci  // Little endian crc table
4620f66f451Sopenharmony_ci  crc_init(dd->crctable, 1);
4630f66f451Sopenharmony_ci  dd->crcfunc = gzip_crc;
4640f66f451Sopenharmony_ci
4650f66f451Sopenharmony_ci  deflate(dd, bb);
4660f66f451Sopenharmony_ci
4670f66f451Sopenharmony_ci  // tail: crc32, len32
4680f66f451Sopenharmony_ci
4690f66f451Sopenharmony_ci  bitbuf_put(bb, 0, (8-bb->bitpos)&7);
4700f66f451Sopenharmony_ci  bitbuf_put(bb, ~dd->crc, 32);
4710f66f451Sopenharmony_ci  bitbuf_put(bb, dd->len, 32);
4720f66f451Sopenharmony_ci  rc = dd->len;
4730f66f451Sopenharmony_ci
4740f66f451Sopenharmony_ci  bitbuf_flush(bb);
4750f66f451Sopenharmony_ci  free(bb);
4760f66f451Sopenharmony_ci  free(dd);
4770f66f451Sopenharmony_ci
4780f66f451Sopenharmony_ci  return rc;
4790f66f451Sopenharmony_ci}
4800f66f451Sopenharmony_ci
4810f66f451Sopenharmony_cilong long gunzip_fd(int infd, int outfd)
4820f66f451Sopenharmony_ci{
4830f66f451Sopenharmony_ci  struct bitbuf *bb = bitbuf_init(infd, 4096);
4840f66f451Sopenharmony_ci  struct deflate *dd = init_deflate(0);
4850f66f451Sopenharmony_ci  long long rc = 0;
4860f66f451Sopenharmony_ci
4870f66f451Sopenharmony_ci  // Little endian crc table
4880f66f451Sopenharmony_ci  crc_init(dd->crctable, 1);
4890f66f451Sopenharmony_ci  dd->crcfunc = gzip_crc;
4900f66f451Sopenharmony_ci  dd->outfd = outfd;
4910f66f451Sopenharmony_ci
4920f66f451Sopenharmony_ci  do {
4930f66f451Sopenharmony_ci    if (!is_gzip(bb)) error_exit("not gzip");
4940f66f451Sopenharmony_ci
4950f66f451Sopenharmony_ci    inflate(dd, bb);
4960f66f451Sopenharmony_ci
4970f66f451Sopenharmony_ci    // tail: crc32, len32
4980f66f451Sopenharmony_ci    bitbuf_skip(bb, (8-bb->bitpos)&7);
4990f66f451Sopenharmony_ci    if (~dd->crc != bitbuf_get(bb, 32) || dd->len != bitbuf_get(bb, 32))
5000f66f451Sopenharmony_ci      error_exit("bad crc");
5010f66f451Sopenharmony_ci    rc += dd->len;
5020f66f451Sopenharmony_ci
5030f66f451Sopenharmony_ci    bitbuf_skip(bb, (8-bb->bitpos)&7);
5040f66f451Sopenharmony_ci    dd->pos = dd->len = 0;
5050f66f451Sopenharmony_ci  } while (bitbuf_skip(bb, 0));
5060f66f451Sopenharmony_ci  free(bb);
5070f66f451Sopenharmony_ci  free(dd);
5080f66f451Sopenharmony_ci
5090f66f451Sopenharmony_ci  return rc;
5100f66f451Sopenharmony_ci}
511