/* gznorm.c -- normalize a gzip stream
 * Copyright (C) 2018 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 * Version 1.0  7 Oct 2018  Mark Adler */
5275793eaSopenharmony_ci
// gznorm takes a gzip stream, potentially containing multiple members, and
// converts it to a gzip stream with a single member. In addition, the gzip
// header is normalized, removing the file name and time stamp, and setting the
// other header contents (XFL, OS) to fixed values. gznorm does not recompress
// the data, so it is fast, but no advantage is gained from the history that
// could be available across member boundaries.
12275793eaSopenharmony_ci
13275793eaSopenharmony_ci#include <stdio.h>      // fread, fwrite, putc, fflush, ferror, fprintf,
14275793eaSopenharmony_ci                        // vsnprintf, stdout, stderr, NULL, FILE
15275793eaSopenharmony_ci#include <stdlib.h>     // malloc, free
16275793eaSopenharmony_ci#include <string.h>     // strerror
17275793eaSopenharmony_ci#include <errno.h>      // errno
18275793eaSopenharmony_ci#include <stdarg.h>     // va_list, va_start, va_end
19275793eaSopenharmony_ci#include "zlib.h"       // inflateInit2, inflate, inflateReset, inflateEnd,
20275793eaSopenharmony_ci                        // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK,
21275793eaSopenharmony_ci                        // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR,
22275793eaSopenharmony_ci                        // Z_MEM_ERROR
23275793eaSopenharmony_ci
24275793eaSopenharmony_ci#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25275793eaSopenharmony_ci#  include <fcntl.h>
26275793eaSopenharmony_ci#  include <io.h>
27275793eaSopenharmony_ci#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
28275793eaSopenharmony_ci#else
29275793eaSopenharmony_ci#  define SET_BINARY_MODE(file)
30275793eaSopenharmony_ci#endif
31275793eaSopenharmony_ci
32275793eaSopenharmony_ci#define local static
33275793eaSopenharmony_ci
// printf to an allocated string. fmt and the following arguments are
// interpreted exactly as by printf(). Returns a newly allocated,
// NUL-terminated string that the caller must free(), or NULL if either
// formatting pass or the allocation fails.
local char *aprintf(const char *fmt, ...) {
    // First pass: measure the formatted length, not counting the
    // terminating NUL.
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(NULL, 0, fmt, args);
    va_end(args);
    if (len < 0)
        return NULL;

    // Compute the buffer size in size_t so that len == INT_MAX cannot
    // overflow a signed int.
    size_t size = (size_t)len + 1;
    char *str = malloc(size);
    if (str == NULL)
        return NULL;

    // Second pass: format into the buffer. The va_list has to be restarted,
    // since the first vsnprintf() left it in an indeterminate state.
    va_start(args, fmt);
    int ret = vsnprintf(str, size, fmt, args);
    va_end(args);
    if (ret < 0) {
        // Formatting failed, so don't hand back a buffer with unspecified
        // contents.
        free(str);
        return NULL;
    }
    return str;
}
54275793eaSopenharmony_ci
// Abandon the enclosing function with an error: release the inflate state in
// strm, store an allocated, printf-style formatted message in *err, and
// return 1. This expands in place, so it relies on the caller having strm and
// err in scope. Calling inflateEnd() on a state that has already been ended,
// or whose state is Z_NULL, is permitted.
#define BYE(...)                            \
    do {                                    \
        (void)inflateEnd(&strm);            \
        *err = aprintf(__VA_ARGS__);        \
        return 1;                           \
    } while (0)
64275793eaSopenharmony_ci
65275793eaSopenharmony_ci// Chunk size for buffered reads and for decompression. Twice this many bytes
66275793eaSopenharmony_ci// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned.
67275793eaSopenharmony_ci#define CHUNK 16384
68275793eaSopenharmony_ci
69275793eaSopenharmony_ci// Read a gzip stream from in and write an equivalent normalized gzip stream to
70275793eaSopenharmony_ci// out. If given no input, an empty gzip stream will be written. If successful,
71275793eaSopenharmony_ci// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the
72275793eaSopenharmony_ci// details of the error are returned in *err, a pointer to an allocated string.
73275793eaSopenharmony_ci//
74275793eaSopenharmony_ci// The input may be a stream with multiple gzip members, which is converted to
75275793eaSopenharmony_ci// a single gzip member on the output. Each gzip member is decompressed at the
76275793eaSopenharmony_ci// level of deflate blocks. This enables clearing the last-block bit, shifting
77275793eaSopenharmony_ci// the compressed data to concatenate to the previous member's compressed data,
78275793eaSopenharmony_ci// which can end at an arbitrary bit boundary, and identifying stored blocks in
79275793eaSopenharmony_ci// order to resynchronize those to byte boundaries. The deflate compressed data
80275793eaSopenharmony_ci// is terminated with a 10-bit empty fixed block. If any members on the input
81275793eaSopenharmony_ci// end with a 10-bit empty fixed block, then that block is excised from the
82275793eaSopenharmony_ci// stream. This avoids appending empty fixed blocks for every normalization,
83275793eaSopenharmony_ci// and assures that gzip_normalize applied a second time will not change the
84275793eaSopenharmony_ci// input. The pad bits after stored block headers and after the final deflate
85275793eaSopenharmony_ci// block are all forced to zeros.
86275793eaSopenharmony_cilocal int gzip_normalize(FILE *in, FILE *out, char **err) {
87275793eaSopenharmony_ci    // initialize the inflate engine to process a gzip member
88275793eaSopenharmony_ci    z_stream strm;
89275793eaSopenharmony_ci    strm.zalloc = Z_NULL;
90275793eaSopenharmony_ci    strm.zfree = Z_NULL;
91275793eaSopenharmony_ci    strm.opaque = Z_NULL;
92275793eaSopenharmony_ci    strm.avail_in = 0;
93275793eaSopenharmony_ci    strm.next_in = Z_NULL;
94275793eaSopenharmony_ci    if (inflateInit2(&strm, 15 + 16) != Z_OK)
95275793eaSopenharmony_ci        BYE("out of memory");
96275793eaSopenharmony_ci
97275793eaSopenharmony_ci    // State while processing the input gzip stream.
98275793eaSopenharmony_ci    enum {              // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ...
99275793eaSopenharmony_ci        BETWEEN,        // between gzip members (must end in this state)
100275793eaSopenharmony_ci        HEAD,           // reading a gzip header
101275793eaSopenharmony_ci        BLOCK,          // reading deflate blocks
102275793eaSopenharmony_ci        TAIL            // reading a gzip trailer
103275793eaSopenharmony_ci    } state = BETWEEN;              // current component being processed
104275793eaSopenharmony_ci    unsigned long crc = 0;          // accumulated CRC of uncompressed data
105275793eaSopenharmony_ci    unsigned long len = 0;          // accumulated length of uncompressed data
106275793eaSopenharmony_ci    unsigned long buf = 0;          // deflate stream bit buffer of num bits
107275793eaSopenharmony_ci    int num = 0;                    // number of bits in buf (at bottom)
108275793eaSopenharmony_ci
109275793eaSopenharmony_ci    // Write a canonical gzip header (no mod time, file name, comment, extra
110275793eaSopenharmony_ci    // block, or extra flags, and OS is marked as unknown).
111275793eaSopenharmony_ci    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
112275793eaSopenharmony_ci
113275793eaSopenharmony_ci    // Process the gzip stream from in until reaching the end of the input,
114275793eaSopenharmony_ci    // encountering invalid input, or experiencing an i/o error.
115275793eaSopenharmony_ci    int more;                       // true if not at the end of the input
116275793eaSopenharmony_ci    do {
117275793eaSopenharmony_ci        // State inside this loop.
118275793eaSopenharmony_ci        unsigned char *put;         // next input buffer location to process
119275793eaSopenharmony_ci        int prev;                   // number of bits from previous block in
120275793eaSopenharmony_ci                                    // the bit buffer, or -1 if not at the
121275793eaSopenharmony_ci                                    // start of a block
122275793eaSopenharmony_ci        unsigned long long memb;    // uncompressed length of member
123275793eaSopenharmony_ci        size_t tail;                // number of trailer bytes read (0..8)
124275793eaSopenharmony_ci        unsigned long part;         // accumulated trailer component
125275793eaSopenharmony_ci
126275793eaSopenharmony_ci        // Get the next chunk of input from in.
127275793eaSopenharmony_ci        unsigned char dat[CHUNK];
128275793eaSopenharmony_ci        strm.avail_in = fread(dat, 1, CHUNK, in);
129275793eaSopenharmony_ci        if (strm.avail_in == 0)
130275793eaSopenharmony_ci            break;
131275793eaSopenharmony_ci        more = strm.avail_in == CHUNK;
132275793eaSopenharmony_ci        strm.next_in = put = dat;
133275793eaSopenharmony_ci
134275793eaSopenharmony_ci        // Run that chunk of input through the inflate engine to exhaustion.
135275793eaSopenharmony_ci        do {
136275793eaSopenharmony_ci            // At this point it is assured that strm.avail_in > 0.
137275793eaSopenharmony_ci
138275793eaSopenharmony_ci            // Inflate until the end of a gzip component (header, deflate
139275793eaSopenharmony_ci            // block, trailer) is reached, or until all of the chunk is
140275793eaSopenharmony_ci            // consumed. The resulting decompressed data is discarded, though
141275793eaSopenharmony_ci            // the total size of the decompressed data in each member is
142275793eaSopenharmony_ci            // tracked, for the calculation of the total CRC.
143275793eaSopenharmony_ci            do {
144275793eaSopenharmony_ci                // inflate and handle any errors
145275793eaSopenharmony_ci                unsigned char scrap[CHUNK];
146275793eaSopenharmony_ci                strm.avail_out = CHUNK;
147275793eaSopenharmony_ci                strm.next_out = scrap;
148275793eaSopenharmony_ci                int ret = inflate(&strm, Z_BLOCK);
149275793eaSopenharmony_ci                if (ret == Z_MEM_ERROR)
150275793eaSopenharmony_ci                    BYE("out of memory");
151275793eaSopenharmony_ci                if (ret == Z_DATA_ERROR)
152275793eaSopenharmony_ci                    BYE("input invalid: %s", strm.msg);
153275793eaSopenharmony_ci                if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END)
154275793eaSopenharmony_ci                    BYE("internal error");
155275793eaSopenharmony_ci
156275793eaSopenharmony_ci                // Update the number of uncompressed bytes generated in this
157275793eaSopenharmony_ci                // member. The actual count (not modulo 2^32) is required to
158275793eaSopenharmony_ci                // correctly compute the total CRC.
159275793eaSopenharmony_ci                unsigned got = CHUNK - strm.avail_out;
160275793eaSopenharmony_ci                memb += got;
161275793eaSopenharmony_ci                if (memb < got)
162275793eaSopenharmony_ci                    BYE("overflow error");
163275793eaSopenharmony_ci
164275793eaSopenharmony_ci                // Continue to process this chunk until it is consumed, or
165275793eaSopenharmony_ci                // until the end of a component (header, deflate block, or
166275793eaSopenharmony_ci                // trailer) is reached.
167275793eaSopenharmony_ci            } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0);
168275793eaSopenharmony_ci
169275793eaSopenharmony_ci            // Since strm.avail_in was > 0 for the inflate call, some input was
170275793eaSopenharmony_ci            // just consumed. It is therefore assured that put < strm.next_in.
171275793eaSopenharmony_ci
172275793eaSopenharmony_ci            // Disposition the consumed component or part of a component.
173275793eaSopenharmony_ci            switch (state) {
174275793eaSopenharmony_ci                case BETWEEN:
175275793eaSopenharmony_ci                    state = HEAD;
176275793eaSopenharmony_ci                    // Fall through to HEAD when some or all of the header is
177275793eaSopenharmony_ci                    // processed.
178275793eaSopenharmony_ci
179275793eaSopenharmony_ci                case HEAD:
180275793eaSopenharmony_ci                    // Discard the header.
181275793eaSopenharmony_ci                    if (strm.data_type & 0x80) {
182275793eaSopenharmony_ci                        // End of header reached -- deflate blocks follow.
183275793eaSopenharmony_ci                        put = strm.next_in;
184275793eaSopenharmony_ci                        prev = num;
185275793eaSopenharmony_ci                        memb = 0;
186275793eaSopenharmony_ci                        state = BLOCK;
187275793eaSopenharmony_ci                    }
188275793eaSopenharmony_ci                    break;
189275793eaSopenharmony_ci
190275793eaSopenharmony_ci                case BLOCK:
191275793eaSopenharmony_ci                    // Copy the deflate stream to the output, but with the
192275793eaSopenharmony_ci                    // last-block-bit cleared. Re-synchronize stored block
193275793eaSopenharmony_ci                    // headers to the output byte boundaries. The bytes at
194275793eaSopenharmony_ci                    // put..strm.next_in-1 is the compressed data that has been
195275793eaSopenharmony_ci                    // processed and is ready to be copied to the output.
196275793eaSopenharmony_ci
197275793eaSopenharmony_ci                    // At this point, it is assured that new compressed data is
198275793eaSopenharmony_ci                    // available, i.e., put < strm.next_in. If prev is -1, then
199275793eaSopenharmony_ci                    // that compressed data starts in the middle of a deflate
200275793eaSopenharmony_ci                    // block. If prev is not -1, then the bits in the bit
201275793eaSopenharmony_ci                    // buffer, possibly combined with the bits in *put, contain
202275793eaSopenharmony_ci                    // the three-bit header of the new deflate block. In that
203275793eaSopenharmony_ci                    // case, prev is the number of bits from the previous block
204275793eaSopenharmony_ci                    // that remain in the bit buffer. Since num is the number
205275793eaSopenharmony_ci                    // of bits in the bit buffer, we have that num - prev is
206275793eaSopenharmony_ci                    // the number of bits from the new block currently in the
207275793eaSopenharmony_ci                    // bit buffer.
208275793eaSopenharmony_ci
209275793eaSopenharmony_ci                    // If strm.data_type & 0xc0 is 0x80, then the last byte of
210275793eaSopenharmony_ci                    // the available compressed data includes the last bits of
211275793eaSopenharmony_ci                    // the end of a deflate block. In that case, that last byte
212275793eaSopenharmony_ci                    // also has strm.data_type & 0x1f bits of the next deflate
213275793eaSopenharmony_ci                    // block, in the range 0..7. If strm.data_type & 0xc0 is
214275793eaSopenharmony_ci                    // 0xc0, then the last byte of the compressed data is the
215275793eaSopenharmony_ci                    // end of the deflate stream, followed by strm.data_type &
216275793eaSopenharmony_ci                    // 0x1f pad bits, also in the range 0..7.
217275793eaSopenharmony_ci
218275793eaSopenharmony_ci                    // Set bits to the number of bits not yet consumed from the
219275793eaSopenharmony_ci                    // last byte. If we are at the end of the block, bits is
220275793eaSopenharmony_ci                    // either the number of bits in the last byte belonging to
221275793eaSopenharmony_ci                    // the next block, or the number of pad bits after the
222275793eaSopenharmony_ci                    // final block. In either of those cases, bits is in the
223275793eaSopenharmony_ci                    // range 0..7.
224275793eaSopenharmony_ci                    ;                   // (required due to C syntax oddity)
225275793eaSopenharmony_ci                    int bits = strm.data_type & 0x1f;
226275793eaSopenharmony_ci
227275793eaSopenharmony_ci                    if (prev != -1) {
228275793eaSopenharmony_ci                        // We are at the start of a new block. Clear the last
229275793eaSopenharmony_ci                        // block bit, and check for special cases. If it is a
230275793eaSopenharmony_ci                        // stored block, then emit the header and pad to the
231275793eaSopenharmony_ci                        // next byte boundary. If it is a final, empty fixed
232275793eaSopenharmony_ci                        // block, then excise it.
233275793eaSopenharmony_ci
234275793eaSopenharmony_ci                        // Some or all of the three header bits for this block
235275793eaSopenharmony_ci                        // may already be in the bit buffer. Load any remaining
236275793eaSopenharmony_ci                        // header bits into the bit buffer.
237275793eaSopenharmony_ci                        if (num - prev < 3) {
238275793eaSopenharmony_ci                            buf += (unsigned long)*put++ << num;
239275793eaSopenharmony_ci                            num += 8;
240275793eaSopenharmony_ci                        }
241275793eaSopenharmony_ci
242275793eaSopenharmony_ci                        // Set last to have a 1 in the position of the last
243275793eaSopenharmony_ci                        // block bit in the bit buffer.
244275793eaSopenharmony_ci                        unsigned long last = (unsigned long)1 << prev;
245275793eaSopenharmony_ci
246275793eaSopenharmony_ci                        if (((buf >> prev) & 7) == 3) {
247275793eaSopenharmony_ci                            // This is a final fixed block. Load at least ten
248275793eaSopenharmony_ci                            // bits from this block, including the header, into
249275793eaSopenharmony_ci                            // the bit buffer. We already have at least three,
250275793eaSopenharmony_ci                            // so at most one more byte needs to be loaded.
251275793eaSopenharmony_ci                            if (num - prev < 10) {
252275793eaSopenharmony_ci                                if (put == strm.next_in)
253275793eaSopenharmony_ci                                    // Need to go get and process more input.
254275793eaSopenharmony_ci                                    // We'll end up back here to finish this.
255275793eaSopenharmony_ci                                    break;
256275793eaSopenharmony_ci                                buf += (unsigned long)*put++ << num;
257275793eaSopenharmony_ci                                num += 8;
258275793eaSopenharmony_ci                            }
259275793eaSopenharmony_ci                            if (((buf >> prev) & 0x3ff) == 3) {
260275793eaSopenharmony_ci                                // That final fixed block is empty. Delete it
261275793eaSopenharmony_ci                                // to avoid adding an empty block every time a
262275793eaSopenharmony_ci                                // gzip stream is normalized.
263275793eaSopenharmony_ci                                num = prev;
264275793eaSopenharmony_ci                                buf &= last - 1;    // zero the pad bits
265275793eaSopenharmony_ci                            }
266275793eaSopenharmony_ci                        }
267275793eaSopenharmony_ci                        else if (((buf >> prev) & 6) == 0) {
268275793eaSopenharmony_ci                            // This is a stored block. Flush to the next
269275793eaSopenharmony_ci                            // byte boundary after the three-bit header.
270275793eaSopenharmony_ci                            num = (prev + 10) & ~7;
271275793eaSopenharmony_ci                            buf &= last - 1;        // zero the pad bits
272275793eaSopenharmony_ci                        }
273275793eaSopenharmony_ci
274275793eaSopenharmony_ci                        // Clear the last block bit.
275275793eaSopenharmony_ci                        buf &= ~last;
276275793eaSopenharmony_ci
277275793eaSopenharmony_ci                        // Write out complete bytes in the bit buffer.
278275793eaSopenharmony_ci                        while (num >= 8) {
279275793eaSopenharmony_ci                            putc(buf, out);
280275793eaSopenharmony_ci                            buf >>= 8;
281275793eaSopenharmony_ci                            num -= 8;
282275793eaSopenharmony_ci                        }
283275793eaSopenharmony_ci
284275793eaSopenharmony_ci                        // If no more bytes left to process, then we have
285275793eaSopenharmony_ci                        // consumed the byte that had bits from the next block.
286275793eaSopenharmony_ci                        if (put == strm.next_in)
287275793eaSopenharmony_ci                            bits = 0;
288275793eaSopenharmony_ci                    }
289275793eaSopenharmony_ci
290275793eaSopenharmony_ci                    // We are done handling the deflate block header. Now copy
291275793eaSopenharmony_ci                    // all or almost all of the remaining compressed data that
292275793eaSopenharmony_ci                    // has been processed so far. Don't copy one byte at the
293275793eaSopenharmony_ci                    // end if it contains bits from the next deflate block or
294275793eaSopenharmony_ci                    // pad bits at the end of a deflate block.
295275793eaSopenharmony_ci
296275793eaSopenharmony_ci                    // mix is 1 if we are at the end of a deflate block, and if
297275793eaSopenharmony_ci                    // some of the bits in the last byte follow this block. mix
298275793eaSopenharmony_ci                    // is 0 if we are in the middle of a deflate block, if the
299275793eaSopenharmony_ci                    // deflate block ended on a byte boundary, or if all of the
300275793eaSopenharmony_ci                    // compressed data processed so far has been consumed.
301275793eaSopenharmony_ci                    int mix = (strm.data_type & 0x80) && bits;
302275793eaSopenharmony_ci
303275793eaSopenharmony_ci                    // Copy all of the processed compressed data to the output,
304275793eaSopenharmony_ci                    // except for the last byte if it contains bits from the
305275793eaSopenharmony_ci                    // next deflate block or pad bits at the end of the deflate
306275793eaSopenharmony_ci                    // stream. Copy the data after shifting in num bits from
307275793eaSopenharmony_ci                    // buf in front of it, leaving num bits from the end of the
308275793eaSopenharmony_ci                    // compressed data in buf when done.
309275793eaSopenharmony_ci                    unsigned char *end = strm.next_in - mix;
310275793eaSopenharmony_ci                    if (put < end) {
311275793eaSopenharmony_ci                        if (num)
312275793eaSopenharmony_ci                            // Insert num bits from buf before the data being
313275793eaSopenharmony_ci                            // copied.
314275793eaSopenharmony_ci                            do {
315275793eaSopenharmony_ci                                buf += (unsigned)(*put++) << num;
316275793eaSopenharmony_ci                                putc(buf, out);
317275793eaSopenharmony_ci                                buf >>= 8;
318275793eaSopenharmony_ci                            } while (put < end);
319275793eaSopenharmony_ci                        else {
320275793eaSopenharmony_ci                            // No shifting needed -- write directly.
321275793eaSopenharmony_ci                            fwrite(put, 1, end - put, out);
322275793eaSopenharmony_ci                            put = end;
323275793eaSopenharmony_ci                        }
324275793eaSopenharmony_ci                    }
325275793eaSopenharmony_ci
326275793eaSopenharmony_ci                    // Process the last processed byte if it wasn't written.
327275793eaSopenharmony_ci                    if (mix) {
328275793eaSopenharmony_ci                        // Load the last byte into the bit buffer.
329275793eaSopenharmony_ci                        buf += (unsigned)(*put++) << num;
330275793eaSopenharmony_ci                        num += 8;
331275793eaSopenharmony_ci
332275793eaSopenharmony_ci                        if (strm.data_type & 0x40) {
333275793eaSopenharmony_ci                            // We are at the end of the deflate stream and
334275793eaSopenharmony_ci                            // there are bits pad bits. Discard the pad bits
335275793eaSopenharmony_ci                            // and write a byte to the output, if available.
336275793eaSopenharmony_ci                            // Leave the num bits left over in buf to prepend
337275793eaSopenharmony_ci                            // to the next deflate stream.
338275793eaSopenharmony_ci                            num -= bits;
339275793eaSopenharmony_ci                            if (num >= 8) {
340275793eaSopenharmony_ci                                putc(buf, out);
341275793eaSopenharmony_ci                                num -= 8;
342275793eaSopenharmony_ci                                buf >>= 8;
343275793eaSopenharmony_ci                            }
344275793eaSopenharmony_ci
345275793eaSopenharmony_ci                            // Force the pad bits in the bit buffer to zeros.
346275793eaSopenharmony_ci                            buf &= ((unsigned long)1 << num) - 1;
347275793eaSopenharmony_ci
348275793eaSopenharmony_ci                            // Don't need to set prev here since going to TAIL.
349275793eaSopenharmony_ci                        }
350275793eaSopenharmony_ci                        else
351275793eaSopenharmony_ci                            // At the end of an internal deflate block. Leave
352275793eaSopenharmony_ci                            // the last byte in the bit buffer to examine on
353275793eaSopenharmony_ci                            // the next entry to BLOCK, when more bits from the
354275793eaSopenharmony_ci                            // next block will be available.
355275793eaSopenharmony_ci                            prev = num - bits;      // number of bits in buffer
356275793eaSopenharmony_ci                                                    // from current block
357275793eaSopenharmony_ci                    }
358275793eaSopenharmony_ci
359275793eaSopenharmony_ci                    // Don't have a byte left over, so we are in the middle of
360275793eaSopenharmony_ci                    // a deflate block, or the deflate block ended on a byte
361275793eaSopenharmony_ci                    // boundary. Set prev appropriately for the next entry into
362275793eaSopenharmony_ci                    // BLOCK.
363275793eaSopenharmony_ci                    else if (strm.data_type & 0x80)
364275793eaSopenharmony_ci                        // The block ended on a byte boundary, so no header
365275793eaSopenharmony_ci                        // bits are in the bit buffer.
366275793eaSopenharmony_ci                        prev = num;
367275793eaSopenharmony_ci                    else
368275793eaSopenharmony_ci                        // In the middle of a deflate block, so no header here.
369275793eaSopenharmony_ci                        prev = -1;
370275793eaSopenharmony_ci
371275793eaSopenharmony_ci                    // Check for the end of the deflate stream.
372275793eaSopenharmony_ci                    if ((strm.data_type & 0xc0) == 0xc0) {
373275793eaSopenharmony_ci                        // That ends the deflate stream on the input side, the
374275793eaSopenharmony_ci                        // pad bits were discarded, and any remaining bits from
375275793eaSopenharmony_ci                        // the last block in the stream are saved in the bit
376275793eaSopenharmony_ci                        // buffer to prepend to the next stream. Process the
377275793eaSopenharmony_ci                        // gzip trailer next.
378275793eaSopenharmony_ci                        tail = 0;
379275793eaSopenharmony_ci                        part = 0;
380275793eaSopenharmony_ci                        state = TAIL;
381275793eaSopenharmony_ci                    }
382275793eaSopenharmony_ci                    break;
383275793eaSopenharmony_ci
384275793eaSopenharmony_ci                case TAIL:
385275793eaSopenharmony_ci                    // Accumulate available trailer bytes to update the total
386275793eaSopenharmony_ci                    // CRC and the total uncompressed length.
387275793eaSopenharmony_ci                    do {
388275793eaSopenharmony_ci                        part = (part >> 8) + ((unsigned long)(*put++) << 24);
389275793eaSopenharmony_ci                        tail++;
390275793eaSopenharmony_ci                        if (tail == 4) {
391275793eaSopenharmony_ci                            // Update the total CRC.
392275793eaSopenharmony_ci                            z_off_t len2 = memb;
393275793eaSopenharmony_ci                            if (len2 < 0 || (unsigned long long)len2 != memb)
394275793eaSopenharmony_ci                                BYE("overflow error");
395275793eaSopenharmony_ci                            crc = crc ? crc32_combine(crc, part, len2) : part;
396275793eaSopenharmony_ci                            part = 0;
397275793eaSopenharmony_ci                        }
398275793eaSopenharmony_ci                        else if (tail == 8) {
399275793eaSopenharmony_ci                            // Update the total uncompressed length. (It's ok
400275793eaSopenharmony_ci                            // if this sum is done modulo 2^32.)
401275793eaSopenharmony_ci                            len += part;
402275793eaSopenharmony_ci
403275793eaSopenharmony_ci                            // At the end of a member. Set up to inflate an
404275793eaSopenharmony_ci                            // immediately following gzip member. (If we made
405275793eaSopenharmony_ci                            // it this far, then the trailer was valid.)
406275793eaSopenharmony_ci                            if (inflateReset(&strm) != Z_OK)
407275793eaSopenharmony_ci                                BYE("internal error");
408275793eaSopenharmony_ci                            state = BETWEEN;
409275793eaSopenharmony_ci                            break;
410275793eaSopenharmony_ci                        }
411275793eaSopenharmony_ci                    } while (put < strm.next_in);
412275793eaSopenharmony_ci                    break;
413275793eaSopenharmony_ci            }
414275793eaSopenharmony_ci
415275793eaSopenharmony_ci            // Process the input buffer until completely consumed.
416275793eaSopenharmony_ci        } while (strm.avail_in > 0);
417275793eaSopenharmony_ci
418275793eaSopenharmony_ci        // Process input until end of file, invalid input, or i/o error.
419275793eaSopenharmony_ci    } while (more);
420275793eaSopenharmony_ci
421275793eaSopenharmony_ci    // Done with the inflate engine.
422275793eaSopenharmony_ci    inflateEnd(&strm);
423275793eaSopenharmony_ci
424275793eaSopenharmony_ci    // Verify the validity of the input.
425275793eaSopenharmony_ci    if (state != BETWEEN)
426275793eaSopenharmony_ci        BYE("input invalid: incomplete gzip stream");
427275793eaSopenharmony_ci
428275793eaSopenharmony_ci    // Write the remaining deflate stream bits, followed by a terminating
429275793eaSopenharmony_ci    // deflate fixed block.
430275793eaSopenharmony_ci    buf += (unsigned long)3 << num;
431275793eaSopenharmony_ci    putc(buf, out);
432275793eaSopenharmony_ci    putc(buf >> 8, out);
433275793eaSopenharmony_ci    if (num > 6)
434275793eaSopenharmony_ci        putc(0, out);
435275793eaSopenharmony_ci
436275793eaSopenharmony_ci    // Write the gzip trailer, which is the CRC and the uncompressed length
437275793eaSopenharmony_ci    // modulo 2^32, both in little-endian order.
438275793eaSopenharmony_ci    putc(crc, out);
439275793eaSopenharmony_ci    putc(crc >> 8, out);
440275793eaSopenharmony_ci    putc(crc >> 16, out);
441275793eaSopenharmony_ci    putc(crc >> 24, out);
442275793eaSopenharmony_ci    putc(len, out);
443275793eaSopenharmony_ci    putc(len >> 8, out);
444275793eaSopenharmony_ci    putc(len >> 16, out);
445275793eaSopenharmony_ci    putc(len >> 24, out);
446275793eaSopenharmony_ci    fflush(out);
447275793eaSopenharmony_ci
448275793eaSopenharmony_ci    // Check for any i/o errors.
449275793eaSopenharmony_ci    if (ferror(in) || ferror(out))
450275793eaSopenharmony_ci        BYE("i/o error: %s", strerror(errno));
451275793eaSopenharmony_ci
452275793eaSopenharmony_ci    // All good!
453275793eaSopenharmony_ci    *err = NULL;
454275793eaSopenharmony_ci    return 0;
455275793eaSopenharmony_ci}
456275793eaSopenharmony_ci
// Normalize the gzip stream on stdin, writing the result to stdout. Returns
// 0 on success. On failure, a message is printed to stderr and 1 is returned.
int main(void) {
    // Avoid end-of-line conversions on evil operating systems.
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    // Normalize from stdin to stdout, returning 1 on error, 0 if ok. err is
    // initialized so that it is always safe to free().
    char *err = NULL;
    int ret = gzip_normalize(stdin, stdout, &err);
    if (ret)
        // gzip_normalize() leaves *err NULL if the message itself could not
        // be formatted or allocated. Passing NULL for %s is undefined
        // behavior, so substitute a fixed message in that case.
        fprintf(stderr, "gznorm error: %s\n",
                err != NULL ? err
                            : "unknown error (could not build error message)");
    free(err);
    return ret;
}
471