1275793eaSopenharmony_ci/* gznorm.c -- normalize a gzip stream 2275793eaSopenharmony_ci * Copyright (C) 2018 Mark Adler 3275793eaSopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h 4275793eaSopenharmony_ci * Version 1.0 7 Oct 2018 Mark Adler */ 5275793eaSopenharmony_ci 6275793eaSopenharmony_ci// gznorm takes a gzip stream, potentially containing multiple members, and 7275793eaSopenharmony_ci// converts it to a gzip stream with a single member. In addition the gzip 8275793eaSopenharmony_ci// header is normalized, removing the file name and time stamp, and setting the 9275793eaSopenharmony_ci// other header contents (XFL, OS) to fixed values. gznorm does not recompress 10275793eaSopenharmony_ci// the data, so it is fast, but no advantage is gained from the history that 11275793eaSopenharmony_ci// could be available across member boundaries. 12275793eaSopenharmony_ci 13275793eaSopenharmony_ci#include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf, 14275793eaSopenharmony_ci // vsnprintf, stdout, stderr, NULL, FILE 15275793eaSopenharmony_ci#include <stdlib.h> // malloc, free 16275793eaSopenharmony_ci#include <string.h> // strerror 17275793eaSopenharmony_ci#include <errno.h> // errno 18275793eaSopenharmony_ci#include <stdarg.h> // va_list, va_start, va_end 19275793eaSopenharmony_ci#include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd, 20275793eaSopenharmony_ci // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK, 21275793eaSopenharmony_ci // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR, 22275793eaSopenharmony_ci // Z_MEM_ERROR 23275793eaSopenharmony_ci 24275793eaSopenharmony_ci#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 25275793eaSopenharmony_ci# include <fcntl.h> 26275793eaSopenharmony_ci# include <io.h> 27275793eaSopenharmony_ci# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 28275793eaSopenharmony_ci#else 29275793eaSopenharmony_ci# define SET_BINARY_MODE(file) 30275793eaSopenharmony_ci#endif 31275793eaSopenharmony_ci 32275793eaSopenharmony_ci#define local static 33275793eaSopenharmony_ci 34275793eaSopenharmony_ci// printf to an allocated string. Return the string, or NULL if the printf or 35275793eaSopenharmony_ci// allocation fails. 36275793eaSopenharmony_cilocal char *aprintf(char *fmt, ...) { 37275793eaSopenharmony_ci // Get the length of the result of the printf. 38275793eaSopenharmony_ci va_list args; 39275793eaSopenharmony_ci va_start(args, fmt); 40275793eaSopenharmony_ci int len = vsnprintf(NULL, 0, fmt, args); 41275793eaSopenharmony_ci va_end(args); 42275793eaSopenharmony_ci if (len < 0) 43275793eaSopenharmony_ci return NULL; 44275793eaSopenharmony_ci 45275793eaSopenharmony_ci // Allocate the required space and printf to it. 46275793eaSopenharmony_ci char *str = malloc(len + 1); 47275793eaSopenharmony_ci if (str == NULL) 48275793eaSopenharmony_ci return NULL; 49275793eaSopenharmony_ci va_start(args, fmt); 50275793eaSopenharmony_ci vsnprintf(str, len + 1, fmt, args); 51275793eaSopenharmony_ci va_end(args); 52275793eaSopenharmony_ci return str; 53275793eaSopenharmony_ci} 54275793eaSopenharmony_ci 55275793eaSopenharmony_ci// Return with an error, putting an allocated error message in *err. Doing an 56275793eaSopenharmony_ci// inflateEnd() on an already ended state, or one with state set to Z_NULL, is 57275793eaSopenharmony_ci// permitted. 58275793eaSopenharmony_ci#define BYE(...) \ 59275793eaSopenharmony_ci do { \ 60275793eaSopenharmony_ci inflateEnd(&strm); \ 61275793eaSopenharmony_ci *err = aprintf(__VA_ARGS__); \ 62275793eaSopenharmony_ci return 1; \ 63275793eaSopenharmony_ci } while (0) 64275793eaSopenharmony_ci 65275793eaSopenharmony_ci// Chunk size for buffered reads and for decompression. Twice this many bytes 66275793eaSopenharmony_ci// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned. 67275793eaSopenharmony_ci#define CHUNK 16384 68275793eaSopenharmony_ci 69275793eaSopenharmony_ci// Read a gzip stream from in and write an equivalent normalized gzip stream to 70275793eaSopenharmony_ci// out. If given no input, an empty gzip stream will be written. If successful, 71275793eaSopenharmony_ci// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the 72275793eaSopenharmony_ci// details of the error are returned in *err, a pointer to an allocated string. 73275793eaSopenharmony_ci// 74275793eaSopenharmony_ci// The input may be a stream with multiple gzip members, which is converted to 75275793eaSopenharmony_ci// a single gzip member on the output. Each gzip member is decompressed at the 76275793eaSopenharmony_ci// level of deflate blocks. This enables clearing the last-block bit, shifting 77275793eaSopenharmony_ci// the compressed data to concatenate to the previous member's compressed data, 78275793eaSopenharmony_ci// which can end at an arbitrary bit boundary, and identifying stored blocks in 79275793eaSopenharmony_ci// order to resynchronize those to byte boundaries. The deflate compressed data 80275793eaSopenharmony_ci// is terminated with a 10-bit empty fixed block. If any members on the input 81275793eaSopenharmony_ci// end with a 10-bit empty fixed block, then that block is excised from the 82275793eaSopenharmony_ci// stream. This avoids appending empty fixed blocks for every normalization, 83275793eaSopenharmony_ci// and assures that gzip_normalize applied a second time will not change the 84275793eaSopenharmony_ci// input. The pad bits after stored block headers and after the final deflate 85275793eaSopenharmony_ci// block are all forced to zeros. 86275793eaSopenharmony_cilocal int gzip_normalize(FILE *in, FILE *out, char **err) { 87275793eaSopenharmony_ci // initialize the inflate engine to process a gzip member 88275793eaSopenharmony_ci z_stream strm; 89275793eaSopenharmony_ci strm.zalloc = Z_NULL; 90275793eaSopenharmony_ci strm.zfree = Z_NULL; 91275793eaSopenharmony_ci strm.opaque = Z_NULL; 92275793eaSopenharmony_ci strm.avail_in = 0; 93275793eaSopenharmony_ci strm.next_in = Z_NULL; 94275793eaSopenharmony_ci if (inflateInit2(&strm, 15 + 16) != Z_OK) 95275793eaSopenharmony_ci BYE("out of memory"); 96275793eaSopenharmony_ci 97275793eaSopenharmony_ci // State while processing the input gzip stream. 98275793eaSopenharmony_ci enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ... 99275793eaSopenharmony_ci BETWEEN, // between gzip members (must end in this state) 100275793eaSopenharmony_ci HEAD, // reading a gzip header 101275793eaSopenharmony_ci BLOCK, // reading deflate blocks 102275793eaSopenharmony_ci TAIL // reading a gzip trailer 103275793eaSopenharmony_ci } state = BETWEEN; // current component being processed 104275793eaSopenharmony_ci unsigned long crc = 0; // accumulated CRC of uncompressed data 105275793eaSopenharmony_ci unsigned long len = 0; // accumulated length of uncompressed data 106275793eaSopenharmony_ci unsigned long buf = 0; // deflate stream bit buffer of num bits 107275793eaSopenharmony_ci int num = 0; // number of bits in buf (at bottom) 108275793eaSopenharmony_ci 109275793eaSopenharmony_ci // Write a canonical gzip header (no mod time, file name, comment, extra 110275793eaSopenharmony_ci // block, or extra flags, and OS is marked as unknown). 111275793eaSopenharmony_ci fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); 112275793eaSopenharmony_ci 113275793eaSopenharmony_ci // Process the gzip stream from in until reaching the end of the input, 114275793eaSopenharmony_ci // encountering invalid input, or experiencing an i/o error. 115275793eaSopenharmony_ci int more; // true if not at the end of the input 116275793eaSopenharmony_ci do { 117275793eaSopenharmony_ci // State inside this loop. 118275793eaSopenharmony_ci unsigned char *put; // next input buffer location to process 119275793eaSopenharmony_ci int prev; // number of bits from previous block in 120275793eaSopenharmony_ci // the bit buffer, or -1 if not at the 121275793eaSopenharmony_ci // start of a block 122275793eaSopenharmony_ci unsigned long long memb; // uncompressed length of member 123275793eaSopenharmony_ci size_t tail; // number of trailer bytes read (0..8) 124275793eaSopenharmony_ci unsigned long part; // accumulated trailer component 125275793eaSopenharmony_ci 126275793eaSopenharmony_ci // Get the next chunk of input from in. 127275793eaSopenharmony_ci unsigned char dat[CHUNK]; 128275793eaSopenharmony_ci strm.avail_in = fread(dat, 1, CHUNK, in); 129275793eaSopenharmony_ci if (strm.avail_in == 0) 130275793eaSopenharmony_ci break; 131275793eaSopenharmony_ci more = strm.avail_in == CHUNK; 132275793eaSopenharmony_ci strm.next_in = put = dat; 133275793eaSopenharmony_ci 134275793eaSopenharmony_ci // Run that chunk of input through the inflate engine to exhaustion. 135275793eaSopenharmony_ci do { 136275793eaSopenharmony_ci // At this point it is assured that strm.avail_in > 0. 137275793eaSopenharmony_ci 138275793eaSopenharmony_ci // Inflate until the end of a gzip component (header, deflate 139275793eaSopenharmony_ci // block, trailer) is reached, or until all of the chunk is 140275793eaSopenharmony_ci // consumed. The resulting decompressed data is discarded, though 141275793eaSopenharmony_ci // the total size of the decompressed data in each member is 142275793eaSopenharmony_ci // tracked, for the calculation of the total CRC. 143275793eaSopenharmony_ci do { 144275793eaSopenharmony_ci // inflate and handle any errors 145275793eaSopenharmony_ci unsigned char scrap[CHUNK]; 146275793eaSopenharmony_ci strm.avail_out = CHUNK; 147275793eaSopenharmony_ci strm.next_out = scrap; 148275793eaSopenharmony_ci int ret = inflate(&strm, Z_BLOCK); 149275793eaSopenharmony_ci if (ret == Z_MEM_ERROR) 150275793eaSopenharmony_ci BYE("out of memory"); 151275793eaSopenharmony_ci if (ret == Z_DATA_ERROR) 152275793eaSopenharmony_ci BYE("input invalid: %s", strm.msg); 153275793eaSopenharmony_ci if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END) 154275793eaSopenharmony_ci BYE("internal error"); 155275793eaSopenharmony_ci 156275793eaSopenharmony_ci // Update the number of uncompressed bytes generated in this 157275793eaSopenharmony_ci // member. The actual count (not modulo 2^32) is required to 158275793eaSopenharmony_ci // correctly compute the total CRC. 159275793eaSopenharmony_ci unsigned got = CHUNK - strm.avail_out; 160275793eaSopenharmony_ci memb += got; 161275793eaSopenharmony_ci if (memb < got) 162275793eaSopenharmony_ci BYE("overflow error"); 163275793eaSopenharmony_ci 164275793eaSopenharmony_ci // Continue to process this chunk until it is consumed, or 165275793eaSopenharmony_ci // until the end of a component (header, deflate block, or 166275793eaSopenharmony_ci // trailer) is reached. 167275793eaSopenharmony_ci } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0); 168275793eaSopenharmony_ci 169275793eaSopenharmony_ci // Since strm.avail_in was > 0 for the inflate call, some input was 170275793eaSopenharmony_ci // just consumed. It is therefore assured that put < strm.next_in. 171275793eaSopenharmony_ci 172275793eaSopenharmony_ci // Disposition the consumed component or part of a component. 173275793eaSopenharmony_ci switch (state) { 174275793eaSopenharmony_ci case BETWEEN: 175275793eaSopenharmony_ci state = HEAD; 176275793eaSopenharmony_ci // Fall through to HEAD when some or all of the header is 177275793eaSopenharmony_ci // processed. 178275793eaSopenharmony_ci 179275793eaSopenharmony_ci case HEAD: 180275793eaSopenharmony_ci // Discard the header. 181275793eaSopenharmony_ci if (strm.data_type & 0x80) { 182275793eaSopenharmony_ci // End of header reached -- deflate blocks follow. 183275793eaSopenharmony_ci put = strm.next_in; 184275793eaSopenharmony_ci prev = num; 185275793eaSopenharmony_ci memb = 0; 186275793eaSopenharmony_ci state = BLOCK; 187275793eaSopenharmony_ci } 188275793eaSopenharmony_ci break; 189275793eaSopenharmony_ci 190275793eaSopenharmony_ci case BLOCK: 191275793eaSopenharmony_ci // Copy the deflate stream to the output, but with the 192275793eaSopenharmony_ci // last-block-bit cleared. Re-synchronize stored block 193275793eaSopenharmony_ci // headers to the output byte boundaries. The bytes at 194275793eaSopenharmony_ci // put..strm.next_in-1 is the compressed data that has been 195275793eaSopenharmony_ci // processed and is ready to be copied to the output. 196275793eaSopenharmony_ci 197275793eaSopenharmony_ci // At this point, it is assured that new compressed data is 198275793eaSopenharmony_ci // available, i.e., put < strm.next_in. If prev is -1, then 199275793eaSopenharmony_ci // that compressed data starts in the middle of a deflate 200275793eaSopenharmony_ci // block. If prev is not -1, then the bits in the bit 201275793eaSopenharmony_ci // buffer, possibly combined with the bits in *put, contain 202275793eaSopenharmony_ci // the three-bit header of the new deflate block. In that 203275793eaSopenharmony_ci // case, prev is the number of bits from the previous block 204275793eaSopenharmony_ci // that remain in the bit buffer. Since num is the number 205275793eaSopenharmony_ci // of bits in the bit buffer, we have that num - prev is 206275793eaSopenharmony_ci // the number of bits from the new block currently in the 207275793eaSopenharmony_ci // bit buffer. 208275793eaSopenharmony_ci 209275793eaSopenharmony_ci // If strm.data_type & 0xc0 is 0x80, then the last byte of 210275793eaSopenharmony_ci // the available compressed data includes the last bits of 211275793eaSopenharmony_ci // the end of a deflate block. In that case, that last byte 212275793eaSopenharmony_ci // also has strm.data_type & 0x1f bits of the next deflate 213275793eaSopenharmony_ci // block, in the range 0..7. If strm.data_type & 0xc0 is 214275793eaSopenharmony_ci // 0xc0, then the last byte of the compressed data is the 215275793eaSopenharmony_ci // end of the deflate stream, followed by strm.data_type & 216275793eaSopenharmony_ci // 0x1f pad bits, also in the range 0..7. 217275793eaSopenharmony_ci 218275793eaSopenharmony_ci // Set bits to the number of bits not yet consumed from the 219275793eaSopenharmony_ci // last byte. If we are at the end of the block, bits is 220275793eaSopenharmony_ci // either the number of bits in the last byte belonging to 221275793eaSopenharmony_ci // the next block, or the number of pad bits after the 222275793eaSopenharmony_ci // final block. In either of those cases, bits is in the 223275793eaSopenharmony_ci // range 0..7. 224275793eaSopenharmony_ci ; // (required due to C syntax oddity) 225275793eaSopenharmony_ci int bits = strm.data_type & 0x1f; 226275793eaSopenharmony_ci 227275793eaSopenharmony_ci if (prev != -1) { 228275793eaSopenharmony_ci // We are at the start of a new block. Clear the last 229275793eaSopenharmony_ci // block bit, and check for special cases. If it is a 230275793eaSopenharmony_ci // stored block, then emit the header and pad to the 231275793eaSopenharmony_ci // next byte boundary. If it is a final, empty fixed 232275793eaSopenharmony_ci // block, then excise it. 233275793eaSopenharmony_ci 234275793eaSopenharmony_ci // Some or all of the three header bits for this block 235275793eaSopenharmony_ci // may already be in the bit buffer. Load any remaining 236275793eaSopenharmony_ci // header bits into the bit buffer. 237275793eaSopenharmony_ci if (num - prev < 3) { 238275793eaSopenharmony_ci buf += (unsigned long)*put++ << num; 239275793eaSopenharmony_ci num += 8; 240275793eaSopenharmony_ci } 241275793eaSopenharmony_ci 242275793eaSopenharmony_ci // Set last to have a 1 in the position of the last 243275793eaSopenharmony_ci // block bit in the bit buffer. 244275793eaSopenharmony_ci unsigned long last = (unsigned long)1 << prev; 245275793eaSopenharmony_ci 246275793eaSopenharmony_ci if (((buf >> prev) & 7) == 3) { 247275793eaSopenharmony_ci // This is a final fixed block. Load at least ten 248275793eaSopenharmony_ci // bits from this block, including the header, into 249275793eaSopenharmony_ci // the bit buffer. We already have at least three, 250275793eaSopenharmony_ci // so at most one more byte needs to be loaded. 251275793eaSopenharmony_ci if (num - prev < 10) { 252275793eaSopenharmony_ci if (put == strm.next_in) 253275793eaSopenharmony_ci // Need to go get and process more input. 254275793eaSopenharmony_ci // We'll end up back here to finish this. 255275793eaSopenharmony_ci break; 256275793eaSopenharmony_ci buf += (unsigned long)*put++ << num; 257275793eaSopenharmony_ci num += 8; 258275793eaSopenharmony_ci } 259275793eaSopenharmony_ci if (((buf >> prev) & 0x3ff) == 3) { 260275793eaSopenharmony_ci // That final fixed block is empty. Delete it 261275793eaSopenharmony_ci // to avoid adding an empty block every time a 262275793eaSopenharmony_ci // gzip stream is normalized. 263275793eaSopenharmony_ci num = prev; 264275793eaSopenharmony_ci buf &= last - 1; // zero the pad bits 265275793eaSopenharmony_ci } 266275793eaSopenharmony_ci } 267275793eaSopenharmony_ci else if (((buf >> prev) & 6) == 0) { 268275793eaSopenharmony_ci // This is a stored block. Flush to the next 269275793eaSopenharmony_ci // byte boundary after the three-bit header. 270275793eaSopenharmony_ci num = (prev + 10) & ~7; 271275793eaSopenharmony_ci buf &= last - 1; // zero the pad bits 272275793eaSopenharmony_ci } 273275793eaSopenharmony_ci 274275793eaSopenharmony_ci // Clear the last block bit. 275275793eaSopenharmony_ci buf &= ~last; 276275793eaSopenharmony_ci 277275793eaSopenharmony_ci // Write out complete bytes in the bit buffer. 278275793eaSopenharmony_ci while (num >= 8) { 279275793eaSopenharmony_ci putc(buf, out); 280275793eaSopenharmony_ci buf >>= 8; 281275793eaSopenharmony_ci num -= 8; 282275793eaSopenharmony_ci } 283275793eaSopenharmony_ci 284275793eaSopenharmony_ci // If no more bytes left to process, then we have 285275793eaSopenharmony_ci // consumed the byte that had bits from the next block. 286275793eaSopenharmony_ci if (put == strm.next_in) 287275793eaSopenharmony_ci bits = 0; 288275793eaSopenharmony_ci } 289275793eaSopenharmony_ci 290275793eaSopenharmony_ci // We are done handling the deflate block header. Now copy 291275793eaSopenharmony_ci // all or almost all of the remaining compressed data that 292275793eaSopenharmony_ci // has been processed so far. Don't copy one byte at the 293275793eaSopenharmony_ci // end if it contains bits from the next deflate block or 294275793eaSopenharmony_ci // pad bits at the end of a deflate block. 295275793eaSopenharmony_ci 296275793eaSopenharmony_ci // mix is 1 if we are at the end of a deflate block, and if 297275793eaSopenharmony_ci // some of the bits in the last byte follow this block. mix 298275793eaSopenharmony_ci // is 0 if we are in the middle of a deflate block, if the 299275793eaSopenharmony_ci // deflate block ended on a byte boundary, or if all of the 300275793eaSopenharmony_ci // compressed data processed so far has been consumed. 301275793eaSopenharmony_ci int mix = (strm.data_type & 0x80) && bits; 302275793eaSopenharmony_ci 303275793eaSopenharmony_ci // Copy all of the processed compressed data to the output, 304275793eaSopenharmony_ci // except for the last byte if it contains bits from the 305275793eaSopenharmony_ci // next deflate block or pad bits at the end of the deflate 306275793eaSopenharmony_ci // stream. Copy the data after shifting in num bits from 307275793eaSopenharmony_ci // buf in front of it, leaving num bits from the end of the 308275793eaSopenharmony_ci // compressed data in buf when done. 309275793eaSopenharmony_ci unsigned char *end = strm.next_in - mix; 310275793eaSopenharmony_ci if (put < end) { 311275793eaSopenharmony_ci if (num) 312275793eaSopenharmony_ci // Insert num bits from buf before the data being 313275793eaSopenharmony_ci // copied. 314275793eaSopenharmony_ci do { 315275793eaSopenharmony_ci buf += (unsigned)(*put++) << num; 316275793eaSopenharmony_ci putc(buf, out); 317275793eaSopenharmony_ci buf >>= 8; 318275793eaSopenharmony_ci } while (put < end); 319275793eaSopenharmony_ci else { 320275793eaSopenharmony_ci // No shifting needed -- write directly. 321275793eaSopenharmony_ci fwrite(put, 1, end - put, out); 322275793eaSopenharmony_ci put = end; 323275793eaSopenharmony_ci } 324275793eaSopenharmony_ci } 325275793eaSopenharmony_ci 326275793eaSopenharmony_ci // Process the last processed byte if it wasn't written. 327275793eaSopenharmony_ci if (mix) { 328275793eaSopenharmony_ci // Load the last byte into the bit buffer. 329275793eaSopenharmony_ci buf += (unsigned)(*put++) << num; 330275793eaSopenharmony_ci num += 8; 331275793eaSopenharmony_ci 332275793eaSopenharmony_ci if (strm.data_type & 0x40) { 333275793eaSopenharmony_ci // We are at the end of the deflate stream and 334275793eaSopenharmony_ci // there are bits pad bits. Discard the pad bits 335275793eaSopenharmony_ci // and write a byte to the output, if available. 336275793eaSopenharmony_ci // Leave the num bits left over in buf to prepend 337275793eaSopenharmony_ci // to the next deflate stream. 338275793eaSopenharmony_ci num -= bits; 339275793eaSopenharmony_ci if (num >= 8) { 340275793eaSopenharmony_ci putc(buf, out); 341275793eaSopenharmony_ci num -= 8; 342275793eaSopenharmony_ci buf >>= 8; 343275793eaSopenharmony_ci } 344275793eaSopenharmony_ci 345275793eaSopenharmony_ci // Force the pad bits in the bit buffer to zeros. 346275793eaSopenharmony_ci buf &= ((unsigned long)1 << num) - 1; 347275793eaSopenharmony_ci 348275793eaSopenharmony_ci // Don't need to set prev here since going to TAIL. 349275793eaSopenharmony_ci } 350275793eaSopenharmony_ci else 351275793eaSopenharmony_ci // At the end of an internal deflate block. Leave 352275793eaSopenharmony_ci // the last byte in the bit buffer to examine on 353275793eaSopenharmony_ci // the next entry to BLOCK, when more bits from the 354275793eaSopenharmony_ci // next block will be available. 355275793eaSopenharmony_ci prev = num - bits; // number of bits in buffer 356275793eaSopenharmony_ci // from current block 357275793eaSopenharmony_ci } 358275793eaSopenharmony_ci 359275793eaSopenharmony_ci // Don't have a byte left over, so we are in the middle of 360275793eaSopenharmony_ci // a deflate block, or the deflate block ended on a byte 361275793eaSopenharmony_ci // boundary. Set prev appropriately for the next entry into 362275793eaSopenharmony_ci // BLOCK. 363275793eaSopenharmony_ci else if (strm.data_type & 0x80) 364275793eaSopenharmony_ci // The block ended on a byte boundary, so no header 365275793eaSopenharmony_ci // bits are in the bit buffer. 366275793eaSopenharmony_ci prev = num; 367275793eaSopenharmony_ci else 368275793eaSopenharmony_ci // In the middle of a deflate block, so no header here. 369275793eaSopenharmony_ci prev = -1; 370275793eaSopenharmony_ci 371275793eaSopenharmony_ci // Check for the end of the deflate stream. 372275793eaSopenharmony_ci if ((strm.data_type & 0xc0) == 0xc0) { 373275793eaSopenharmony_ci // That ends the deflate stream on the input side, the 374275793eaSopenharmony_ci // pad bits were discarded, and any remaining bits from 375275793eaSopenharmony_ci // the last block in the stream are saved in the bit 376275793eaSopenharmony_ci // buffer to prepend to the next stream. Process the 377275793eaSopenharmony_ci // gzip trailer next. 378275793eaSopenharmony_ci tail = 0; 379275793eaSopenharmony_ci part = 0; 380275793eaSopenharmony_ci state = TAIL; 381275793eaSopenharmony_ci } 382275793eaSopenharmony_ci break; 383275793eaSopenharmony_ci 384275793eaSopenharmony_ci case TAIL: 385275793eaSopenharmony_ci // Accumulate available trailer bytes to update the total 386275793eaSopenharmony_ci // CRC and the total uncompressed length. 387275793eaSopenharmony_ci do { 388275793eaSopenharmony_ci part = (part >> 8) + ((unsigned long)(*put++) << 24); 389275793eaSopenharmony_ci tail++; 390275793eaSopenharmony_ci if (tail == 4) { 391275793eaSopenharmony_ci // Update the total CRC. 392275793eaSopenharmony_ci z_off_t len2 = memb; 393275793eaSopenharmony_ci if (len2 < 0 || (unsigned long long)len2 != memb) 394275793eaSopenharmony_ci BYE("overflow error"); 395275793eaSopenharmony_ci crc = crc ? crc32_combine(crc, part, len2) : part; 396275793eaSopenharmony_ci part = 0; 397275793eaSopenharmony_ci } 398275793eaSopenharmony_ci else if (tail == 8) { 399275793eaSopenharmony_ci // Update the total uncompressed length. (It's ok 400275793eaSopenharmony_ci // if this sum is done modulo 2^32.) 401275793eaSopenharmony_ci len += part; 402275793eaSopenharmony_ci 403275793eaSopenharmony_ci // At the end of a member. Set up to inflate an 404275793eaSopenharmony_ci // immediately following gzip member. (If we made 405275793eaSopenharmony_ci // it this far, then the trailer was valid.) 406275793eaSopenharmony_ci if (inflateReset(&strm) != Z_OK) 407275793eaSopenharmony_ci BYE("internal error"); 408275793eaSopenharmony_ci state = BETWEEN; 409275793eaSopenharmony_ci break; 410275793eaSopenharmony_ci } 411275793eaSopenharmony_ci } while (put < strm.next_in); 412275793eaSopenharmony_ci break; 413275793eaSopenharmony_ci } 414275793eaSopenharmony_ci 415275793eaSopenharmony_ci // Process the input buffer until completely consumed. 416275793eaSopenharmony_ci } while (strm.avail_in > 0); 417275793eaSopenharmony_ci 418275793eaSopenharmony_ci // Process input until end of file, invalid input, or i/o error. 419275793eaSopenharmony_ci } while (more); 420275793eaSopenharmony_ci 421275793eaSopenharmony_ci // Done with the inflate engine. 422275793eaSopenharmony_ci inflateEnd(&strm); 423275793eaSopenharmony_ci 424275793eaSopenharmony_ci // Verify the validity of the input. 425275793eaSopenharmony_ci if (state != BETWEEN) 426275793eaSopenharmony_ci BYE("input invalid: incomplete gzip stream"); 427275793eaSopenharmony_ci 428275793eaSopenharmony_ci // Write the remaining deflate stream bits, followed by a terminating 429275793eaSopenharmony_ci // deflate fixed block. 430275793eaSopenharmony_ci buf += (unsigned long)3 << num; 431275793eaSopenharmony_ci putc(buf, out); 432275793eaSopenharmony_ci putc(buf >> 8, out); 433275793eaSopenharmony_ci if (num > 6) 434275793eaSopenharmony_ci putc(0, out); 435275793eaSopenharmony_ci 436275793eaSopenharmony_ci // Write the gzip trailer, which is the CRC and the uncompressed length 437275793eaSopenharmony_ci // modulo 2^32, both in little-endian order. 438275793eaSopenharmony_ci putc(crc, out); 439275793eaSopenharmony_ci putc(crc >> 8, out); 440275793eaSopenharmony_ci putc(crc >> 16, out); 441275793eaSopenharmony_ci putc(crc >> 24, out); 442275793eaSopenharmony_ci putc(len, out); 443275793eaSopenharmony_ci putc(len >> 8, out); 444275793eaSopenharmony_ci putc(len >> 16, out); 445275793eaSopenharmony_ci putc(len >> 24, out); 446275793eaSopenharmony_ci fflush(out); 447275793eaSopenharmony_ci 448275793eaSopenharmony_ci // Check for any i/o errors. 449275793eaSopenharmony_ci if (ferror(in) || ferror(out)) 450275793eaSopenharmony_ci BYE("i/o error: %s", strerror(errno)); 451275793eaSopenharmony_ci 452275793eaSopenharmony_ci // All good! 453275793eaSopenharmony_ci *err = NULL; 454275793eaSopenharmony_ci return 0; 455275793eaSopenharmony_ci} 456275793eaSopenharmony_ci 457275793eaSopenharmony_ci// Normalize the gzip stream on stdin, writing the result to stdout. 458275793eaSopenharmony_ciint main(void) { 459275793eaSopenharmony_ci // Avoid end-of-line conversions on evil operating systems. 460275793eaSopenharmony_ci SET_BINARY_MODE(stdin); 461275793eaSopenharmony_ci SET_BINARY_MODE(stdout); 462275793eaSopenharmony_ci 463275793eaSopenharmony_ci // Normalize from stdin to stdout, returning 1 on error, 0 if ok. 464275793eaSopenharmony_ci char *err; 465275793eaSopenharmony_ci int ret = gzip_normalize(stdin, stdout, &err); 466275793eaSopenharmony_ci if (ret) 467275793eaSopenharmony_ci fprintf(stderr, "gznorm error: %s\n", err); 468275793eaSopenharmony_ci free(err); 469275793eaSopenharmony_ci return ret; 470275793eaSopenharmony_ci} 471