18c2ecf20Sopenharmony_ci/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), 48c2ecf20Sopenharmony_ci which also acknowledges contributions by Mike Burrows, David Wheeler, 58c2ecf20Sopenharmony_ci Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, 68c2ecf20Sopenharmony_ci Robert Sedgewick, and Jon L. Bentley. 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci This code is licensed under the LGPLv2: 98c2ecf20Sopenharmony_ci LGPL (http://www.gnu.org/copyleft/lgpl.html 108c2ecf20Sopenharmony_ci*/ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci/* 138c2ecf20Sopenharmony_ci Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci More efficient reading of Huffman codes, a streamlined read_bunzip() 168c2ecf20Sopenharmony_ci function, and various other tweaks. In (limited) tests, approximately 178c2ecf20Sopenharmony_ci 20% faster than bzcat on x86 and about 10% faster on arm. 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci Note that about 2/3 of the time is spent in read_unzip() reversing 208c2ecf20Sopenharmony_ci the Burrows-Wheeler transformation. Much of that time is delay 218c2ecf20Sopenharmony_ci resulting from cache misses. 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci I would ask that anyone benefiting from this work, especially those 248c2ecf20Sopenharmony_ci using it in commercial products, consider making a donation to my local 258c2ecf20Sopenharmony_ci non-profit hospice organization in the name of the woman I loved, who 268c2ecf20Sopenharmony_ci passed away Feb. 12, 2003. 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci In memory of Toni W. Hagan 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci Hospice of Acadiana, Inc. 318c2ecf20Sopenharmony_ci 2600 Johnston St., Suite 200 328c2ecf20Sopenharmony_ci Lafayette, LA 70503-3240 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci Phone (337) 232-1234 or 1-800-738-2226 358c2ecf20Sopenharmony_ci Fax (337) 232-1297 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci https://www.hospiceacadiana.com/ 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci Manuel 408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci/* 438c2ecf20Sopenharmony_ci Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) 448c2ecf20Sopenharmony_ci*/ 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#ifdef STATIC 488c2ecf20Sopenharmony_ci#define PREBOOT 498c2ecf20Sopenharmony_ci#else 508c2ecf20Sopenharmony_ci#include <linux/decompress/bunzip2.h> 518c2ecf20Sopenharmony_ci#endif /* STATIC */ 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci#include <linux/decompress/mm.h> 548c2ecf20Sopenharmony_ci#include <linux/crc32poly.h> 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci#ifndef INT_MAX 578c2ecf20Sopenharmony_ci#define INT_MAX 0x7fffffff 588c2ecf20Sopenharmony_ci#endif 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci/* Constants for Huffman coding */ 618c2ecf20Sopenharmony_ci#define MAX_GROUPS 6 628c2ecf20Sopenharmony_ci#define GROUP_SIZE 50 /* 64 would have been more efficient */ 638c2ecf20Sopenharmony_ci#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ 648c2ecf20Sopenharmony_ci#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ 658c2ecf20Sopenharmony_ci#define SYMBOL_RUNA 0 668c2ecf20Sopenharmony_ci#define SYMBOL_RUNB 1 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci/* Status return values */ 698c2ecf20Sopenharmony_ci#define RETVAL_OK 0 708c2ecf20Sopenharmony_ci#define RETVAL_LAST_BLOCK (-1) 718c2ecf20Sopenharmony_ci#define RETVAL_NOT_BZIP_DATA (-2) 728c2ecf20Sopenharmony_ci#define RETVAL_UNEXPECTED_INPUT_EOF (-3) 738c2ecf20Sopenharmony_ci#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) 748c2ecf20Sopenharmony_ci#define RETVAL_DATA_ERROR (-5) 758c2ecf20Sopenharmony_ci#define RETVAL_OUT_OF_MEMORY (-6) 768c2ecf20Sopenharmony_ci#define RETVAL_OBSOLETE_INPUT (-7) 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci/* Other housekeeping constants */ 798c2ecf20Sopenharmony_ci#define BZIP2_IOBUF_SIZE 4096 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci/* This is what we know about each Huffman coding group */ 828c2ecf20Sopenharmony_cistruct group_data { 838c2ecf20Sopenharmony_ci /* We have an extra slot at the end of limit[] for a sentinal value. */ 848c2ecf20Sopenharmony_ci int limit[MAX_HUFCODE_BITS+1]; 858c2ecf20Sopenharmony_ci int base[MAX_HUFCODE_BITS]; 868c2ecf20Sopenharmony_ci int permute[MAX_SYMBOLS]; 878c2ecf20Sopenharmony_ci int minLen, maxLen; 888c2ecf20Sopenharmony_ci}; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci/* Structure holding all the housekeeping data, including IO buffers and 918c2ecf20Sopenharmony_ci memory that persists between calls to bunzip */ 928c2ecf20Sopenharmony_cistruct bunzip_data { 938c2ecf20Sopenharmony_ci /* State for interrupting output loop */ 948c2ecf20Sopenharmony_ci int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; 958c2ecf20Sopenharmony_ci /* I/O tracking data (file handles, buffers, positions, etc.) */ 968c2ecf20Sopenharmony_ci long (*fill)(void*, unsigned long); 978c2ecf20Sopenharmony_ci long inbufCount, inbufPos /*, outbufPos*/; 988c2ecf20Sopenharmony_ci unsigned char *inbuf /*,*outbuf*/; 998c2ecf20Sopenharmony_ci unsigned int inbufBitCount, inbufBits; 1008c2ecf20Sopenharmony_ci /* The CRC values stored in the block header and calculated from the 1018c2ecf20Sopenharmony_ci data */ 1028c2ecf20Sopenharmony_ci unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; 1038c2ecf20Sopenharmony_ci /* Intermediate buffer and its size (in bytes) */ 1048c2ecf20Sopenharmony_ci unsigned int *dbuf, dbufSize; 1058c2ecf20Sopenharmony_ci /* These things are a bit too big to go on the stack */ 1068c2ecf20Sopenharmony_ci unsigned char selectors[32768]; /* nSelectors = 15 bits */ 1078c2ecf20Sopenharmony_ci struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 1088c2ecf20Sopenharmony_ci int io_error; /* non-zero if we have IO error */ 1098c2ecf20Sopenharmony_ci int byteCount[256]; 1108c2ecf20Sopenharmony_ci unsigned char symToByte[256], mtfSymbol[256]; 1118c2ecf20Sopenharmony_ci}; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci/* Return the next nnn bits of input. All reads from the compressed input 1158c2ecf20Sopenharmony_ci are done through this function. All reads are big endian */ 1168c2ecf20Sopenharmony_cistatic unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci unsigned int bits = 0; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* If we need to get more data from the byte buffer, do so. 1218c2ecf20Sopenharmony_ci (Loop getting one byte at a time to enforce endianness and avoid 1228c2ecf20Sopenharmony_ci unaligned access.) */ 1238c2ecf20Sopenharmony_ci while (bd->inbufBitCount < bits_wanted) { 1248c2ecf20Sopenharmony_ci /* If we need to read more data from file into byte buffer, do 1258c2ecf20Sopenharmony_ci so */ 1268c2ecf20Sopenharmony_ci if (bd->inbufPos == bd->inbufCount) { 1278c2ecf20Sopenharmony_ci if (bd->io_error) 1288c2ecf20Sopenharmony_ci return 0; 1298c2ecf20Sopenharmony_ci bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); 1308c2ecf20Sopenharmony_ci if (bd->inbufCount <= 0) { 1318c2ecf20Sopenharmony_ci bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; 1328c2ecf20Sopenharmony_ci return 0; 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci bd->inbufPos = 0; 1358c2ecf20Sopenharmony_ci } 1368c2ecf20Sopenharmony_ci /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 1378c2ecf20Sopenharmony_ci if (bd->inbufBitCount >= 24) { 1388c2ecf20Sopenharmony_ci bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); 1398c2ecf20Sopenharmony_ci bits_wanted -= bd->inbufBitCount; 1408c2ecf20Sopenharmony_ci bits <<= bits_wanted; 1418c2ecf20Sopenharmony_ci bd->inbufBitCount = 0; 1428c2ecf20Sopenharmony_ci } 1438c2ecf20Sopenharmony_ci /* Grab next 8 bits of input from buffer. */ 1448c2ecf20Sopenharmony_ci bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; 1458c2ecf20Sopenharmony_ci bd->inbufBitCount += 8; 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci /* Calculate result */ 1488c2ecf20Sopenharmony_ci bd->inbufBitCount -= bits_wanted; 1498c2ecf20Sopenharmony_ci bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci return bits; 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_cistatic int INIT get_next_block(struct bunzip_data *bd) 1578c2ecf20Sopenharmony_ci{ 1588c2ecf20Sopenharmony_ci struct group_data *hufGroup = NULL; 1598c2ecf20Sopenharmony_ci int *base = NULL; 1608c2ecf20Sopenharmony_ci int *limit = NULL; 1618c2ecf20Sopenharmony_ci int dbufCount, nextSym, dbufSize, groupCount, selector, 1628c2ecf20Sopenharmony_ci i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount; 1638c2ecf20Sopenharmony_ci unsigned char uc, *symToByte, *mtfSymbol, *selectors; 1648c2ecf20Sopenharmony_ci unsigned int *dbuf, origPtr; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci dbuf = bd->dbuf; 1678c2ecf20Sopenharmony_ci dbufSize = bd->dbufSize; 1688c2ecf20Sopenharmony_ci selectors = bd->selectors; 1698c2ecf20Sopenharmony_ci byteCount = bd->byteCount; 1708c2ecf20Sopenharmony_ci symToByte = bd->symToByte; 1718c2ecf20Sopenharmony_ci mtfSymbol = bd->mtfSymbol; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci /* Read in header signature and CRC, then validate signature. 1748c2ecf20Sopenharmony_ci (last block signature means CRC is for whole file, return now) */ 1758c2ecf20Sopenharmony_ci i = get_bits(bd, 24); 1768c2ecf20Sopenharmony_ci j = get_bits(bd, 24); 1778c2ecf20Sopenharmony_ci bd->headerCRC = get_bits(bd, 32); 1788c2ecf20Sopenharmony_ci if ((i == 0x177245) && (j == 0x385090)) 1798c2ecf20Sopenharmony_ci return RETVAL_LAST_BLOCK; 1808c2ecf20Sopenharmony_ci if ((i != 0x314159) || (j != 0x265359)) 1818c2ecf20Sopenharmony_ci return RETVAL_NOT_BZIP_DATA; 1828c2ecf20Sopenharmony_ci /* We can add support for blockRandomised if anybody complains. 1838c2ecf20Sopenharmony_ci There was some code for this in busybox 1.0.0-pre3, but nobody ever 1848c2ecf20Sopenharmony_ci noticed that it didn't actually work. */ 1858c2ecf20Sopenharmony_ci if (get_bits(bd, 1)) 1868c2ecf20Sopenharmony_ci return RETVAL_OBSOLETE_INPUT; 1878c2ecf20Sopenharmony_ci origPtr = get_bits(bd, 24); 1888c2ecf20Sopenharmony_ci if (origPtr >= dbufSize) 1898c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 1908c2ecf20Sopenharmony_ci /* mapping table: if some byte values are never used (encoding things 1918c2ecf20Sopenharmony_ci like ascii text), the compression code removes the gaps to have fewer 1928c2ecf20Sopenharmony_ci symbols to deal with, and writes a sparse bitfield indicating which 1938c2ecf20Sopenharmony_ci values were present. We make a translation table to convert the 1948c2ecf20Sopenharmony_ci symbols back to the corresponding bytes. */ 1958c2ecf20Sopenharmony_ci t = get_bits(bd, 16); 1968c2ecf20Sopenharmony_ci symTotal = 0; 1978c2ecf20Sopenharmony_ci for (i = 0; i < 16; i++) { 1988c2ecf20Sopenharmony_ci if (t&(1 << (15-i))) { 1998c2ecf20Sopenharmony_ci k = get_bits(bd, 16); 2008c2ecf20Sopenharmony_ci for (j = 0; j < 16; j++) 2018c2ecf20Sopenharmony_ci if (k&(1 << (15-j))) 2028c2ecf20Sopenharmony_ci symToByte[symTotal++] = (16*i)+j; 2038c2ecf20Sopenharmony_ci } 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci /* How many different Huffman coding groups does this block use? */ 2068c2ecf20Sopenharmony_ci groupCount = get_bits(bd, 3); 2078c2ecf20Sopenharmony_ci if (groupCount < 2 || groupCount > MAX_GROUPS) 2088c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 2098c2ecf20Sopenharmony_ci /* nSelectors: Every GROUP_SIZE many symbols we select a new 2108c2ecf20Sopenharmony_ci Huffman coding group. Read in the group selector list, 2118c2ecf20Sopenharmony_ci which is stored as MTF encoded bit runs. (MTF = Move To 2128c2ecf20Sopenharmony_ci Front, as each value is used it's moved to the start of the 2138c2ecf20Sopenharmony_ci list.) */ 2148c2ecf20Sopenharmony_ci nSelectors = get_bits(bd, 15); 2158c2ecf20Sopenharmony_ci if (!nSelectors) 2168c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 2178c2ecf20Sopenharmony_ci for (i = 0; i < groupCount; i++) 2188c2ecf20Sopenharmony_ci mtfSymbol[i] = i; 2198c2ecf20Sopenharmony_ci for (i = 0; i < nSelectors; i++) { 2208c2ecf20Sopenharmony_ci /* Get next value */ 2218c2ecf20Sopenharmony_ci for (j = 0; get_bits(bd, 1); j++) 2228c2ecf20Sopenharmony_ci if (j >= groupCount) 2238c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 2248c2ecf20Sopenharmony_ci /* Decode MTF to get the next selector */ 2258c2ecf20Sopenharmony_ci uc = mtfSymbol[j]; 2268c2ecf20Sopenharmony_ci for (; j; j--) 2278c2ecf20Sopenharmony_ci mtfSymbol[j] = mtfSymbol[j-1]; 2288c2ecf20Sopenharmony_ci mtfSymbol[0] = selectors[i] = uc; 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci /* Read the Huffman coding tables for each group, which code 2318c2ecf20Sopenharmony_ci for symTotal literal symbols, plus two run symbols (RUNA, 2328c2ecf20Sopenharmony_ci RUNB) */ 2338c2ecf20Sopenharmony_ci symCount = symTotal+2; 2348c2ecf20Sopenharmony_ci for (j = 0; j < groupCount; j++) { 2358c2ecf20Sopenharmony_ci unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; 2368c2ecf20Sopenharmony_ci int minLen, maxLen, pp; 2378c2ecf20Sopenharmony_ci /* Read Huffman code lengths for each symbol. They're 2388c2ecf20Sopenharmony_ci stored in a way similar to mtf; record a starting 2398c2ecf20Sopenharmony_ci value for the first symbol, and an offset from the 2408c2ecf20Sopenharmony_ci previous value for everys symbol after that. 2418c2ecf20Sopenharmony_ci (Subtracting 1 before the loop and then adding it 2428c2ecf20Sopenharmony_ci back at the end is an optimization that makes the 2438c2ecf20Sopenharmony_ci test inside the loop simpler: symbol length 0 2448c2ecf20Sopenharmony_ci becomes negative, so an unsigned inequality catches 2458c2ecf20Sopenharmony_ci it.) */ 2468c2ecf20Sopenharmony_ci t = get_bits(bd, 5)-1; 2478c2ecf20Sopenharmony_ci for (i = 0; i < symCount; i++) { 2488c2ecf20Sopenharmony_ci for (;;) { 2498c2ecf20Sopenharmony_ci if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) 2508c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci /* If first bit is 0, stop. Else 2538c2ecf20Sopenharmony_ci second bit indicates whether to 2548c2ecf20Sopenharmony_ci increment or decrement the value. 2558c2ecf20Sopenharmony_ci Optimization: grab 2 bits and unget 2568c2ecf20Sopenharmony_ci the second if the first was 0. */ 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci k = get_bits(bd, 2); 2598c2ecf20Sopenharmony_ci if (k < 2) { 2608c2ecf20Sopenharmony_ci bd->inbufBitCount++; 2618c2ecf20Sopenharmony_ci break; 2628c2ecf20Sopenharmony_ci } 2638c2ecf20Sopenharmony_ci /* Add one if second bit 1, else 2648c2ecf20Sopenharmony_ci * subtract 1. Avoids if/else */ 2658c2ecf20Sopenharmony_ci t += (((k+1)&2)-1); 2668c2ecf20Sopenharmony_ci } 2678c2ecf20Sopenharmony_ci /* Correct for the initial -1, to get the 2688c2ecf20Sopenharmony_ci * final symbol length */ 2698c2ecf20Sopenharmony_ci length[i] = t+1; 2708c2ecf20Sopenharmony_ci } 2718c2ecf20Sopenharmony_ci /* Find largest and smallest lengths in this group */ 2728c2ecf20Sopenharmony_ci minLen = maxLen = length[0]; 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci for (i = 1; i < symCount; i++) { 2758c2ecf20Sopenharmony_ci if (length[i] > maxLen) 2768c2ecf20Sopenharmony_ci maxLen = length[i]; 2778c2ecf20Sopenharmony_ci else if (length[i] < minLen) 2788c2ecf20Sopenharmony_ci minLen = length[i]; 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci /* Calculate permute[], base[], and limit[] tables from 2828c2ecf20Sopenharmony_ci * length[]. 2838c2ecf20Sopenharmony_ci * 2848c2ecf20Sopenharmony_ci * permute[] is the lookup table for converting 2858c2ecf20Sopenharmony_ci * Huffman coded symbols into decoded symbols. base[] 2868c2ecf20Sopenharmony_ci * is the amount to subtract from the value of a 2878c2ecf20Sopenharmony_ci * Huffman symbol of a given length when using 2888c2ecf20Sopenharmony_ci * permute[]. 2898c2ecf20Sopenharmony_ci * 2908c2ecf20Sopenharmony_ci * limit[] indicates the largest numerical value a 2918c2ecf20Sopenharmony_ci * symbol with a given number of bits can have. This 2928c2ecf20Sopenharmony_ci * is how the Huffman codes can vary in length: each 2938c2ecf20Sopenharmony_ci * code with a value > limit[length] needs another 2948c2ecf20Sopenharmony_ci * bit. 2958c2ecf20Sopenharmony_ci */ 2968c2ecf20Sopenharmony_ci hufGroup = bd->groups+j; 2978c2ecf20Sopenharmony_ci hufGroup->minLen = minLen; 2988c2ecf20Sopenharmony_ci hufGroup->maxLen = maxLen; 2998c2ecf20Sopenharmony_ci /* Note that minLen can't be smaller than 1, so we 3008c2ecf20Sopenharmony_ci adjust the base and limit array pointers so we're 3018c2ecf20Sopenharmony_ci not always wasting the first entry. We do this 3028c2ecf20Sopenharmony_ci again when using them (during symbol decoding).*/ 3038c2ecf20Sopenharmony_ci base = hufGroup->base-1; 3048c2ecf20Sopenharmony_ci limit = hufGroup->limit-1; 3058c2ecf20Sopenharmony_ci /* Calculate permute[]. Concurrently, initialize 3068c2ecf20Sopenharmony_ci * temp[] and limit[]. */ 3078c2ecf20Sopenharmony_ci pp = 0; 3088c2ecf20Sopenharmony_ci for (i = minLen; i <= maxLen; i++) { 3098c2ecf20Sopenharmony_ci temp[i] = limit[i] = 0; 3108c2ecf20Sopenharmony_ci for (t = 0; t < symCount; t++) 3118c2ecf20Sopenharmony_ci if (length[t] == i) 3128c2ecf20Sopenharmony_ci hufGroup->permute[pp++] = t; 3138c2ecf20Sopenharmony_ci } 3148c2ecf20Sopenharmony_ci /* Count symbols coded for at each bit length */ 3158c2ecf20Sopenharmony_ci for (i = 0; i < symCount; i++) 3168c2ecf20Sopenharmony_ci temp[length[i]]++; 3178c2ecf20Sopenharmony_ci /* Calculate limit[] (the largest symbol-coding value 3188c2ecf20Sopenharmony_ci *at each bit length, which is (previous limit << 3198c2ecf20Sopenharmony_ci *1)+symbols at this level), and base[] (number of 3208c2ecf20Sopenharmony_ci *symbols to ignore at each bit length, which is limit 3218c2ecf20Sopenharmony_ci *minus the cumulative count of symbols coded for 3228c2ecf20Sopenharmony_ci *already). */ 3238c2ecf20Sopenharmony_ci pp = t = 0; 3248c2ecf20Sopenharmony_ci for (i = minLen; i < maxLen; i++) { 3258c2ecf20Sopenharmony_ci pp += temp[i]; 3268c2ecf20Sopenharmony_ci /* We read the largest possible symbol size 3278c2ecf20Sopenharmony_ci and then unget bits after determining how 3288c2ecf20Sopenharmony_ci many we need, and those extra bits could be 3298c2ecf20Sopenharmony_ci set to anything. (They're noise from 3308c2ecf20Sopenharmony_ci future symbols.) At each level we're 3318c2ecf20Sopenharmony_ci really only interested in the first few 3328c2ecf20Sopenharmony_ci bits, so here we set all the trailing 3338c2ecf20Sopenharmony_ci to-be-ignored bits to 1 so they don't 3348c2ecf20Sopenharmony_ci affect the value > limit[length] 3358c2ecf20Sopenharmony_ci comparison. */ 3368c2ecf20Sopenharmony_ci limit[i] = (pp << (maxLen - i)) - 1; 3378c2ecf20Sopenharmony_ci pp <<= 1; 3388c2ecf20Sopenharmony_ci base[i+1] = pp-(t += temp[i]); 3398c2ecf20Sopenharmony_ci } 3408c2ecf20Sopenharmony_ci limit[maxLen+1] = INT_MAX; /* Sentinal value for 3418c2ecf20Sopenharmony_ci * reading next sym. */ 3428c2ecf20Sopenharmony_ci limit[maxLen] = pp+temp[maxLen]-1; 3438c2ecf20Sopenharmony_ci base[minLen] = 0; 3448c2ecf20Sopenharmony_ci } 3458c2ecf20Sopenharmony_ci /* We've finished reading and digesting the block header. Now 3468c2ecf20Sopenharmony_ci read this block's Huffman coded symbols from the file and 3478c2ecf20Sopenharmony_ci undo the Huffman coding and run length encoding, saving the 3488c2ecf20Sopenharmony_ci result into dbuf[dbufCount++] = uc */ 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci /* Initialize symbol occurrence counters and symbol Move To 3518c2ecf20Sopenharmony_ci * Front table */ 3528c2ecf20Sopenharmony_ci for (i = 0; i < 256; i++) { 3538c2ecf20Sopenharmony_ci byteCount[i] = 0; 3548c2ecf20Sopenharmony_ci mtfSymbol[i] = (unsigned char)i; 3558c2ecf20Sopenharmony_ci } 3568c2ecf20Sopenharmony_ci /* Loop through compressed symbols. */ 3578c2ecf20Sopenharmony_ci runPos = dbufCount = symCount = selector = 0; 3588c2ecf20Sopenharmony_ci for (;;) { 3598c2ecf20Sopenharmony_ci /* Determine which Huffman coding group to use. */ 3608c2ecf20Sopenharmony_ci if (!(symCount--)) { 3618c2ecf20Sopenharmony_ci symCount = GROUP_SIZE-1; 3628c2ecf20Sopenharmony_ci if (selector >= nSelectors) 3638c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 3648c2ecf20Sopenharmony_ci hufGroup = bd->groups+selectors[selector++]; 3658c2ecf20Sopenharmony_ci base = hufGroup->base-1; 3668c2ecf20Sopenharmony_ci limit = hufGroup->limit-1; 3678c2ecf20Sopenharmony_ci } 3688c2ecf20Sopenharmony_ci /* Read next Huffman-coded symbol. */ 3698c2ecf20Sopenharmony_ci /* Note: It is far cheaper to read maxLen bits and 3708c2ecf20Sopenharmony_ci back up than it is to read minLen bits and then an 3718c2ecf20Sopenharmony_ci additional bit at a time, testing as we go. 3728c2ecf20Sopenharmony_ci Because there is a trailing last block (with file 3738c2ecf20Sopenharmony_ci CRC), there is no danger of the overread causing an 3748c2ecf20Sopenharmony_ci unexpected EOF for a valid compressed file. As a 3758c2ecf20Sopenharmony_ci further optimization, we do the read inline 3768c2ecf20Sopenharmony_ci (falling back to a call to get_bits if the buffer 3778c2ecf20Sopenharmony_ci runs dry). The following (up to got_huff_bits:) is 3788c2ecf20Sopenharmony_ci equivalent to j = get_bits(bd, hufGroup->maxLen); 3798c2ecf20Sopenharmony_ci */ 3808c2ecf20Sopenharmony_ci while (bd->inbufBitCount < hufGroup->maxLen) { 3818c2ecf20Sopenharmony_ci if (bd->inbufPos == bd->inbufCount) { 3828c2ecf20Sopenharmony_ci j = get_bits(bd, hufGroup->maxLen); 3838c2ecf20Sopenharmony_ci goto got_huff_bits; 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci bd->inbufBits = 3868c2ecf20Sopenharmony_ci (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; 3878c2ecf20Sopenharmony_ci bd->inbufBitCount += 8; 3888c2ecf20Sopenharmony_ci }; 3898c2ecf20Sopenharmony_ci bd->inbufBitCount -= hufGroup->maxLen; 3908c2ecf20Sopenharmony_ci j = (bd->inbufBits >> bd->inbufBitCount)& 3918c2ecf20Sopenharmony_ci ((1 << hufGroup->maxLen)-1); 3928c2ecf20Sopenharmony_cigot_huff_bits: 3938c2ecf20Sopenharmony_ci /* Figure how many bits are in next symbol and 3948c2ecf20Sopenharmony_ci * unget extras */ 3958c2ecf20Sopenharmony_ci i = hufGroup->minLen; 3968c2ecf20Sopenharmony_ci while (j > limit[i]) 3978c2ecf20Sopenharmony_ci ++i; 3988c2ecf20Sopenharmony_ci bd->inbufBitCount += (hufGroup->maxLen - i); 3998c2ecf20Sopenharmony_ci /* Huffman decode value to get nextSym (with bounds checking) */ 4008c2ecf20Sopenharmony_ci if ((i > hufGroup->maxLen) 4018c2ecf20Sopenharmony_ci || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) 4028c2ecf20Sopenharmony_ci >= MAX_SYMBOLS)) 4038c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 4048c2ecf20Sopenharmony_ci nextSym = hufGroup->permute[j]; 4058c2ecf20Sopenharmony_ci /* We have now decoded the symbol, which indicates 4068c2ecf20Sopenharmony_ci either a new literal byte, or a repeated run of the 4078c2ecf20Sopenharmony_ci most recent literal byte. First, check if nextSym 4088c2ecf20Sopenharmony_ci indicates a repeated run, and if so loop collecting 4098c2ecf20Sopenharmony_ci how many times to repeat the last literal. */ 4108c2ecf20Sopenharmony_ci if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ 4118c2ecf20Sopenharmony_ci /* If this is the start of a new run, zero out 4128c2ecf20Sopenharmony_ci * counter */ 4138c2ecf20Sopenharmony_ci if (!runPos) { 4148c2ecf20Sopenharmony_ci runPos = 1; 4158c2ecf20Sopenharmony_ci t = 0; 4168c2ecf20Sopenharmony_ci } 4178c2ecf20Sopenharmony_ci /* Neat trick that saves 1 symbol: instead of 4188c2ecf20Sopenharmony_ci or-ing 0 or 1 at each bit position, add 1 4198c2ecf20Sopenharmony_ci or 2 instead. For example, 1011 is 1 << 0 4208c2ecf20Sopenharmony_ci + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 4218c2ecf20Sopenharmony_ci + 1 << 2. You can make any bit pattern 4228c2ecf20Sopenharmony_ci that way using 1 less symbol than the basic 4238c2ecf20Sopenharmony_ci or 0/1 method (except all bits 0, which 4248c2ecf20Sopenharmony_ci would use no symbols, but a run of length 0 4258c2ecf20Sopenharmony_ci doesn't mean anything in this context). 4268c2ecf20Sopenharmony_ci Thus space is saved. */ 4278c2ecf20Sopenharmony_ci t += (runPos << nextSym); 4288c2ecf20Sopenharmony_ci /* +runPos if RUNA; +2*runPos if RUNB */ 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci runPos <<= 1; 4318c2ecf20Sopenharmony_ci continue; 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci /* When we hit the first non-run symbol after a run, 4348c2ecf20Sopenharmony_ci we now know how many times to repeat the last 4358c2ecf20Sopenharmony_ci literal, so append that many copies to our buffer 4368c2ecf20Sopenharmony_ci of decoded symbols (dbuf) now. (The last literal 4378c2ecf20Sopenharmony_ci used is the one at the head of the mtfSymbol 4388c2ecf20Sopenharmony_ci array.) */ 4398c2ecf20Sopenharmony_ci if (runPos) { 4408c2ecf20Sopenharmony_ci runPos = 0; 4418c2ecf20Sopenharmony_ci if (dbufCount+t >= dbufSize) 4428c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci uc = symToByte[mtfSymbol[0]]; 4458c2ecf20Sopenharmony_ci byteCount[uc] += t; 4468c2ecf20Sopenharmony_ci while (t--) 4478c2ecf20Sopenharmony_ci dbuf[dbufCount++] = uc; 4488c2ecf20Sopenharmony_ci } 4498c2ecf20Sopenharmony_ci /* Is this the terminating symbol? */ 4508c2ecf20Sopenharmony_ci if (nextSym > symTotal) 4518c2ecf20Sopenharmony_ci break; 4528c2ecf20Sopenharmony_ci /* At this point, nextSym indicates a new literal 4538c2ecf20Sopenharmony_ci character. Subtract one to get the position in the 4548c2ecf20Sopenharmony_ci MTF array at which this literal is currently to be 4558c2ecf20Sopenharmony_ci found. (Note that the result can't be -1 or 0, 4568c2ecf20Sopenharmony_ci because 0 and 1 are RUNA and RUNB. But another 4578c2ecf20Sopenharmony_ci instance of the first symbol in the mtf array, 4588c2ecf20Sopenharmony_ci position 0, would have been handled as part of a 4598c2ecf20Sopenharmony_ci run above. Therefore 1 unused mtf position minus 2 4608c2ecf20Sopenharmony_ci non-literal nextSym values equals -1.) */ 4618c2ecf20Sopenharmony_ci if (dbufCount >= dbufSize) 4628c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 4638c2ecf20Sopenharmony_ci i = nextSym - 1; 4648c2ecf20Sopenharmony_ci uc = mtfSymbol[i]; 4658c2ecf20Sopenharmony_ci /* Adjust the MTF array. Since we typically expect to 4668c2ecf20Sopenharmony_ci *move only a small number of symbols, and are bound 4678c2ecf20Sopenharmony_ci *by 256 in any case, using memmove here would 4688c2ecf20Sopenharmony_ci *typically be bigger and slower due to function call 4698c2ecf20Sopenharmony_ci *overhead and other assorted setup costs. */ 4708c2ecf20Sopenharmony_ci do { 4718c2ecf20Sopenharmony_ci mtfSymbol[i] = mtfSymbol[i-1]; 4728c2ecf20Sopenharmony_ci } while (--i); 4738c2ecf20Sopenharmony_ci mtfSymbol[0] = uc; 4748c2ecf20Sopenharmony_ci uc = symToByte[uc]; 4758c2ecf20Sopenharmony_ci /* We have our literal byte. Save it into dbuf. */ 4768c2ecf20Sopenharmony_ci byteCount[uc]++; 4778c2ecf20Sopenharmony_ci dbuf[dbufCount++] = (unsigned int)uc; 4788c2ecf20Sopenharmony_ci } 4798c2ecf20Sopenharmony_ci /* At this point, we've read all the Huffman-coded symbols 4808c2ecf20Sopenharmony_ci (and repeated runs) for this block from the input stream, 4818c2ecf20Sopenharmony_ci and decoded them into the intermediate buffer. There are 4828c2ecf20Sopenharmony_ci dbufCount many decoded bytes in dbuf[]. Now undo the 4838c2ecf20Sopenharmony_ci Burrows-Wheeler transform on dbuf. See 4848c2ecf20Sopenharmony_ci http://dogma.net/markn/articles/bwt/bwt.htm 4858c2ecf20Sopenharmony_ci */ 4868c2ecf20Sopenharmony_ci /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 4878c2ecf20Sopenharmony_ci j = 0; 4888c2ecf20Sopenharmony_ci for (i = 0; i < 256; i++) { 4898c2ecf20Sopenharmony_ci k = j+byteCount[i]; 4908c2ecf20Sopenharmony_ci byteCount[i] = j; 4918c2ecf20Sopenharmony_ci j = k; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci /* Figure out what order dbuf would be in if we sorted it. */ 4948c2ecf20Sopenharmony_ci for (i = 0; i < dbufCount; i++) { 4958c2ecf20Sopenharmony_ci uc = (unsigned char)(dbuf[i] & 0xff); 4968c2ecf20Sopenharmony_ci dbuf[byteCount[uc]] |= (i << 8); 4978c2ecf20Sopenharmony_ci byteCount[uc]++; 4988c2ecf20Sopenharmony_ci } 4998c2ecf20Sopenharmony_ci /* Decode first byte by hand to initialize "previous" byte. 5008c2ecf20Sopenharmony_ci Note that it doesn't get output, and if the first three 5018c2ecf20Sopenharmony_ci characters are identical it doesn't qualify as a run (hence 5028c2ecf20Sopenharmony_ci writeRunCountdown = 5). */ 5038c2ecf20Sopenharmony_ci if (dbufCount) { 5048c2ecf20Sopenharmony_ci if (origPtr >= dbufCount) 5058c2ecf20Sopenharmony_ci return RETVAL_DATA_ERROR; 5068c2ecf20Sopenharmony_ci bd->writePos = dbuf[origPtr]; 5078c2ecf20Sopenharmony_ci bd->writeCurrent = (unsigned char)(bd->writePos&0xff); 5088c2ecf20Sopenharmony_ci bd->writePos >>= 8; 5098c2ecf20Sopenharmony_ci bd->writeRunCountdown = 5; 5108c2ecf20Sopenharmony_ci } 5118c2ecf20Sopenharmony_ci bd->writeCount = dbufCount; 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci return RETVAL_OK; 5148c2ecf20Sopenharmony_ci} 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci/* Undo burrows-wheeler transform on intermediate buffer to produce output. 5178c2ecf20Sopenharmony_ci If start_bunzip was initialized with out_fd =-1, then up to len bytes of 5188c2ecf20Sopenharmony_ci data are written to outbuf. Return value is number of bytes written or 5198c2ecf20Sopenharmony_ci error (all errors are negative numbers). If out_fd!=-1, outbuf and len 5208c2ecf20Sopenharmony_ci are ignored, data is written to out_fd and return is RETVAL_OK or error. 5218c2ecf20Sopenharmony_ci*/ 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_cistatic int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) 5248c2ecf20Sopenharmony_ci{ 5258c2ecf20Sopenharmony_ci const unsigned int *dbuf; 5268c2ecf20Sopenharmony_ci int pos, xcurrent, previous, gotcount; 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci /* If last read was short due to end of file, return last block now */ 5298c2ecf20Sopenharmony_ci if (bd->writeCount < 0) 5308c2ecf20Sopenharmony_ci return bd->writeCount; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci gotcount = 0; 5338c2ecf20Sopenharmony_ci dbuf = bd->dbuf; 5348c2ecf20Sopenharmony_ci pos = bd->writePos; 5358c2ecf20Sopenharmony_ci xcurrent = bd->writeCurrent; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci /* We will always have pending decoded data to write into the output 5388c2ecf20Sopenharmony_ci buffer unless this is the very first call (in which case we haven't 5398c2ecf20Sopenharmony_ci Huffman-decoded a block into the intermediate buffer yet). */ 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci if (bd->writeCopies) { 5428c2ecf20Sopenharmony_ci /* Inside the loop, writeCopies means extra copies (beyond 1) */ 5438c2ecf20Sopenharmony_ci --bd->writeCopies; 5448c2ecf20Sopenharmony_ci /* Loop outputting bytes */ 5458c2ecf20Sopenharmony_ci for (;;) { 5468c2ecf20Sopenharmony_ci /* If the output buffer is full, snapshot 5478c2ecf20Sopenharmony_ci * state and return */ 5488c2ecf20Sopenharmony_ci if (gotcount >= len) { 5498c2ecf20Sopenharmony_ci bd->writePos = pos; 5508c2ecf20Sopenharmony_ci bd->writeCurrent = xcurrent; 5518c2ecf20Sopenharmony_ci bd->writeCopies++; 5528c2ecf20Sopenharmony_ci return len; 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci /* Write next byte into output buffer, updating CRC */ 5558c2ecf20Sopenharmony_ci outbuf[gotcount++] = xcurrent; 5568c2ecf20Sopenharmony_ci bd->writeCRC = (((bd->writeCRC) << 8) 5578c2ecf20Sopenharmony_ci ^bd->crc32Table[((bd->writeCRC) >> 24) 5588c2ecf20Sopenharmony_ci ^xcurrent]); 5598c2ecf20Sopenharmony_ci /* Loop now if we're outputting multiple 5608c2ecf20Sopenharmony_ci * copies of this byte */ 5618c2ecf20Sopenharmony_ci if (bd->writeCopies) { 5628c2ecf20Sopenharmony_ci --bd->writeCopies; 5638c2ecf20Sopenharmony_ci continue; 5648c2ecf20Sopenharmony_ci } 5658c2ecf20Sopenharmony_cidecode_next_byte: 5668c2ecf20Sopenharmony_ci if (!bd->writeCount--) 5678c2ecf20Sopenharmony_ci break; 5688c2ecf20Sopenharmony_ci /* Follow sequence vector to undo 5698c2ecf20Sopenharmony_ci * Burrows-Wheeler transform */ 5708c2ecf20Sopenharmony_ci previous = xcurrent; 5718c2ecf20Sopenharmony_ci pos = dbuf[pos]; 5728c2ecf20Sopenharmony_ci xcurrent = pos&0xff; 5738c2ecf20Sopenharmony_ci pos >>= 8; 5748c2ecf20Sopenharmony_ci /* After 3 consecutive copies of the same 5758c2ecf20Sopenharmony_ci byte, the 4th is a repeat count. We count 5768c2ecf20Sopenharmony_ci down from 4 instead *of counting up because 5778c2ecf20Sopenharmony_ci testing for non-zero is faster */ 5788c2ecf20Sopenharmony_ci if (--bd->writeRunCountdown) { 5798c2ecf20Sopenharmony_ci if (xcurrent != previous) 5808c2ecf20Sopenharmony_ci bd->writeRunCountdown = 4; 5818c2ecf20Sopenharmony_ci } else { 5828c2ecf20Sopenharmony_ci /* We have a repeated run, this byte 5838c2ecf20Sopenharmony_ci * indicates the count */ 5848c2ecf20Sopenharmony_ci bd->writeCopies = xcurrent; 5858c2ecf20Sopenharmony_ci xcurrent = previous; 5868c2ecf20Sopenharmony_ci bd->writeRunCountdown = 5; 5878c2ecf20Sopenharmony_ci /* Sometimes there are just 3 bytes 5888c2ecf20Sopenharmony_ci * (run length 0) */ 5898c2ecf20Sopenharmony_ci if (!bd->writeCopies) 5908c2ecf20Sopenharmony_ci goto decode_next_byte; 5918c2ecf20Sopenharmony_ci /* Subtract the 1 copy we'd output 5928c2ecf20Sopenharmony_ci * anyway to get extras */ 5938c2ecf20Sopenharmony_ci --bd->writeCopies; 5948c2ecf20Sopenharmony_ci } 5958c2ecf20Sopenharmony_ci } 5968c2ecf20Sopenharmony_ci /* Decompression of this block completed successfully */ 5978c2ecf20Sopenharmony_ci bd->writeCRC = ~bd->writeCRC; 5988c2ecf20Sopenharmony_ci bd->totalCRC = ((bd->totalCRC << 1) | 5998c2ecf20Sopenharmony_ci (bd->totalCRC >> 31)) ^ bd->writeCRC; 6008c2ecf20Sopenharmony_ci /* If this block had a CRC error, force file level CRC error. */ 6018c2ecf20Sopenharmony_ci if (bd->writeCRC != bd->headerCRC) { 6028c2ecf20Sopenharmony_ci bd->totalCRC = bd->headerCRC+1; 6038c2ecf20Sopenharmony_ci return RETVAL_LAST_BLOCK; 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci /* Refill the intermediate buffer by Huffman-decoding next 6088c2ecf20Sopenharmony_ci * block of input */ 6098c2ecf20Sopenharmony_ci /* (previous is just a convenient unused temp variable here) */ 6108c2ecf20Sopenharmony_ci previous = get_next_block(bd); 6118c2ecf20Sopenharmony_ci if (previous) { 6128c2ecf20Sopenharmony_ci bd->writeCount = previous; 6138c2ecf20Sopenharmony_ci return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; 6148c2ecf20Sopenharmony_ci } 6158c2ecf20Sopenharmony_ci bd->writeCRC = 0xffffffffUL; 6168c2ecf20Sopenharmony_ci pos = bd->writePos; 6178c2ecf20Sopenharmony_ci xcurrent = bd->writeCurrent; 6188c2ecf20Sopenharmony_ci goto decode_next_byte; 6198c2ecf20Sopenharmony_ci} 6208c2ecf20Sopenharmony_ci 6218c2ecf20Sopenharmony_cistatic long INIT nofill(void *buf, unsigned long len) 6228c2ecf20Sopenharmony_ci{ 6238c2ecf20Sopenharmony_ci return -1; 6248c2ecf20Sopenharmony_ci} 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain 6278c2ecf20Sopenharmony_ci a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are 6288c2ecf20Sopenharmony_ci ignored, and data is read from file handle into temporary buffer. */ 6298c2ecf20Sopenharmony_cistatic int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, 6308c2ecf20Sopenharmony_ci long (*fill)(void*, unsigned long)) 6318c2ecf20Sopenharmony_ci{ 6328c2ecf20Sopenharmony_ci struct bunzip_data *bd; 6338c2ecf20Sopenharmony_ci unsigned int i, j, c; 6348c2ecf20Sopenharmony_ci const unsigned int BZh0 = 6358c2ecf20Sopenharmony_ci (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) 6368c2ecf20Sopenharmony_ci +(((unsigned int)'h') << 8)+(unsigned int)'0'; 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci /* Figure out how much data to allocate */ 6398c2ecf20Sopenharmony_ci i = sizeof(struct bunzip_data); 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci /* Allocate bunzip_data. Most fields initialize to zero. */ 6428c2ecf20Sopenharmony_ci bd = *bdp = malloc(i); 6438c2ecf20Sopenharmony_ci if (!bd) 6448c2ecf20Sopenharmony_ci return RETVAL_OUT_OF_MEMORY; 6458c2ecf20Sopenharmony_ci memset(bd, 0, sizeof(struct bunzip_data)); 6468c2ecf20Sopenharmony_ci /* Setup input buffer */ 6478c2ecf20Sopenharmony_ci bd->inbuf = inbuf; 6488c2ecf20Sopenharmony_ci bd->inbufCount = len; 6498c2ecf20Sopenharmony_ci if (fill != NULL) 6508c2ecf20Sopenharmony_ci bd->fill = fill; 6518c2ecf20Sopenharmony_ci else 6528c2ecf20Sopenharmony_ci bd->fill = nofill; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci /* Init the CRC32 table (big endian) */ 6558c2ecf20Sopenharmony_ci for (i = 0; i < 256; i++) { 6568c2ecf20Sopenharmony_ci c = i << 24; 6578c2ecf20Sopenharmony_ci for (j = 8; j; j--) 6588c2ecf20Sopenharmony_ci c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1); 6598c2ecf20Sopenharmony_ci bd->crc32Table[i] = c; 6608c2ecf20Sopenharmony_ci } 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci /* Ensure that file starts with "BZh['1'-'9']." */ 6638c2ecf20Sopenharmony_ci i = get_bits(bd, 32); 6648c2ecf20Sopenharmony_ci if (((unsigned int)(i-BZh0-1)) >= 9) 6658c2ecf20Sopenharmony_ci return RETVAL_NOT_BZIP_DATA; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of 6688c2ecf20Sopenharmony_ci uncompressed data. Allocate intermediate buffer for block. */ 6698c2ecf20Sopenharmony_ci bd->dbufSize = 100000*(i-BZh0); 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); 6728c2ecf20Sopenharmony_ci if (!bd->dbuf) 6738c2ecf20Sopenharmony_ci return RETVAL_OUT_OF_MEMORY; 6748c2ecf20Sopenharmony_ci return RETVAL_OK; 6758c2ecf20Sopenharmony_ci} 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, 6788c2ecf20Sopenharmony_ci not end of file.) */ 6798c2ecf20Sopenharmony_ciSTATIC int INIT bunzip2(unsigned char *buf, long len, 6808c2ecf20Sopenharmony_ci long (*fill)(void*, unsigned long), 6818c2ecf20Sopenharmony_ci long (*flush)(void*, unsigned long), 6828c2ecf20Sopenharmony_ci unsigned char *outbuf, 6838c2ecf20Sopenharmony_ci long *pos, 6848c2ecf20Sopenharmony_ci void(*error)(char *x)) 6858c2ecf20Sopenharmony_ci{ 6868c2ecf20Sopenharmony_ci struct bunzip_data *bd; 6878c2ecf20Sopenharmony_ci int i = -1; 6888c2ecf20Sopenharmony_ci unsigned char *inbuf; 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci if (flush) 6918c2ecf20Sopenharmony_ci outbuf = malloc(BZIP2_IOBUF_SIZE); 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci if (!outbuf) { 6948c2ecf20Sopenharmony_ci error("Could not allocate output buffer"); 6958c2ecf20Sopenharmony_ci return RETVAL_OUT_OF_MEMORY; 6968c2ecf20Sopenharmony_ci } 6978c2ecf20Sopenharmony_ci if (buf) 6988c2ecf20Sopenharmony_ci inbuf = buf; 6998c2ecf20Sopenharmony_ci else 7008c2ecf20Sopenharmony_ci inbuf = malloc(BZIP2_IOBUF_SIZE); 7018c2ecf20Sopenharmony_ci if (!inbuf) { 7028c2ecf20Sopenharmony_ci error("Could not allocate input buffer"); 7038c2ecf20Sopenharmony_ci i = RETVAL_OUT_OF_MEMORY; 7048c2ecf20Sopenharmony_ci goto exit_0; 7058c2ecf20Sopenharmony_ci } 7068c2ecf20Sopenharmony_ci i = start_bunzip(&bd, inbuf, len, fill); 7078c2ecf20Sopenharmony_ci if (!i) { 7088c2ecf20Sopenharmony_ci for (;;) { 7098c2ecf20Sopenharmony_ci i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); 7108c2ecf20Sopenharmony_ci if (i <= 0) 7118c2ecf20Sopenharmony_ci break; 7128c2ecf20Sopenharmony_ci if (!flush) 7138c2ecf20Sopenharmony_ci outbuf += i; 7148c2ecf20Sopenharmony_ci else 7158c2ecf20Sopenharmony_ci if (i != flush(outbuf, i)) { 7168c2ecf20Sopenharmony_ci i = RETVAL_UNEXPECTED_OUTPUT_EOF; 7178c2ecf20Sopenharmony_ci break; 7188c2ecf20Sopenharmony_ci } 7198c2ecf20Sopenharmony_ci } 7208c2ecf20Sopenharmony_ci } 7218c2ecf20Sopenharmony_ci /* Check CRC and release memory */ 7228c2ecf20Sopenharmony_ci if (i == RETVAL_LAST_BLOCK) { 7238c2ecf20Sopenharmony_ci if (bd->headerCRC != bd->totalCRC) 7248c2ecf20Sopenharmony_ci error("Data integrity error when decompressing."); 7258c2ecf20Sopenharmony_ci else 7268c2ecf20Sopenharmony_ci i = RETVAL_OK; 7278c2ecf20Sopenharmony_ci } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { 7288c2ecf20Sopenharmony_ci error("Compressed file ends unexpectedly"); 7298c2ecf20Sopenharmony_ci } 7308c2ecf20Sopenharmony_ci if (!bd) 7318c2ecf20Sopenharmony_ci goto exit_1; 7328c2ecf20Sopenharmony_ci if (bd->dbuf) 7338c2ecf20Sopenharmony_ci large_free(bd->dbuf); 7348c2ecf20Sopenharmony_ci if (pos) 7358c2ecf20Sopenharmony_ci *pos = bd->inbufPos; 7368c2ecf20Sopenharmony_ci free(bd); 7378c2ecf20Sopenharmony_ciexit_1: 7388c2ecf20Sopenharmony_ci if (!buf) 7398c2ecf20Sopenharmony_ci free(inbuf); 7408c2ecf20Sopenharmony_ciexit_0: 7418c2ecf20Sopenharmony_ci if (flush) 7428c2ecf20Sopenharmony_ci free(outbuf); 7438c2ecf20Sopenharmony_ci return i; 7448c2ecf20Sopenharmony_ci} 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_ci#ifdef PREBOOT 7478c2ecf20Sopenharmony_ciSTATIC int INIT __decompress(unsigned char *buf, long len, 7488c2ecf20Sopenharmony_ci long (*fill)(void*, unsigned long), 7498c2ecf20Sopenharmony_ci long (*flush)(void*, unsigned long), 7508c2ecf20Sopenharmony_ci unsigned char *outbuf, long olen, 7518c2ecf20Sopenharmony_ci long *pos, 7528c2ecf20Sopenharmony_ci void (*error)(char *x)) 7538c2ecf20Sopenharmony_ci{ 7548c2ecf20Sopenharmony_ci return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); 7558c2ecf20Sopenharmony_ci} 7568c2ecf20Sopenharmony_ci#endif 757