18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * LZMA2 decoder 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Authors: Lasse Collin <lasse.collin@tukaani.org> 58c2ecf20Sopenharmony_ci * Igor Pavlov <https://7-zip.org/> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * This file has been put into the public domain. 88c2ecf20Sopenharmony_ci * You can do whatever you want with this file. 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include "xz_private.h" 128c2ecf20Sopenharmony_ci#include "xz_lzma2.h" 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci/* 158c2ecf20Sopenharmony_ci * Range decoder initialization eats the first five bytes of each LZMA chunk. 168c2ecf20Sopenharmony_ci */ 178c2ecf20Sopenharmony_ci#define RC_INIT_BYTES 5 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci/* 208c2ecf20Sopenharmony_ci * Minimum number of usable input buffer to safely decode one LZMA symbol. 218c2ecf20Sopenharmony_ci * The worst case is that we decode 22 bits using probabilities and 26 228c2ecf20Sopenharmony_ci * direct bits. This may decode at maximum of 20 bytes of input. However, 238c2ecf20Sopenharmony_ci * lzma_main() does an extra normalization before returning, thus we 248c2ecf20Sopenharmony_ci * need to put 21 here. 258c2ecf20Sopenharmony_ci */ 268c2ecf20Sopenharmony_ci#define LZMA_IN_REQUIRED 21 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci/* 298c2ecf20Sopenharmony_ci * Dictionary (history buffer) 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * These are always true: 328c2ecf20Sopenharmony_ci * start <= pos <= full <= end 338c2ecf20Sopenharmony_ci * pos <= limit <= end 348c2ecf20Sopenharmony_ci * 358c2ecf20Sopenharmony_ci * In multi-call mode, also these are true: 368c2ecf20Sopenharmony_ci * end == size 378c2ecf20Sopenharmony_ci * size <= size_max 388c2ecf20Sopenharmony_ci * allocated <= size 398c2ecf20Sopenharmony_ci * 408c2ecf20Sopenharmony_ci * Most of these variables are size_t to support single-call mode, 418c2ecf20Sopenharmony_ci * in which the dictionary variables address the actual output 428c2ecf20Sopenharmony_ci * buffer directly. 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_cistruct dictionary { 458c2ecf20Sopenharmony_ci /* Beginning of the history buffer */ 468c2ecf20Sopenharmony_ci uint8_t *buf; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci /* Old position in buf (before decoding more data) */ 498c2ecf20Sopenharmony_ci size_t start; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci /* Position in buf */ 528c2ecf20Sopenharmony_ci size_t pos; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci /* 558c2ecf20Sopenharmony_ci * How full dictionary is. This is used to detect corrupt input that 568c2ecf20Sopenharmony_ci * would read beyond the beginning of the uncompressed stream. 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_ci size_t full; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci /* Write limit; we don't write to buf[limit] or later bytes. */ 618c2ecf20Sopenharmony_ci size_t limit; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci /* 648c2ecf20Sopenharmony_ci * End of the dictionary buffer. In multi-call mode, this is 658c2ecf20Sopenharmony_ci * the same as the dictionary size. In single-call mode, this 668c2ecf20Sopenharmony_ci * indicates the size of the output buffer. 678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_ci size_t end; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci /* 718c2ecf20Sopenharmony_ci * Size of the dictionary as specified in Block Header. This is used 728c2ecf20Sopenharmony_ci * together with "full" to detect corrupt input that would make us 738c2ecf20Sopenharmony_ci * read beyond the beginning of the uncompressed stream. 748c2ecf20Sopenharmony_ci */ 758c2ecf20Sopenharmony_ci uint32_t size; 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci /* 788c2ecf20Sopenharmony_ci * Maximum allowed dictionary size in multi-call mode. 798c2ecf20Sopenharmony_ci * This is ignored in single-call mode. 808c2ecf20Sopenharmony_ci */ 818c2ecf20Sopenharmony_ci uint32_t size_max; 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci /* 848c2ecf20Sopenharmony_ci * Amount of memory currently allocated for the dictionary. 858c2ecf20Sopenharmony_ci * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, 868c2ecf20Sopenharmony_ci * size_max is always the same as the allocated size.) 878c2ecf20Sopenharmony_ci */ 888c2ecf20Sopenharmony_ci uint32_t allocated; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci /* Operation mode */ 918c2ecf20Sopenharmony_ci enum xz_mode mode; 928c2ecf20Sopenharmony_ci}; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci/* Range decoder */ 958c2ecf20Sopenharmony_cistruct rc_dec { 968c2ecf20Sopenharmony_ci uint32_t range; 978c2ecf20Sopenharmony_ci uint32_t code; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci /* 1008c2ecf20Sopenharmony_ci * Number of initializing bytes remaining to be read 1018c2ecf20Sopenharmony_ci * by rc_read_init(). 1028c2ecf20Sopenharmony_ci */ 1038c2ecf20Sopenharmony_ci uint32_t init_bytes_left; 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci /* 1068c2ecf20Sopenharmony_ci * Buffer from which we read our input. It can be either 1078c2ecf20Sopenharmony_ci * temp.buf or the caller-provided input buffer. 1088c2ecf20Sopenharmony_ci */ 1098c2ecf20Sopenharmony_ci const uint8_t *in; 1108c2ecf20Sopenharmony_ci size_t in_pos; 1118c2ecf20Sopenharmony_ci size_t in_limit; 1128c2ecf20Sopenharmony_ci}; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci/* Probabilities for a length decoder. */ 1158c2ecf20Sopenharmony_cistruct lzma_len_dec { 1168c2ecf20Sopenharmony_ci /* Probability of match length being at least 10 */ 1178c2ecf20Sopenharmony_ci uint16_t choice; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci /* Probability of match length being at least 18 */ 1208c2ecf20Sopenharmony_ci uint16_t choice2; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci /* Probabilities for match lengths 2-9 */ 1238c2ecf20Sopenharmony_ci uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci /* Probabilities for match lengths 10-17 */ 1268c2ecf20Sopenharmony_ci uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci /* Probabilities for match lengths 18-273 */ 1298c2ecf20Sopenharmony_ci uint16_t high[LEN_HIGH_SYMBOLS]; 1308c2ecf20Sopenharmony_ci}; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_cistruct lzma_dec { 1338c2ecf20Sopenharmony_ci /* Distances of latest four matches */ 1348c2ecf20Sopenharmony_ci uint32_t rep0; 1358c2ecf20Sopenharmony_ci uint32_t rep1; 1368c2ecf20Sopenharmony_ci uint32_t rep2; 1378c2ecf20Sopenharmony_ci uint32_t rep3; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci /* Types of the most recently seen LZMA symbols */ 1408c2ecf20Sopenharmony_ci enum lzma_state state; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci /* 1438c2ecf20Sopenharmony_ci * Length of a match. This is updated so that dict_repeat can 1448c2ecf20Sopenharmony_ci * be called again to finish repeating the whole match. 1458c2ecf20Sopenharmony_ci */ 1468c2ecf20Sopenharmony_ci uint32_t len; 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci /* 1498c2ecf20Sopenharmony_ci * LZMA properties or related bit masks (number of literal 1508c2ecf20Sopenharmony_ci * context bits, a mask dervied from the number of literal 1518c2ecf20Sopenharmony_ci * position bits, and a mask dervied from the number 1528c2ecf20Sopenharmony_ci * position bits) 1538c2ecf20Sopenharmony_ci */ 1548c2ecf20Sopenharmony_ci uint32_t lc; 1558c2ecf20Sopenharmony_ci uint32_t literal_pos_mask; /* (1 << lp) - 1 */ 1568c2ecf20Sopenharmony_ci uint32_t pos_mask; /* (1 << pb) - 1 */ 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ 1598c2ecf20Sopenharmony_ci uint16_t is_match[STATES][POS_STATES_MAX]; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ 1628c2ecf20Sopenharmony_ci uint16_t is_rep[STATES]; 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci /* 1658c2ecf20Sopenharmony_ci * If 0, distance of a repeated match is rep0. 1668c2ecf20Sopenharmony_ci * Otherwise check is_rep1. 1678c2ecf20Sopenharmony_ci */ 1688c2ecf20Sopenharmony_ci uint16_t is_rep0[STATES]; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* 1718c2ecf20Sopenharmony_ci * If 0, distance of a repeated match is rep1. 1728c2ecf20Sopenharmony_ci * Otherwise check is_rep2. 1738c2ecf20Sopenharmony_ci */ 1748c2ecf20Sopenharmony_ci uint16_t is_rep1[STATES]; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ 1778c2ecf20Sopenharmony_ci uint16_t is_rep2[STATES]; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci /* 1808c2ecf20Sopenharmony_ci * If 1, the repeated match has length of one byte. Otherwise 1818c2ecf20Sopenharmony_ci * the length is decoded from rep_len_decoder. 1828c2ecf20Sopenharmony_ci */ 1838c2ecf20Sopenharmony_ci uint16_t is_rep0_long[STATES][POS_STATES_MAX]; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci /* 1868c2ecf20Sopenharmony_ci * Probability tree for the highest two bits of the match 1878c2ecf20Sopenharmony_ci * distance. There is a separate probability tree for match 1888c2ecf20Sopenharmony_ci * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. 1898c2ecf20Sopenharmony_ci */ 1908c2ecf20Sopenharmony_ci uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci /* 1938c2ecf20Sopenharmony_ci * Probility trees for additional bits for match distance 1948c2ecf20Sopenharmony_ci * when the distance is in the range [4, 127]. 1958c2ecf20Sopenharmony_ci */ 1968c2ecf20Sopenharmony_ci uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci /* 1998c2ecf20Sopenharmony_ci * Probability tree for the lowest four bits of a match 2008c2ecf20Sopenharmony_ci * distance that is equal to or greater than 128. 2018c2ecf20Sopenharmony_ci */ 2028c2ecf20Sopenharmony_ci uint16_t dist_align[ALIGN_SIZE]; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci /* Length of a normal match */ 2058c2ecf20Sopenharmony_ci struct lzma_len_dec match_len_dec; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci /* Length of a repeated match */ 2088c2ecf20Sopenharmony_ci struct lzma_len_dec rep_len_dec; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci /* Probabilities of literals */ 2118c2ecf20Sopenharmony_ci uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; 2128c2ecf20Sopenharmony_ci}; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_cistruct lzma2_dec { 2158c2ecf20Sopenharmony_ci /* Position in xz_dec_lzma2_run(). */ 2168c2ecf20Sopenharmony_ci enum lzma2_seq { 2178c2ecf20Sopenharmony_ci SEQ_CONTROL, 2188c2ecf20Sopenharmony_ci SEQ_UNCOMPRESSED_1, 2198c2ecf20Sopenharmony_ci SEQ_UNCOMPRESSED_2, 2208c2ecf20Sopenharmony_ci SEQ_COMPRESSED_0, 2218c2ecf20Sopenharmony_ci SEQ_COMPRESSED_1, 2228c2ecf20Sopenharmony_ci SEQ_PROPERTIES, 2238c2ecf20Sopenharmony_ci SEQ_LZMA_PREPARE, 2248c2ecf20Sopenharmony_ci SEQ_LZMA_RUN, 2258c2ecf20Sopenharmony_ci SEQ_COPY 2268c2ecf20Sopenharmony_ci } sequence; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci /* Next position after decoding the compressed size of the chunk. */ 2298c2ecf20Sopenharmony_ci enum lzma2_seq next_sequence; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ 2328c2ecf20Sopenharmony_ci uint32_t uncompressed; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci /* 2358c2ecf20Sopenharmony_ci * Compressed size of LZMA chunk or compressed/uncompressed 2368c2ecf20Sopenharmony_ci * size of uncompressed chunk (64 KiB at maximum) 2378c2ecf20Sopenharmony_ci */ 2388c2ecf20Sopenharmony_ci uint32_t compressed; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci /* 2418c2ecf20Sopenharmony_ci * True if dictionary reset is needed. This is false before 2428c2ecf20Sopenharmony_ci * the first chunk (LZMA or uncompressed). 2438c2ecf20Sopenharmony_ci */ 2448c2ecf20Sopenharmony_ci bool need_dict_reset; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci /* 2478c2ecf20Sopenharmony_ci * True if new LZMA properties are needed. This is false 2488c2ecf20Sopenharmony_ci * before the first LZMA chunk. 2498c2ecf20Sopenharmony_ci */ 2508c2ecf20Sopenharmony_ci bool need_props; 2518c2ecf20Sopenharmony_ci}; 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_cistruct xz_dec_lzma2 { 2548c2ecf20Sopenharmony_ci /* 2558c2ecf20Sopenharmony_ci * The order below is important on x86 to reduce code size and 2568c2ecf20Sopenharmony_ci * it shouldn't hurt on other platforms. Everything up to and 2578c2ecf20Sopenharmony_ci * including lzma.pos_mask are in the first 128 bytes on x86-32, 2588c2ecf20Sopenharmony_ci * which allows using smaller instructions to access those 2598c2ecf20Sopenharmony_ci * variables. On x86-64, fewer variables fit into the first 128 2608c2ecf20Sopenharmony_ci * bytes, but this is still the best order without sacrificing 2618c2ecf20Sopenharmony_ci * the readability by splitting the structures. 2628c2ecf20Sopenharmony_ci */ 2638c2ecf20Sopenharmony_ci struct rc_dec rc; 2648c2ecf20Sopenharmony_ci struct dictionary dict; 2658c2ecf20Sopenharmony_ci struct lzma2_dec lzma2; 2668c2ecf20Sopenharmony_ci struct lzma_dec lzma; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci /* 2698c2ecf20Sopenharmony_ci * Temporary buffer which holds small number of input bytes between 2708c2ecf20Sopenharmony_ci * decoder calls. See lzma2_lzma() for details. 2718c2ecf20Sopenharmony_ci */ 2728c2ecf20Sopenharmony_ci struct { 2738c2ecf20Sopenharmony_ci uint32_t size; 2748c2ecf20Sopenharmony_ci uint8_t buf[3 * LZMA_IN_REQUIRED]; 2758c2ecf20Sopenharmony_ci } temp; 2768c2ecf20Sopenharmony_ci}; 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci/************** 2798c2ecf20Sopenharmony_ci * Dictionary * 2808c2ecf20Sopenharmony_ci **************/ 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci/* 2838c2ecf20Sopenharmony_ci * Reset the dictionary state. When in single-call mode, set up the beginning 2848c2ecf20Sopenharmony_ci * of the dictionary to point to the actual output buffer. 2858c2ecf20Sopenharmony_ci */ 2868c2ecf20Sopenharmony_cistatic void dict_reset(struct dictionary *dict, struct xz_buf *b) 2878c2ecf20Sopenharmony_ci{ 2888c2ecf20Sopenharmony_ci if (DEC_IS_SINGLE(dict->mode)) { 2898c2ecf20Sopenharmony_ci dict->buf = b->out + b->out_pos; 2908c2ecf20Sopenharmony_ci dict->end = b->out_size - b->out_pos; 2918c2ecf20Sopenharmony_ci } 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci dict->start = 0; 2948c2ecf20Sopenharmony_ci dict->pos = 0; 2958c2ecf20Sopenharmony_ci dict->limit = 0; 2968c2ecf20Sopenharmony_ci dict->full = 0; 2978c2ecf20Sopenharmony_ci} 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci/* Set dictionary write limit */ 3008c2ecf20Sopenharmony_cistatic void dict_limit(struct dictionary *dict, size_t out_max) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci if (dict->end - dict->pos <= out_max) 3038c2ecf20Sopenharmony_ci dict->limit = dict->end; 3048c2ecf20Sopenharmony_ci else 3058c2ecf20Sopenharmony_ci dict->limit = dict->pos + out_max; 3068c2ecf20Sopenharmony_ci} 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci/* Return true if at least one byte can be written into the dictionary. */ 3098c2ecf20Sopenharmony_cistatic inline bool dict_has_space(const struct dictionary *dict) 3108c2ecf20Sopenharmony_ci{ 3118c2ecf20Sopenharmony_ci return dict->pos < dict->limit; 3128c2ecf20Sopenharmony_ci} 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci/* 3158c2ecf20Sopenharmony_ci * Get a byte from the dictionary at the given distance. The distance is 3168c2ecf20Sopenharmony_ci * assumed to valid, or as a special case, zero when the dictionary is 3178c2ecf20Sopenharmony_ci * still empty. This special case is needed for single-call decoding to 3188c2ecf20Sopenharmony_ci * avoid writing a '\0' to the end of the destination buffer. 3198c2ecf20Sopenharmony_ci */ 3208c2ecf20Sopenharmony_cistatic inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist) 3218c2ecf20Sopenharmony_ci{ 3228c2ecf20Sopenharmony_ci size_t offset = dict->pos - dist - 1; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci if (dist >= dict->pos) 3258c2ecf20Sopenharmony_ci offset += dict->end; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci return dict->full > 0 ? dict->buf[offset] : 0; 3288c2ecf20Sopenharmony_ci} 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci/* 3318c2ecf20Sopenharmony_ci * Put one byte into the dictionary. It is assumed that there is space for it. 3328c2ecf20Sopenharmony_ci */ 3338c2ecf20Sopenharmony_cistatic inline void dict_put(struct dictionary *dict, uint8_t byte) 3348c2ecf20Sopenharmony_ci{ 3358c2ecf20Sopenharmony_ci dict->buf[dict->pos++] = byte; 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci if (dict->full < dict->pos) 3388c2ecf20Sopenharmony_ci dict->full = dict->pos; 3398c2ecf20Sopenharmony_ci} 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci/* 3428c2ecf20Sopenharmony_ci * Repeat given number of bytes from the given distance. If the distance is 3438c2ecf20Sopenharmony_ci * invalid, false is returned. On success, true is returned and *len is 3448c2ecf20Sopenharmony_ci * updated to indicate how many bytes were left to be repeated. 3458c2ecf20Sopenharmony_ci */ 3468c2ecf20Sopenharmony_cistatic bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist) 3478c2ecf20Sopenharmony_ci{ 3488c2ecf20Sopenharmony_ci size_t back; 3498c2ecf20Sopenharmony_ci uint32_t left; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci if (dist >= dict->full || dist >= dict->size) 3528c2ecf20Sopenharmony_ci return false; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci left = min_t(size_t, dict->limit - dict->pos, *len); 3558c2ecf20Sopenharmony_ci *len -= left; 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci back = dict->pos - dist - 1; 3588c2ecf20Sopenharmony_ci if (dist >= dict->pos) 3598c2ecf20Sopenharmony_ci back += dict->end; 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci do { 3628c2ecf20Sopenharmony_ci dict->buf[dict->pos++] = dict->buf[back++]; 3638c2ecf20Sopenharmony_ci if (back == dict->end) 3648c2ecf20Sopenharmony_ci back = 0; 3658c2ecf20Sopenharmony_ci } while (--left > 0); 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci if (dict->full < dict->pos) 3688c2ecf20Sopenharmony_ci dict->full = dict->pos; 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci return true; 3718c2ecf20Sopenharmony_ci} 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci/* Copy uncompressed data as is from input to dictionary and output buffers. */ 3748c2ecf20Sopenharmony_cistatic void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, 3758c2ecf20Sopenharmony_ci uint32_t *left) 3768c2ecf20Sopenharmony_ci{ 3778c2ecf20Sopenharmony_ci size_t copy_size; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci while (*left > 0 && b->in_pos < b->in_size 3808c2ecf20Sopenharmony_ci && b->out_pos < b->out_size) { 3818c2ecf20Sopenharmony_ci copy_size = min(b->in_size - b->in_pos, 3828c2ecf20Sopenharmony_ci b->out_size - b->out_pos); 3838c2ecf20Sopenharmony_ci if (copy_size > dict->end - dict->pos) 3848c2ecf20Sopenharmony_ci copy_size = dict->end - dict->pos; 3858c2ecf20Sopenharmony_ci if (copy_size > *left) 3868c2ecf20Sopenharmony_ci copy_size = *left; 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci *left -= copy_size; 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci /* 3918c2ecf20Sopenharmony_ci * If doing in-place decompression in single-call mode and the 3928c2ecf20Sopenharmony_ci * uncompressed size of the file is larger than the caller 3938c2ecf20Sopenharmony_ci * thought (i.e. it is invalid input!), the buffers below may 3948c2ecf20Sopenharmony_ci * overlap and cause undefined behavior with memcpy(). 3958c2ecf20Sopenharmony_ci * With valid inputs memcpy() would be fine here. 3968c2ecf20Sopenharmony_ci */ 3978c2ecf20Sopenharmony_ci memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size); 3988c2ecf20Sopenharmony_ci dict->pos += copy_size; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci if (dict->full < dict->pos) 4018c2ecf20Sopenharmony_ci dict->full = dict->pos; 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci if (DEC_IS_MULTI(dict->mode)) { 4048c2ecf20Sopenharmony_ci if (dict->pos == dict->end) 4058c2ecf20Sopenharmony_ci dict->pos = 0; 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci /* 4088c2ecf20Sopenharmony_ci * Like above but for multi-call mode: use memmove() 4098c2ecf20Sopenharmony_ci * to avoid undefined behavior with invalid input. 4108c2ecf20Sopenharmony_ci */ 4118c2ecf20Sopenharmony_ci memmove(b->out + b->out_pos, b->in + b->in_pos, 4128c2ecf20Sopenharmony_ci copy_size); 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci dict->start = dict->pos; 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci b->out_pos += copy_size; 4188c2ecf20Sopenharmony_ci b->in_pos += copy_size; 4198c2ecf20Sopenharmony_ci } 4208c2ecf20Sopenharmony_ci} 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci/* 4238c2ecf20Sopenharmony_ci * Flush pending data from dictionary to b->out. It is assumed that there is 4248c2ecf20Sopenharmony_ci * enough space in b->out. This is guaranteed because caller uses dict_limit() 4258c2ecf20Sopenharmony_ci * before decoding data into the dictionary. 4268c2ecf20Sopenharmony_ci */ 4278c2ecf20Sopenharmony_cistatic uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci size_t copy_size = dict->pos - dict->start; 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci if (DEC_IS_MULTI(dict->mode)) { 4328c2ecf20Sopenharmony_ci if (dict->pos == dict->end) 4338c2ecf20Sopenharmony_ci dict->pos = 0; 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci /* 4368c2ecf20Sopenharmony_ci * These buffers cannot overlap even if doing in-place 4378c2ecf20Sopenharmony_ci * decompression because in multi-call mode dict->buf 4388c2ecf20Sopenharmony_ci * has been allocated by us in this file; it's not 4398c2ecf20Sopenharmony_ci * provided by the caller like in single-call mode. 4408c2ecf20Sopenharmony_ci */ 4418c2ecf20Sopenharmony_ci memcpy(b->out + b->out_pos, dict->buf + dict->start, 4428c2ecf20Sopenharmony_ci copy_size); 4438c2ecf20Sopenharmony_ci } 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci dict->start = dict->pos; 4468c2ecf20Sopenharmony_ci b->out_pos += copy_size; 4478c2ecf20Sopenharmony_ci return copy_size; 4488c2ecf20Sopenharmony_ci} 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci/***************** 4518c2ecf20Sopenharmony_ci * Range decoder * 4528c2ecf20Sopenharmony_ci *****************/ 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci/* Reset the range decoder. */ 4558c2ecf20Sopenharmony_cistatic void rc_reset(struct rc_dec *rc) 4568c2ecf20Sopenharmony_ci{ 4578c2ecf20Sopenharmony_ci rc->range = (uint32_t)-1; 4588c2ecf20Sopenharmony_ci rc->code = 0; 4598c2ecf20Sopenharmony_ci rc->init_bytes_left = RC_INIT_BYTES; 4608c2ecf20Sopenharmony_ci} 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci/* 4638c2ecf20Sopenharmony_ci * Read the first five initial bytes into rc->code if they haven't been 4648c2ecf20Sopenharmony_ci * read already. (Yes, the first byte gets completely ignored.) 4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_cistatic bool rc_read_init(struct rc_dec *rc, struct xz_buf *b) 4678c2ecf20Sopenharmony_ci{ 4688c2ecf20Sopenharmony_ci while (rc->init_bytes_left > 0) { 4698c2ecf20Sopenharmony_ci if (b->in_pos == b->in_size) 4708c2ecf20Sopenharmony_ci return false; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci rc->code = (rc->code << 8) + b->in[b->in_pos++]; 4738c2ecf20Sopenharmony_ci --rc->init_bytes_left; 4748c2ecf20Sopenharmony_ci } 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci return true; 4778c2ecf20Sopenharmony_ci} 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci/* Return true if there may not be enough input for the next decoding loop. */ 4808c2ecf20Sopenharmony_cistatic inline bool rc_limit_exceeded(const struct rc_dec *rc) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci return rc->in_pos > rc->in_limit; 4838c2ecf20Sopenharmony_ci} 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci/* 4868c2ecf20Sopenharmony_ci * Return true if it is possible (from point of view of range decoder) that 4878c2ecf20Sopenharmony_ci * we have reached the end of the LZMA chunk. 4888c2ecf20Sopenharmony_ci */ 4898c2ecf20Sopenharmony_cistatic inline bool rc_is_finished(const struct rc_dec *rc) 4908c2ecf20Sopenharmony_ci{ 4918c2ecf20Sopenharmony_ci return rc->code == 0; 4928c2ecf20Sopenharmony_ci} 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci/* Read the next input byte if needed. */ 4958c2ecf20Sopenharmony_cistatic __always_inline void rc_normalize(struct rc_dec *rc) 4968c2ecf20Sopenharmony_ci{ 4978c2ecf20Sopenharmony_ci if (rc->range < RC_TOP_VALUE) { 4988c2ecf20Sopenharmony_ci rc->range <<= RC_SHIFT_BITS; 4998c2ecf20Sopenharmony_ci rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; 5008c2ecf20Sopenharmony_ci } 5018c2ecf20Sopenharmony_ci} 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci/* 5048c2ecf20Sopenharmony_ci * Decode one bit. In some versions, this function has been splitted in three 5058c2ecf20Sopenharmony_ci * functions so that the compiler is supposed to be able to more easily avoid 5068c2ecf20Sopenharmony_ci * an extra branch. In this particular version of the LZMA decoder, this 5078c2ecf20Sopenharmony_ci * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 5088c2ecf20Sopenharmony_ci * on x86). Using a non-splitted version results in nicer looking code too. 5098c2ecf20Sopenharmony_ci * 5108c2ecf20Sopenharmony_ci * NOTE: This must return an int. Do not make it return a bool or the speed 5118c2ecf20Sopenharmony_ci * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, 5128c2ecf20Sopenharmony_ci * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) 5138c2ecf20Sopenharmony_ci */ 5148c2ecf20Sopenharmony_cistatic __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob) 5158c2ecf20Sopenharmony_ci{ 5168c2ecf20Sopenharmony_ci uint32_t bound; 5178c2ecf20Sopenharmony_ci int bit; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci rc_normalize(rc); 5208c2ecf20Sopenharmony_ci bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; 5218c2ecf20Sopenharmony_ci if (rc->code < bound) { 5228c2ecf20Sopenharmony_ci rc->range = bound; 5238c2ecf20Sopenharmony_ci *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; 5248c2ecf20Sopenharmony_ci bit = 0; 5258c2ecf20Sopenharmony_ci } else { 5268c2ecf20Sopenharmony_ci rc->range -= bound; 5278c2ecf20Sopenharmony_ci rc->code -= bound; 5288c2ecf20Sopenharmony_ci *prob -= *prob >> RC_MOVE_BITS; 5298c2ecf20Sopenharmony_ci bit = 1; 5308c2ecf20Sopenharmony_ci } 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci return bit; 5338c2ecf20Sopenharmony_ci} 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci/* Decode a bittree starting from the most significant bit. */ 5368c2ecf20Sopenharmony_cistatic __always_inline uint32_t rc_bittree(struct rc_dec *rc, 5378c2ecf20Sopenharmony_ci uint16_t *probs, uint32_t limit) 5388c2ecf20Sopenharmony_ci{ 5398c2ecf20Sopenharmony_ci uint32_t symbol = 1; 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci do { 5428c2ecf20Sopenharmony_ci if (rc_bit(rc, &probs[symbol])) 5438c2ecf20Sopenharmony_ci symbol = (symbol << 1) + 1; 5448c2ecf20Sopenharmony_ci else 5458c2ecf20Sopenharmony_ci symbol <<= 1; 5468c2ecf20Sopenharmony_ci } while (symbol < limit); 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci return symbol; 5498c2ecf20Sopenharmony_ci} 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci/* Decode a bittree starting from the least significant bit. */ 5528c2ecf20Sopenharmony_cistatic __always_inline void rc_bittree_reverse(struct rc_dec *rc, 5538c2ecf20Sopenharmony_ci uint16_t *probs, 5548c2ecf20Sopenharmony_ci uint32_t *dest, uint32_t limit) 5558c2ecf20Sopenharmony_ci{ 5568c2ecf20Sopenharmony_ci uint32_t symbol = 1; 5578c2ecf20Sopenharmony_ci uint32_t i = 0; 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci do { 5608c2ecf20Sopenharmony_ci if (rc_bit(rc, &probs[symbol])) { 5618c2ecf20Sopenharmony_ci symbol = (symbol << 1) + 1; 5628c2ecf20Sopenharmony_ci *dest += 1 << i; 5638c2ecf20Sopenharmony_ci } else { 5648c2ecf20Sopenharmony_ci symbol <<= 1; 5658c2ecf20Sopenharmony_ci } 5668c2ecf20Sopenharmony_ci } while (++i < limit); 5678c2ecf20Sopenharmony_ci} 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci/* Decode direct bits (fixed fifty-fifty probability) */ 5708c2ecf20Sopenharmony_cistatic inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit) 5718c2ecf20Sopenharmony_ci{ 5728c2ecf20Sopenharmony_ci uint32_t mask; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci do { 5758c2ecf20Sopenharmony_ci rc_normalize(rc); 5768c2ecf20Sopenharmony_ci rc->range >>= 1; 5778c2ecf20Sopenharmony_ci rc->code -= rc->range; 5788c2ecf20Sopenharmony_ci mask = (uint32_t)0 - (rc->code >> 31); 5798c2ecf20Sopenharmony_ci rc->code += rc->range & mask; 5808c2ecf20Sopenharmony_ci *dest = (*dest << 1) + (mask + 1); 5818c2ecf20Sopenharmony_ci } while (--limit > 0); 5828c2ecf20Sopenharmony_ci} 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci/******** 5858c2ecf20Sopenharmony_ci * LZMA * 5868c2ecf20Sopenharmony_ci ********/ 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci/* Get pointer to literal coder probability array. */ 5898c2ecf20Sopenharmony_cistatic uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s) 5908c2ecf20Sopenharmony_ci{ 5918c2ecf20Sopenharmony_ci uint32_t prev_byte = dict_get(&s->dict, 0); 5928c2ecf20Sopenharmony_ci uint32_t low = prev_byte >> (8 - s->lzma.lc); 5938c2ecf20Sopenharmony_ci uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; 5948c2ecf20Sopenharmony_ci return s->lzma.literal[low + high]; 5958c2ecf20Sopenharmony_ci} 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci/* Decode a literal (one 8-bit byte) */ 5988c2ecf20Sopenharmony_cistatic void lzma_literal(struct xz_dec_lzma2 *s) 5998c2ecf20Sopenharmony_ci{ 6008c2ecf20Sopenharmony_ci uint16_t *probs; 6018c2ecf20Sopenharmony_ci uint32_t symbol; 6028c2ecf20Sopenharmony_ci uint32_t match_byte; 6038c2ecf20Sopenharmony_ci uint32_t match_bit; 6048c2ecf20Sopenharmony_ci uint32_t offset; 6058c2ecf20Sopenharmony_ci uint32_t i; 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci probs = lzma_literal_probs(s); 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci if (lzma_state_is_literal(s->lzma.state)) { 6108c2ecf20Sopenharmony_ci symbol = rc_bittree(&s->rc, probs, 0x100); 6118c2ecf20Sopenharmony_ci } else { 6128c2ecf20Sopenharmony_ci symbol = 1; 6138c2ecf20Sopenharmony_ci match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; 6148c2ecf20Sopenharmony_ci offset = 0x100; 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci do { 6178c2ecf20Sopenharmony_ci match_bit = match_byte & offset; 6188c2ecf20Sopenharmony_ci match_byte <<= 1; 6198c2ecf20Sopenharmony_ci i = offset + match_bit + symbol; 6208c2ecf20Sopenharmony_ci 6218c2ecf20Sopenharmony_ci if (rc_bit(&s->rc, &probs[i])) { 6228c2ecf20Sopenharmony_ci symbol = (symbol << 1) + 1; 6238c2ecf20Sopenharmony_ci offset &= match_bit; 6248c2ecf20Sopenharmony_ci } else { 6258c2ecf20Sopenharmony_ci symbol <<= 1; 6268c2ecf20Sopenharmony_ci offset &= ~match_bit; 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci } while (symbol < 0x100); 6298c2ecf20Sopenharmony_ci } 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci dict_put(&s->dict, (uint8_t)symbol); 6328c2ecf20Sopenharmony_ci lzma_state_literal(&s->lzma.state); 6338c2ecf20Sopenharmony_ci} 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci/* Decode the length of the match into s->lzma.len. */ 6368c2ecf20Sopenharmony_cistatic void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, 6378c2ecf20Sopenharmony_ci uint32_t pos_state) 6388c2ecf20Sopenharmony_ci{ 6398c2ecf20Sopenharmony_ci uint16_t *probs; 6408c2ecf20Sopenharmony_ci uint32_t limit; 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &l->choice)) { 6438c2ecf20Sopenharmony_ci probs = l->low[pos_state]; 6448c2ecf20Sopenharmony_ci limit = LEN_LOW_SYMBOLS; 6458c2ecf20Sopenharmony_ci s->lzma.len = MATCH_LEN_MIN; 6468c2ecf20Sopenharmony_ci } else { 6478c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &l->choice2)) { 6488c2ecf20Sopenharmony_ci probs = l->mid[pos_state]; 6498c2ecf20Sopenharmony_ci limit = LEN_MID_SYMBOLS; 6508c2ecf20Sopenharmony_ci s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; 6518c2ecf20Sopenharmony_ci } else { 6528c2ecf20Sopenharmony_ci probs = l->high; 6538c2ecf20Sopenharmony_ci limit = LEN_HIGH_SYMBOLS; 6548c2ecf20Sopenharmony_ci s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS 6558c2ecf20Sopenharmony_ci + LEN_MID_SYMBOLS; 6568c2ecf20Sopenharmony_ci } 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci/* Decode a match. The distance will be stored in s->lzma.rep0. */ 6638c2ecf20Sopenharmony_cistatic void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) 6648c2ecf20Sopenharmony_ci{ 6658c2ecf20Sopenharmony_ci uint16_t *probs; 6668c2ecf20Sopenharmony_ci uint32_t dist_slot; 6678c2ecf20Sopenharmony_ci uint32_t limit; 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci lzma_state_match(&s->lzma.state); 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci s->lzma.rep3 = s->lzma.rep2; 6728c2ecf20Sopenharmony_ci s->lzma.rep2 = s->lzma.rep1; 6738c2ecf20Sopenharmony_ci s->lzma.rep1 = s->lzma.rep0; 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci lzma_len(s, &s->lzma.match_len_dec, pos_state); 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; 6788c2ecf20Sopenharmony_ci dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci if (dist_slot < DIST_MODEL_START) { 6818c2ecf20Sopenharmony_ci s->lzma.rep0 = dist_slot; 6828c2ecf20Sopenharmony_ci } else { 6838c2ecf20Sopenharmony_ci limit = (dist_slot >> 1) - 1; 6848c2ecf20Sopenharmony_ci s->lzma.rep0 = 2 + (dist_slot & 1); 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci if (dist_slot < DIST_MODEL_END) { 6878c2ecf20Sopenharmony_ci s->lzma.rep0 <<= limit; 6888c2ecf20Sopenharmony_ci probs = s->lzma.dist_special + s->lzma.rep0 6898c2ecf20Sopenharmony_ci - dist_slot - 1; 6908c2ecf20Sopenharmony_ci rc_bittree_reverse(&s->rc, probs, 6918c2ecf20Sopenharmony_ci &s->lzma.rep0, limit); 6928c2ecf20Sopenharmony_ci } else { 6938c2ecf20Sopenharmony_ci rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); 6948c2ecf20Sopenharmony_ci s->lzma.rep0 <<= ALIGN_BITS; 6958c2ecf20Sopenharmony_ci rc_bittree_reverse(&s->rc, s->lzma.dist_align, 6968c2ecf20Sopenharmony_ci &s->lzma.rep0, ALIGN_BITS); 6978c2ecf20Sopenharmony_ci } 6988c2ecf20Sopenharmony_ci } 6998c2ecf20Sopenharmony_ci} 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci/* 7028c2ecf20Sopenharmony_ci * Decode a repeated match. The distance is one of the four most recently 7038c2ecf20Sopenharmony_ci * seen matches. The distance will be stored in s->lzma.rep0. 7048c2ecf20Sopenharmony_ci */ 7058c2ecf20Sopenharmony_cistatic void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) 7068c2ecf20Sopenharmony_ci{ 7078c2ecf20Sopenharmony_ci uint32_t tmp; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { 7108c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ 7118c2ecf20Sopenharmony_ci s->lzma.state][pos_state])) { 7128c2ecf20Sopenharmony_ci lzma_state_short_rep(&s->lzma.state); 7138c2ecf20Sopenharmony_ci s->lzma.len = 1; 7148c2ecf20Sopenharmony_ci return; 7158c2ecf20Sopenharmony_ci } 7168c2ecf20Sopenharmony_ci } else { 7178c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { 7188c2ecf20Sopenharmony_ci tmp = s->lzma.rep1; 7198c2ecf20Sopenharmony_ci } else { 7208c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { 7218c2ecf20Sopenharmony_ci tmp = s->lzma.rep2; 7228c2ecf20Sopenharmony_ci } else { 7238c2ecf20Sopenharmony_ci tmp = s->lzma.rep3; 7248c2ecf20Sopenharmony_ci s->lzma.rep3 = s->lzma.rep2; 7258c2ecf20Sopenharmony_ci } 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci s->lzma.rep2 = s->lzma.rep1; 7288c2ecf20Sopenharmony_ci } 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci s->lzma.rep1 = s->lzma.rep0; 7318c2ecf20Sopenharmony_ci s->lzma.rep0 = tmp; 7328c2ecf20Sopenharmony_ci } 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci lzma_state_long_rep(&s->lzma.state); 7358c2ecf20Sopenharmony_ci lzma_len(s, &s->lzma.rep_len_dec, pos_state); 7368c2ecf20Sopenharmony_ci} 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci/* LZMA decoder core */ 7398c2ecf20Sopenharmony_cistatic bool lzma_main(struct xz_dec_lzma2 *s) 7408c2ecf20Sopenharmony_ci{ 7418c2ecf20Sopenharmony_ci uint32_t pos_state; 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci /* 7448c2ecf20Sopenharmony_ci * If the dictionary was reached during the previous call, try to 7458c2ecf20Sopenharmony_ci * finish the possibly pending repeat in the dictionary. 7468c2ecf20Sopenharmony_ci */ 7478c2ecf20Sopenharmony_ci if (dict_has_space(&s->dict) && s->lzma.len > 0) 7488c2ecf20Sopenharmony_ci dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci /* 7518c2ecf20Sopenharmony_ci * Decode more LZMA symbols. One iteration may consume up to 7528c2ecf20Sopenharmony_ci * LZMA_IN_REQUIRED - 1 bytes. 7538c2ecf20Sopenharmony_ci */ 7548c2ecf20Sopenharmony_ci while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { 7558c2ecf20Sopenharmony_ci pos_state = s->dict.pos & s->lzma.pos_mask; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci if (!rc_bit(&s->rc, &s->lzma.is_match[ 7588c2ecf20Sopenharmony_ci s->lzma.state][pos_state])) { 7598c2ecf20Sopenharmony_ci lzma_literal(s); 7608c2ecf20Sopenharmony_ci } else { 7618c2ecf20Sopenharmony_ci if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) 7628c2ecf20Sopenharmony_ci lzma_rep_match(s, pos_state); 7638c2ecf20Sopenharmony_ci else 7648c2ecf20Sopenharmony_ci lzma_match(s, pos_state); 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) 7678c2ecf20Sopenharmony_ci return false; 7688c2ecf20Sopenharmony_ci } 7698c2ecf20Sopenharmony_ci } 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci /* 7728c2ecf20Sopenharmony_ci * Having the range decoder always normalized when we are outside 7738c2ecf20Sopenharmony_ci * this function makes it easier to correctly handle end of the chunk. 7748c2ecf20Sopenharmony_ci */ 7758c2ecf20Sopenharmony_ci rc_normalize(&s->rc); 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci return true; 7788c2ecf20Sopenharmony_ci} 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci/* 7818c2ecf20Sopenharmony_ci * Reset the LZMA decoder and range decoder state. Dictionary is nore reset 7828c2ecf20Sopenharmony_ci * here, because LZMA state may be reset without resetting the dictionary. 7838c2ecf20Sopenharmony_ci */ 7848c2ecf20Sopenharmony_cistatic void lzma_reset(struct xz_dec_lzma2 *s) 7858c2ecf20Sopenharmony_ci{ 7868c2ecf20Sopenharmony_ci uint16_t *probs; 7878c2ecf20Sopenharmony_ci size_t i; 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci s->lzma.state = STATE_LIT_LIT; 7908c2ecf20Sopenharmony_ci s->lzma.rep0 = 0; 7918c2ecf20Sopenharmony_ci s->lzma.rep1 = 0; 7928c2ecf20Sopenharmony_ci s->lzma.rep2 = 0; 7938c2ecf20Sopenharmony_ci s->lzma.rep3 = 0; 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci /* 7968c2ecf20Sopenharmony_ci * All probabilities are initialized to the same value. This hack 7978c2ecf20Sopenharmony_ci * makes the code smaller by avoiding a separate loop for each 7988c2ecf20Sopenharmony_ci * probability array. 7998c2ecf20Sopenharmony_ci * 8008c2ecf20Sopenharmony_ci * This could be optimized so that only that part of literal 8018c2ecf20Sopenharmony_ci * probabilities that are actually required. In the common case 8028c2ecf20Sopenharmony_ci * we would write 12 KiB less. 8038c2ecf20Sopenharmony_ci */ 8048c2ecf20Sopenharmony_ci probs = s->lzma.is_match[0]; 8058c2ecf20Sopenharmony_ci for (i = 0; i < PROBS_TOTAL; ++i) 8068c2ecf20Sopenharmony_ci probs[i] = RC_BIT_MODEL_TOTAL / 2; 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci rc_reset(&s->rc); 8098c2ecf20Sopenharmony_ci} 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci/* 8128c2ecf20Sopenharmony_ci * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks 8138c2ecf20Sopenharmony_ci * from the decoded lp and pb values. On success, the LZMA decoder state is 8148c2ecf20Sopenharmony_ci * reset and true is returned. 8158c2ecf20Sopenharmony_ci */ 8168c2ecf20Sopenharmony_cistatic bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props) 8178c2ecf20Sopenharmony_ci{ 8188c2ecf20Sopenharmony_ci if (props > (4 * 5 + 4) * 9 + 8) 8198c2ecf20Sopenharmony_ci return false; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci s->lzma.pos_mask = 0; 8228c2ecf20Sopenharmony_ci while (props >= 9 * 5) { 8238c2ecf20Sopenharmony_ci props -= 9 * 5; 8248c2ecf20Sopenharmony_ci ++s->lzma.pos_mask; 8258c2ecf20Sopenharmony_ci } 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci s->lzma.literal_pos_mask = 0; 8308c2ecf20Sopenharmony_ci while (props >= 9) { 8318c2ecf20Sopenharmony_ci props -= 9; 8328c2ecf20Sopenharmony_ci ++s->lzma.literal_pos_mask; 8338c2ecf20Sopenharmony_ci } 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_ci s->lzma.lc = props; 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_ci if (s->lzma.lc + s->lzma.literal_pos_mask > 4) 8388c2ecf20Sopenharmony_ci return false; 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci lzma_reset(s); 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci return true; 8458c2ecf20Sopenharmony_ci} 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci/********* 8488c2ecf20Sopenharmony_ci * LZMA2 * 8498c2ecf20Sopenharmony_ci *********/ 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci/* 8528c2ecf20Sopenharmony_ci * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't 8538c2ecf20Sopenharmony_ci * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This 8548c2ecf20Sopenharmony_ci * wrapper function takes care of making the LZMA decoder's assumption safe. 8558c2ecf20Sopenharmony_ci * 8568c2ecf20Sopenharmony_ci * As long as there is plenty of input left to be decoded in the current LZMA 8578c2ecf20Sopenharmony_ci * chunk, we decode directly from the caller-supplied input buffer until 8588c2ecf20Sopenharmony_ci * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into 8598c2ecf20Sopenharmony_ci * s->temp.buf, which (hopefully) gets filled on the next call to this 8608c2ecf20Sopenharmony_ci * function. We decode a few bytes from the temporary buffer so that we can 8618c2ecf20Sopenharmony_ci * continue decoding from the caller-supplied input buffer again. 8628c2ecf20Sopenharmony_ci */ 8638c2ecf20Sopenharmony_cistatic bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) 8648c2ecf20Sopenharmony_ci{ 8658c2ecf20Sopenharmony_ci size_t in_avail; 8668c2ecf20Sopenharmony_ci uint32_t tmp; 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci in_avail = b->in_size - b->in_pos; 8698c2ecf20Sopenharmony_ci if (s->temp.size > 0 || s->lzma2.compressed == 0) { 8708c2ecf20Sopenharmony_ci tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; 8718c2ecf20Sopenharmony_ci if (tmp > s->lzma2.compressed - s->temp.size) 8728c2ecf20Sopenharmony_ci tmp = s->lzma2.compressed - s->temp.size; 8738c2ecf20Sopenharmony_ci if (tmp > in_avail) 8748c2ecf20Sopenharmony_ci tmp = in_avail; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci if (s->temp.size + tmp == s->lzma2.compressed) { 8798c2ecf20Sopenharmony_ci memzero(s->temp.buf + s->temp.size + tmp, 8808c2ecf20Sopenharmony_ci sizeof(s->temp.buf) 8818c2ecf20Sopenharmony_ci - s->temp.size - tmp); 8828c2ecf20Sopenharmony_ci s->rc.in_limit = s->temp.size + tmp; 8838c2ecf20Sopenharmony_ci } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { 8848c2ecf20Sopenharmony_ci s->temp.size += tmp; 8858c2ecf20Sopenharmony_ci b->in_pos += tmp; 8868c2ecf20Sopenharmony_ci return true; 8878c2ecf20Sopenharmony_ci } else { 8888c2ecf20Sopenharmony_ci s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; 8898c2ecf20Sopenharmony_ci } 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_ci s->rc.in = s->temp.buf; 8928c2ecf20Sopenharmony_ci s->rc.in_pos = 0; 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) 8958c2ecf20Sopenharmony_ci return false; 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci s->lzma2.compressed -= s->rc.in_pos; 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci if (s->rc.in_pos < s->temp.size) { 9008c2ecf20Sopenharmony_ci s->temp.size -= s->rc.in_pos; 9018c2ecf20Sopenharmony_ci memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, 9028c2ecf20Sopenharmony_ci s->temp.size); 9038c2ecf20Sopenharmony_ci return true; 9048c2ecf20Sopenharmony_ci } 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci b->in_pos += s->rc.in_pos - s->temp.size; 9078c2ecf20Sopenharmony_ci s->temp.size = 0; 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci in_avail = b->in_size - b->in_pos; 9118c2ecf20Sopenharmony_ci if (in_avail >= LZMA_IN_REQUIRED) { 9128c2ecf20Sopenharmony_ci s->rc.in = b->in; 9138c2ecf20Sopenharmony_ci s->rc.in_pos = b->in_pos; 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) 9168c2ecf20Sopenharmony_ci s->rc.in_limit = b->in_pos + s->lzma2.compressed; 9178c2ecf20Sopenharmony_ci else 9188c2ecf20Sopenharmony_ci s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci if (!lzma_main(s)) 9218c2ecf20Sopenharmony_ci return false; 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci in_avail = s->rc.in_pos - b->in_pos; 9248c2ecf20Sopenharmony_ci if (in_avail > s->lzma2.compressed) 9258c2ecf20Sopenharmony_ci return false; 9268c2ecf20Sopenharmony_ci 9278c2ecf20Sopenharmony_ci s->lzma2.compressed -= in_avail; 9288c2ecf20Sopenharmony_ci b->in_pos = s->rc.in_pos; 9298c2ecf20Sopenharmony_ci } 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_ci in_avail = b->in_size - b->in_pos; 9328c2ecf20Sopenharmony_ci if (in_avail < LZMA_IN_REQUIRED) { 9338c2ecf20Sopenharmony_ci if (in_avail > s->lzma2.compressed) 9348c2ecf20Sopenharmony_ci in_avail = s->lzma2.compressed; 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ci memcpy(s->temp.buf, b->in + b->in_pos, in_avail); 9378c2ecf20Sopenharmony_ci s->temp.size = in_avail; 9388c2ecf20Sopenharmony_ci b->in_pos += in_avail; 9398c2ecf20Sopenharmony_ci } 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci return true; 9428c2ecf20Sopenharmony_ci} 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci/* 9458c2ecf20Sopenharmony_ci * Take care of the LZMA2 control layer, and forward the job of actual LZMA 9468c2ecf20Sopenharmony_ci * decoding or copying of uncompressed chunks to other functions. 9478c2ecf20Sopenharmony_ci */ 9488c2ecf20Sopenharmony_ciXZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, 9498c2ecf20Sopenharmony_ci struct xz_buf *b) 9508c2ecf20Sopenharmony_ci{ 9518c2ecf20Sopenharmony_ci uint32_t tmp; 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { 9548c2ecf20Sopenharmony_ci switch (s->lzma2.sequence) { 9558c2ecf20Sopenharmony_ci case SEQ_CONTROL: 9568c2ecf20Sopenharmony_ci /* 9578c2ecf20Sopenharmony_ci * LZMA2 control byte 9588c2ecf20Sopenharmony_ci * 9598c2ecf20Sopenharmony_ci * Exact values: 9608c2ecf20Sopenharmony_ci * 0x00 End marker 9618c2ecf20Sopenharmony_ci * 0x01 Dictionary reset followed by 9628c2ecf20Sopenharmony_ci * an uncompressed chunk 9638c2ecf20Sopenharmony_ci * 0x02 Uncompressed chunk (no dictionary reset) 9648c2ecf20Sopenharmony_ci * 9658c2ecf20Sopenharmony_ci * Highest three bits (s->control & 0xE0): 9668c2ecf20Sopenharmony_ci * 0xE0 Dictionary reset, new properties and state 9678c2ecf20Sopenharmony_ci * reset, followed by LZMA compressed chunk 9688c2ecf20Sopenharmony_ci * 0xC0 New properties and state reset, followed 9698c2ecf20Sopenharmony_ci * by LZMA compressed chunk (no dictionary 9708c2ecf20Sopenharmony_ci * reset) 9718c2ecf20Sopenharmony_ci * 0xA0 State reset using old properties, 9728c2ecf20Sopenharmony_ci * followed by LZMA compressed chunk (no 9738c2ecf20Sopenharmony_ci * dictionary reset) 9748c2ecf20Sopenharmony_ci * 0x80 LZMA chunk (no dictionary or state reset) 9758c2ecf20Sopenharmony_ci * 9768c2ecf20Sopenharmony_ci * For LZMA compressed chunks, the lowest five bits 9778c2ecf20Sopenharmony_ci * (s->control & 1F) are the highest bits of the 9788c2ecf20Sopenharmony_ci * uncompressed size (bits 16-20). 9798c2ecf20Sopenharmony_ci * 9808c2ecf20Sopenharmony_ci * A new LZMA2 stream must begin with a dictionary 9818c2ecf20Sopenharmony_ci * reset. The first LZMA chunk must set new 9828c2ecf20Sopenharmony_ci * properties and reset the LZMA state. 9838c2ecf20Sopenharmony_ci * 9848c2ecf20Sopenharmony_ci * Values that don't match anything described above 9858c2ecf20Sopenharmony_ci * are invalid and we return XZ_DATA_ERROR. 9868c2ecf20Sopenharmony_ci */ 9878c2ecf20Sopenharmony_ci tmp = b->in[b->in_pos++]; 9888c2ecf20Sopenharmony_ci 9898c2ecf20Sopenharmony_ci if (tmp == 0x00) 9908c2ecf20Sopenharmony_ci return XZ_STREAM_END; 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci if (tmp >= 0xE0 || tmp == 0x01) { 9938c2ecf20Sopenharmony_ci s->lzma2.need_props = true; 9948c2ecf20Sopenharmony_ci s->lzma2.need_dict_reset = false; 9958c2ecf20Sopenharmony_ci dict_reset(&s->dict, b); 9968c2ecf20Sopenharmony_ci } else if (s->lzma2.need_dict_reset) { 9978c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 9988c2ecf20Sopenharmony_ci } 9998c2ecf20Sopenharmony_ci 10008c2ecf20Sopenharmony_ci if (tmp >= 0x80) { 10018c2ecf20Sopenharmony_ci s->lzma2.uncompressed = (tmp & 0x1F) << 16; 10028c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_UNCOMPRESSED_1; 10038c2ecf20Sopenharmony_ci 10048c2ecf20Sopenharmony_ci if (tmp >= 0xC0) { 10058c2ecf20Sopenharmony_ci /* 10068c2ecf20Sopenharmony_ci * When there are new properties, 10078c2ecf20Sopenharmony_ci * state reset is done at 10088c2ecf20Sopenharmony_ci * SEQ_PROPERTIES. 10098c2ecf20Sopenharmony_ci */ 10108c2ecf20Sopenharmony_ci s->lzma2.need_props = false; 10118c2ecf20Sopenharmony_ci s->lzma2.next_sequence 10128c2ecf20Sopenharmony_ci = SEQ_PROPERTIES; 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci } else if (s->lzma2.need_props) { 10158c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci } else { 10188c2ecf20Sopenharmony_ci s->lzma2.next_sequence 10198c2ecf20Sopenharmony_ci = SEQ_LZMA_PREPARE; 10208c2ecf20Sopenharmony_ci if (tmp >= 0xA0) 10218c2ecf20Sopenharmony_ci lzma_reset(s); 10228c2ecf20Sopenharmony_ci } 10238c2ecf20Sopenharmony_ci } else { 10248c2ecf20Sopenharmony_ci if (tmp > 0x02) 10258c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_COMPRESSED_0; 10288c2ecf20Sopenharmony_ci s->lzma2.next_sequence = SEQ_COPY; 10298c2ecf20Sopenharmony_ci } 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci break; 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_ci case SEQ_UNCOMPRESSED_1: 10348c2ecf20Sopenharmony_ci s->lzma2.uncompressed 10358c2ecf20Sopenharmony_ci += (uint32_t)b->in[b->in_pos++] << 8; 10368c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_UNCOMPRESSED_2; 10378c2ecf20Sopenharmony_ci break; 10388c2ecf20Sopenharmony_ci 10398c2ecf20Sopenharmony_ci case SEQ_UNCOMPRESSED_2: 10408c2ecf20Sopenharmony_ci s->lzma2.uncompressed 10418c2ecf20Sopenharmony_ci += (uint32_t)b->in[b->in_pos++] + 1; 10428c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_COMPRESSED_0; 10438c2ecf20Sopenharmony_ci break; 10448c2ecf20Sopenharmony_ci 10458c2ecf20Sopenharmony_ci case SEQ_COMPRESSED_0: 10468c2ecf20Sopenharmony_ci s->lzma2.compressed 10478c2ecf20Sopenharmony_ci = (uint32_t)b->in[b->in_pos++] << 8; 10488c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_COMPRESSED_1; 10498c2ecf20Sopenharmony_ci break; 10508c2ecf20Sopenharmony_ci 10518c2ecf20Sopenharmony_ci case SEQ_COMPRESSED_1: 10528c2ecf20Sopenharmony_ci s->lzma2.compressed 10538c2ecf20Sopenharmony_ci += (uint32_t)b->in[b->in_pos++] + 1; 10548c2ecf20Sopenharmony_ci s->lzma2.sequence = s->lzma2.next_sequence; 10558c2ecf20Sopenharmony_ci break; 10568c2ecf20Sopenharmony_ci 10578c2ecf20Sopenharmony_ci case SEQ_PROPERTIES: 10588c2ecf20Sopenharmony_ci if (!lzma_props(s, b->in[b->in_pos++])) 10598c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_LZMA_PREPARE; 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci /* fall through */ 10648c2ecf20Sopenharmony_ci 10658c2ecf20Sopenharmony_ci case SEQ_LZMA_PREPARE: 10668c2ecf20Sopenharmony_ci if (s->lzma2.compressed < RC_INIT_BYTES) 10678c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci if (!rc_read_init(&s->rc, b)) 10708c2ecf20Sopenharmony_ci return XZ_OK; 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci s->lzma2.compressed -= RC_INIT_BYTES; 10738c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_LZMA_RUN; 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci /* fall through */ 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci case SEQ_LZMA_RUN: 10788c2ecf20Sopenharmony_ci /* 10798c2ecf20Sopenharmony_ci * Set dictionary limit to indicate how much we want 10808c2ecf20Sopenharmony_ci * to be encoded at maximum. Decode new data into the 10818c2ecf20Sopenharmony_ci * dictionary. Flush the new data from dictionary to 10828c2ecf20Sopenharmony_ci * b->out. Check if we finished decoding this chunk. 10838c2ecf20Sopenharmony_ci * In case the dictionary got full but we didn't fill 10848c2ecf20Sopenharmony_ci * the output buffer yet, we may run this loop 10858c2ecf20Sopenharmony_ci * multiple times without changing s->lzma2.sequence. 10868c2ecf20Sopenharmony_ci */ 10878c2ecf20Sopenharmony_ci dict_limit(&s->dict, min_t(size_t, 10888c2ecf20Sopenharmony_ci b->out_size - b->out_pos, 10898c2ecf20Sopenharmony_ci s->lzma2.uncompressed)); 10908c2ecf20Sopenharmony_ci if (!lzma2_lzma(s, b)) 10918c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci s->lzma2.uncompressed -= dict_flush(&s->dict, b); 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci if (s->lzma2.uncompressed == 0) { 10968c2ecf20Sopenharmony_ci if (s->lzma2.compressed > 0 || s->lzma.len > 0 10978c2ecf20Sopenharmony_ci || !rc_is_finished(&s->rc)) 10988c2ecf20Sopenharmony_ci return XZ_DATA_ERROR; 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci rc_reset(&s->rc); 11018c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_CONTROL; 11028c2ecf20Sopenharmony_ci 11038c2ecf20Sopenharmony_ci } else if (b->out_pos == b->out_size 11048c2ecf20Sopenharmony_ci || (b->in_pos == b->in_size 11058c2ecf20Sopenharmony_ci && s->temp.size 11068c2ecf20Sopenharmony_ci < s->lzma2.compressed)) { 11078c2ecf20Sopenharmony_ci return XZ_OK; 11088c2ecf20Sopenharmony_ci } 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci break; 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ci case SEQ_COPY: 11138c2ecf20Sopenharmony_ci dict_uncompressed(&s->dict, b, &s->lzma2.compressed); 11148c2ecf20Sopenharmony_ci if (s->lzma2.compressed > 0) 11158c2ecf20Sopenharmony_ci return XZ_OK; 11168c2ecf20Sopenharmony_ci 11178c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_CONTROL; 11188c2ecf20Sopenharmony_ci break; 11198c2ecf20Sopenharmony_ci } 11208c2ecf20Sopenharmony_ci } 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci return XZ_OK; 11238c2ecf20Sopenharmony_ci} 11248c2ecf20Sopenharmony_ci 11258c2ecf20Sopenharmony_ciXZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, 11268c2ecf20Sopenharmony_ci uint32_t dict_max) 11278c2ecf20Sopenharmony_ci{ 11288c2ecf20Sopenharmony_ci struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); 11298c2ecf20Sopenharmony_ci if (s == NULL) 11308c2ecf20Sopenharmony_ci return NULL; 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci s->dict.mode = mode; 11338c2ecf20Sopenharmony_ci s->dict.size_max = dict_max; 11348c2ecf20Sopenharmony_ci 11358c2ecf20Sopenharmony_ci if (DEC_IS_PREALLOC(mode)) { 11368c2ecf20Sopenharmony_ci s->dict.buf = vmalloc(dict_max); 11378c2ecf20Sopenharmony_ci if (s->dict.buf == NULL) { 11388c2ecf20Sopenharmony_ci kfree(s); 11398c2ecf20Sopenharmony_ci return NULL; 11408c2ecf20Sopenharmony_ci } 11418c2ecf20Sopenharmony_ci } else if (DEC_IS_DYNALLOC(mode)) { 11428c2ecf20Sopenharmony_ci s->dict.buf = NULL; 11438c2ecf20Sopenharmony_ci s->dict.allocated = 0; 11448c2ecf20Sopenharmony_ci } 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci return s; 11478c2ecf20Sopenharmony_ci} 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ciXZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) 11508c2ecf20Sopenharmony_ci{ 11518c2ecf20Sopenharmony_ci /* This limits dictionary size to 3 GiB to keep parsing simpler. */ 11528c2ecf20Sopenharmony_ci if (props > 39) 11538c2ecf20Sopenharmony_ci return XZ_OPTIONS_ERROR; 11548c2ecf20Sopenharmony_ci 11558c2ecf20Sopenharmony_ci s->dict.size = 2 + (props & 1); 11568c2ecf20Sopenharmony_ci s->dict.size <<= (props >> 1) + 11; 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci if (DEC_IS_MULTI(s->dict.mode)) { 11598c2ecf20Sopenharmony_ci if (s->dict.size > s->dict.size_max) 11608c2ecf20Sopenharmony_ci return XZ_MEMLIMIT_ERROR; 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci s->dict.end = s->dict.size; 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci if (DEC_IS_DYNALLOC(s->dict.mode)) { 11658c2ecf20Sopenharmony_ci if (s->dict.allocated < s->dict.size) { 11668c2ecf20Sopenharmony_ci s->dict.allocated = s->dict.size; 11678c2ecf20Sopenharmony_ci vfree(s->dict.buf); 11688c2ecf20Sopenharmony_ci s->dict.buf = vmalloc(s->dict.size); 11698c2ecf20Sopenharmony_ci if (s->dict.buf == NULL) { 11708c2ecf20Sopenharmony_ci s->dict.allocated = 0; 11718c2ecf20Sopenharmony_ci return XZ_MEM_ERROR; 11728c2ecf20Sopenharmony_ci } 11738c2ecf20Sopenharmony_ci } 11748c2ecf20Sopenharmony_ci } 11758c2ecf20Sopenharmony_ci } 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci s->lzma.len = 0; 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci s->lzma2.sequence = SEQ_CONTROL; 11808c2ecf20Sopenharmony_ci s->lzma2.need_dict_reset = true; 11818c2ecf20Sopenharmony_ci 11828c2ecf20Sopenharmony_ci s->temp.size = 0; 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci return XZ_OK; 11858c2ecf20Sopenharmony_ci} 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ciXZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) 11888c2ecf20Sopenharmony_ci{ 11898c2ecf20Sopenharmony_ci if (DEC_IS_MULTI(s->dict.mode)) 11908c2ecf20Sopenharmony_ci vfree(s->dict.buf); 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci kfree(s); 11938c2ecf20Sopenharmony_ci} 1194