xref: /kernel/linux/linux-5.10/lib/xz/xz_dec_bcj.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Branch/Call/Jump (BCJ) filter decoders
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Authors: Lasse Collin <lasse.collin@tukaani.org>
58c2ecf20Sopenharmony_ci *          Igor Pavlov <https://7-zip.org/>
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * This file has been put into the public domain.
88c2ecf20Sopenharmony_ci * You can do whatever you want with this file.
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include "xz_private.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci/*
148c2ecf20Sopenharmony_ci * The rest of the file is inside this ifdef. It makes things a little more
158c2ecf20Sopenharmony_ci * convenient when building without support for any BCJ filters.
168c2ecf20Sopenharmony_ci */
178c2ecf20Sopenharmony_ci#ifdef XZ_DEC_BCJ
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_cistruct xz_dec_bcj {
208c2ecf20Sopenharmony_ci	/* Type of the BCJ filter being used */
218c2ecf20Sopenharmony_ci	enum {
228c2ecf20Sopenharmony_ci		BCJ_X86 = 4,        /* x86 or x86-64 */
238c2ecf20Sopenharmony_ci		BCJ_POWERPC = 5,    /* Big endian only */
248c2ecf20Sopenharmony_ci		BCJ_IA64 = 6,       /* Big or little endian */
258c2ecf20Sopenharmony_ci		BCJ_ARM = 7,        /* Little endian only */
268c2ecf20Sopenharmony_ci		BCJ_ARMTHUMB = 8,   /* Little endian only */
278c2ecf20Sopenharmony_ci		BCJ_SPARC = 9       /* Big or little endian */
288c2ecf20Sopenharmony_ci	} type;
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci	/*
318c2ecf20Sopenharmony_ci	 * Return value of the next filter in the chain. We need to preserve
328c2ecf20Sopenharmony_ci	 * this information across calls, because we must not call the next
338c2ecf20Sopenharmony_ci	 * filter anymore once it has returned XZ_STREAM_END.
348c2ecf20Sopenharmony_ci	 */
358c2ecf20Sopenharmony_ci	enum xz_ret ret;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci	/* True if we are operating in single-call mode. */
388c2ecf20Sopenharmony_ci	bool single_call;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci	/*
418c2ecf20Sopenharmony_ci	 * Absolute position relative to the beginning of the uncompressed
428c2ecf20Sopenharmony_ci	 * data (in a single .xz Block). We care only about the lowest 32
438c2ecf20Sopenharmony_ci	 * bits so this doesn't need to be uint64_t even with big files.
448c2ecf20Sopenharmony_ci	 */
458c2ecf20Sopenharmony_ci	uint32_t pos;
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	/* x86 filter state */
488c2ecf20Sopenharmony_ci	uint32_t x86_prev_mask;
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	/* Temporary space to hold the variables from struct xz_buf */
518c2ecf20Sopenharmony_ci	uint8_t *out;
528c2ecf20Sopenharmony_ci	size_t out_pos;
538c2ecf20Sopenharmony_ci	size_t out_size;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	struct {
568c2ecf20Sopenharmony_ci		/* Amount of already filtered data in the beginning of buf */
578c2ecf20Sopenharmony_ci		size_t filtered;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci		/* Total amount of data currently stored in buf  */
608c2ecf20Sopenharmony_ci		size_t size;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci		/*
638c2ecf20Sopenharmony_ci		 * Buffer to hold a mix of filtered and unfiltered data. This
648c2ecf20Sopenharmony_ci		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
658c2ecf20Sopenharmony_ci		 *
668c2ecf20Sopenharmony_ci		 * Type         Alignment   Look-ahead
678c2ecf20Sopenharmony_ci		 * x86              1           4
688c2ecf20Sopenharmony_ci		 * PowerPC          4           0
698c2ecf20Sopenharmony_ci		 * IA-64           16           0
708c2ecf20Sopenharmony_ci		 * ARM              4           0
718c2ecf20Sopenharmony_ci		 * ARM-Thumb        2           2
728c2ecf20Sopenharmony_ci		 * SPARC            4           0
738c2ecf20Sopenharmony_ci		 */
748c2ecf20Sopenharmony_ci		uint8_t buf[16];
758c2ecf20Sopenharmony_ci	} temp;
768c2ecf20Sopenharmony_ci};
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci#ifdef XZ_DEC_X86
/*
 * Check the most significant byte of a memory address in an x86
 * instruction. Returns 1 when the byte is 0x00 or 0xFF and 0 otherwise.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
	switch (b) {
	case 0x00:
	case 0xFF:
		return 1;
	default:
		return 0;
	}
}
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_cistatic size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
898c2ecf20Sopenharmony_ci{
908c2ecf20Sopenharmony_ci	static const bool mask_to_allowed_status[8]
918c2ecf20Sopenharmony_ci		= { true, true, true, false, true, false, false, false };
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	size_t i;
968c2ecf20Sopenharmony_ci	size_t prev_pos = (size_t)-1;
978c2ecf20Sopenharmony_ci	uint32_t prev_mask = s->x86_prev_mask;
988c2ecf20Sopenharmony_ci	uint32_t src;
998c2ecf20Sopenharmony_ci	uint32_t dest;
1008c2ecf20Sopenharmony_ci	uint32_t j;
1018c2ecf20Sopenharmony_ci	uint8_t b;
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	if (size <= 4)
1048c2ecf20Sopenharmony_ci		return 0;
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	size -= 4;
1078c2ecf20Sopenharmony_ci	for (i = 0; i < size; ++i) {
1088c2ecf20Sopenharmony_ci		if ((buf[i] & 0xFE) != 0xE8)
1098c2ecf20Sopenharmony_ci			continue;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci		prev_pos = i - prev_pos;
1128c2ecf20Sopenharmony_ci		if (prev_pos > 3) {
1138c2ecf20Sopenharmony_ci			prev_mask = 0;
1148c2ecf20Sopenharmony_ci		} else {
1158c2ecf20Sopenharmony_ci			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
1168c2ecf20Sopenharmony_ci			if (prev_mask != 0) {
1178c2ecf20Sopenharmony_ci				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
1188c2ecf20Sopenharmony_ci				if (!mask_to_allowed_status[prev_mask]
1198c2ecf20Sopenharmony_ci						|| bcj_x86_test_msbyte(b)) {
1208c2ecf20Sopenharmony_ci					prev_pos = i;
1218c2ecf20Sopenharmony_ci					prev_mask = (prev_mask << 1) | 1;
1228c2ecf20Sopenharmony_ci					continue;
1238c2ecf20Sopenharmony_ci				}
1248c2ecf20Sopenharmony_ci			}
1258c2ecf20Sopenharmony_ci		}
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci		prev_pos = i;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci		if (bcj_x86_test_msbyte(buf[i + 4])) {
1308c2ecf20Sopenharmony_ci			src = get_unaligned_le32(buf + i + 1);
1318c2ecf20Sopenharmony_ci			while (true) {
1328c2ecf20Sopenharmony_ci				dest = src - (s->pos + (uint32_t)i + 5);
1338c2ecf20Sopenharmony_ci				if (prev_mask == 0)
1348c2ecf20Sopenharmony_ci					break;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci				j = mask_to_bit_num[prev_mask] * 8;
1378c2ecf20Sopenharmony_ci				b = (uint8_t)(dest >> (24 - j));
1388c2ecf20Sopenharmony_ci				if (!bcj_x86_test_msbyte(b))
1398c2ecf20Sopenharmony_ci					break;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
1428c2ecf20Sopenharmony_ci			}
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci			dest &= 0x01FFFFFF;
1458c2ecf20Sopenharmony_ci			dest |= (uint32_t)0 - (dest & 0x01000000);
1468c2ecf20Sopenharmony_ci			put_unaligned_le32(dest, buf + i + 1);
1478c2ecf20Sopenharmony_ci			i += 4;
1488c2ecf20Sopenharmony_ci		} else {
1498c2ecf20Sopenharmony_ci			prev_mask = (prev_mask << 1) | 1;
1508c2ecf20Sopenharmony_ci		}
1518c2ecf20Sopenharmony_ci	}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	prev_pos = i - prev_pos;
1548c2ecf20Sopenharmony_ci	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
1558c2ecf20Sopenharmony_ci	return i;
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci#endif
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci#ifdef XZ_DEC_POWERPC
1608c2ecf20Sopenharmony_cistatic size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	size_t i;
1638c2ecf20Sopenharmony_ci	uint32_t instr;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	for (i = 0; i + 4 <= size; i += 4) {
1668c2ecf20Sopenharmony_ci		instr = get_unaligned_be32(buf + i);
1678c2ecf20Sopenharmony_ci		if ((instr & 0xFC000003) == 0x48000001) {
1688c2ecf20Sopenharmony_ci			instr &= 0x03FFFFFC;
1698c2ecf20Sopenharmony_ci			instr -= s->pos + (uint32_t)i;
1708c2ecf20Sopenharmony_ci			instr &= 0x03FFFFFC;
1718c2ecf20Sopenharmony_ci			instr |= 0x48000001;
1728c2ecf20Sopenharmony_ci			put_unaligned_be32(instr, buf + i);
1738c2ecf20Sopenharmony_ci		}
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	return i;
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci#endif
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci#ifdef XZ_DEC_IA64
1818c2ecf20Sopenharmony_cistatic size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	static const uint8_t branch_table[32] = {
1848c2ecf20Sopenharmony_ci		0, 0, 0, 0, 0, 0, 0, 0,
1858c2ecf20Sopenharmony_ci		0, 0, 0, 0, 0, 0, 0, 0,
1868c2ecf20Sopenharmony_ci		4, 4, 6, 6, 0, 0, 7, 7,
1878c2ecf20Sopenharmony_ci		4, 4, 0, 0, 4, 4, 0, 0
1888c2ecf20Sopenharmony_ci	};
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	/*
1918c2ecf20Sopenharmony_ci	 * The local variables take a little bit stack space, but it's less
1928c2ecf20Sopenharmony_ci	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
1938c2ecf20Sopenharmony_ci	 * stack usage here without doing that for the LZMA2 decoder too.
1948c2ecf20Sopenharmony_ci	 */
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	/* Loop counters */
1978c2ecf20Sopenharmony_ci	size_t i;
1988c2ecf20Sopenharmony_ci	size_t j;
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
2018c2ecf20Sopenharmony_ci	uint32_t slot;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	/* Bitwise offset of the instruction indicated by slot */
2048c2ecf20Sopenharmony_ci	uint32_t bit_pos;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	/* bit_pos split into byte and bit parts */
2078c2ecf20Sopenharmony_ci	uint32_t byte_pos;
2088c2ecf20Sopenharmony_ci	uint32_t bit_res;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	/* Address part of an instruction */
2118c2ecf20Sopenharmony_ci	uint32_t addr;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	/* Mask used to detect which instructions to convert */
2148c2ecf20Sopenharmony_ci	uint32_t mask;
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	/* 41-bit instruction stored somewhere in the lowest 48 bits */
2178c2ecf20Sopenharmony_ci	uint64_t instr;
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	/* Instruction normalized with bit_res for easier manipulation */
2208c2ecf20Sopenharmony_ci	uint64_t norm;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	for (i = 0; i + 16 <= size; i += 16) {
2238c2ecf20Sopenharmony_ci		mask = branch_table[buf[i] & 0x1F];
2248c2ecf20Sopenharmony_ci		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
2258c2ecf20Sopenharmony_ci			if (((mask >> slot) & 1) == 0)
2268c2ecf20Sopenharmony_ci				continue;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci			byte_pos = bit_pos >> 3;
2298c2ecf20Sopenharmony_ci			bit_res = bit_pos & 7;
2308c2ecf20Sopenharmony_ci			instr = 0;
2318c2ecf20Sopenharmony_ci			for (j = 0; j < 6; ++j)
2328c2ecf20Sopenharmony_ci				instr |= (uint64_t)(buf[i + j + byte_pos])
2338c2ecf20Sopenharmony_ci						<< (8 * j);
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci			norm = instr >> bit_res;
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci			if (((norm >> 37) & 0x0F) == 0x05
2388c2ecf20Sopenharmony_ci					&& ((norm >> 9) & 0x07) == 0) {
2398c2ecf20Sopenharmony_ci				addr = (norm >> 13) & 0x0FFFFF;
2408c2ecf20Sopenharmony_ci				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
2418c2ecf20Sopenharmony_ci				addr <<= 4;
2428c2ecf20Sopenharmony_ci				addr -= s->pos + (uint32_t)i;
2438c2ecf20Sopenharmony_ci				addr >>= 4;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci				norm &= ~((uint64_t)0x8FFFFF << 13);
2468c2ecf20Sopenharmony_ci				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
2478c2ecf20Sopenharmony_ci				norm |= (uint64_t)(addr & 0x100000)
2488c2ecf20Sopenharmony_ci						<< (36 - 20);
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci				instr &= (1 << bit_res) - 1;
2518c2ecf20Sopenharmony_ci				instr |= norm << bit_res;
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci				for (j = 0; j < 6; j++)
2548c2ecf20Sopenharmony_ci					buf[i + j + byte_pos]
2558c2ecf20Sopenharmony_ci						= (uint8_t)(instr >> (8 * j));
2568c2ecf20Sopenharmony_ci			}
2578c2ecf20Sopenharmony_ci		}
2588c2ecf20Sopenharmony_ci	}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	return i;
2618c2ecf20Sopenharmony_ci}
2628c2ecf20Sopenharmony_ci#endif
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARM
2658c2ecf20Sopenharmony_cistatic size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
2668c2ecf20Sopenharmony_ci{
2678c2ecf20Sopenharmony_ci	size_t i;
2688c2ecf20Sopenharmony_ci	uint32_t addr;
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	for (i = 0; i + 4 <= size; i += 4) {
2718c2ecf20Sopenharmony_ci		if (buf[i + 3] == 0xEB) {
2728c2ecf20Sopenharmony_ci			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
2738c2ecf20Sopenharmony_ci					| ((uint32_t)buf[i + 2] << 16);
2748c2ecf20Sopenharmony_ci			addr <<= 2;
2758c2ecf20Sopenharmony_ci			addr -= s->pos + (uint32_t)i + 8;
2768c2ecf20Sopenharmony_ci			addr >>= 2;
2778c2ecf20Sopenharmony_ci			buf[i] = (uint8_t)addr;
2788c2ecf20Sopenharmony_ci			buf[i + 1] = (uint8_t)(addr >> 8);
2798c2ecf20Sopenharmony_ci			buf[i + 2] = (uint8_t)(addr >> 16);
2808c2ecf20Sopenharmony_ci		}
2818c2ecf20Sopenharmony_ci	}
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	return i;
2848c2ecf20Sopenharmony_ci}
2858c2ecf20Sopenharmony_ci#endif
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARMTHUMB
2888c2ecf20Sopenharmony_cistatic size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
2898c2ecf20Sopenharmony_ci{
2908c2ecf20Sopenharmony_ci	size_t i;
2918c2ecf20Sopenharmony_ci	uint32_t addr;
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	for (i = 0; i + 4 <= size; i += 2) {
2948c2ecf20Sopenharmony_ci		if ((buf[i + 1] & 0xF8) == 0xF0
2958c2ecf20Sopenharmony_ci				&& (buf[i + 3] & 0xF8) == 0xF8) {
2968c2ecf20Sopenharmony_ci			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
2978c2ecf20Sopenharmony_ci					| ((uint32_t)buf[i] << 11)
2988c2ecf20Sopenharmony_ci					| (((uint32_t)buf[i + 3] & 0x07) << 8)
2998c2ecf20Sopenharmony_ci					| (uint32_t)buf[i + 2];
3008c2ecf20Sopenharmony_ci			addr <<= 1;
3018c2ecf20Sopenharmony_ci			addr -= s->pos + (uint32_t)i + 4;
3028c2ecf20Sopenharmony_ci			addr >>= 1;
3038c2ecf20Sopenharmony_ci			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
3048c2ecf20Sopenharmony_ci			buf[i] = (uint8_t)(addr >> 11);
3058c2ecf20Sopenharmony_ci			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
3068c2ecf20Sopenharmony_ci			buf[i + 2] = (uint8_t)addr;
3078c2ecf20Sopenharmony_ci			i += 2;
3088c2ecf20Sopenharmony_ci		}
3098c2ecf20Sopenharmony_ci	}
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	return i;
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci#endif
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci#ifdef XZ_DEC_SPARC
3168c2ecf20Sopenharmony_cistatic size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	size_t i;
3198c2ecf20Sopenharmony_ci	uint32_t instr;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	for (i = 0; i + 4 <= size; i += 4) {
3228c2ecf20Sopenharmony_ci		instr = get_unaligned_be32(buf + i);
3238c2ecf20Sopenharmony_ci		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
3248c2ecf20Sopenharmony_ci			instr <<= 2;
3258c2ecf20Sopenharmony_ci			instr -= s->pos + (uint32_t)i;
3268c2ecf20Sopenharmony_ci			instr >>= 2;
3278c2ecf20Sopenharmony_ci			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
3288c2ecf20Sopenharmony_ci					| 0x40000000 | (instr & 0x3FFFFF);
3298c2ecf20Sopenharmony_ci			put_unaligned_be32(instr, buf + i);
3308c2ecf20Sopenharmony_ci		}
3318c2ecf20Sopenharmony_ci	}
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_ci	return i;
3348c2ecf20Sopenharmony_ci}
3358c2ecf20Sopenharmony_ci#endif
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci/*
3388c2ecf20Sopenharmony_ci * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
3398c2ecf20Sopenharmony_ci * of data that got filtered.
3408c2ecf20Sopenharmony_ci *
3418c2ecf20Sopenharmony_ci * NOTE: This is implemented as a switch statement to avoid using function
3428c2ecf20Sopenharmony_ci * pointers, which could be problematic in the kernel boot code, which must
3438c2ecf20Sopenharmony_ci * avoid pointers to static data (at least on x86).
3448c2ecf20Sopenharmony_ci */
3458c2ecf20Sopenharmony_cistatic void bcj_apply(struct xz_dec_bcj *s,
3468c2ecf20Sopenharmony_ci		      uint8_t *buf, size_t *pos, size_t size)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	size_t filtered;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	buf += *pos;
3518c2ecf20Sopenharmony_ci	size -= *pos;
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	switch (s->type) {
3548c2ecf20Sopenharmony_ci#ifdef XZ_DEC_X86
3558c2ecf20Sopenharmony_ci	case BCJ_X86:
3568c2ecf20Sopenharmony_ci		filtered = bcj_x86(s, buf, size);
3578c2ecf20Sopenharmony_ci		break;
3588c2ecf20Sopenharmony_ci#endif
3598c2ecf20Sopenharmony_ci#ifdef XZ_DEC_POWERPC
3608c2ecf20Sopenharmony_ci	case BCJ_POWERPC:
3618c2ecf20Sopenharmony_ci		filtered = bcj_powerpc(s, buf, size);
3628c2ecf20Sopenharmony_ci		break;
3638c2ecf20Sopenharmony_ci#endif
3648c2ecf20Sopenharmony_ci#ifdef XZ_DEC_IA64
3658c2ecf20Sopenharmony_ci	case BCJ_IA64:
3668c2ecf20Sopenharmony_ci		filtered = bcj_ia64(s, buf, size);
3678c2ecf20Sopenharmony_ci		break;
3688c2ecf20Sopenharmony_ci#endif
3698c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARM
3708c2ecf20Sopenharmony_ci	case BCJ_ARM:
3718c2ecf20Sopenharmony_ci		filtered = bcj_arm(s, buf, size);
3728c2ecf20Sopenharmony_ci		break;
3738c2ecf20Sopenharmony_ci#endif
3748c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARMTHUMB
3758c2ecf20Sopenharmony_ci	case BCJ_ARMTHUMB:
3768c2ecf20Sopenharmony_ci		filtered = bcj_armthumb(s, buf, size);
3778c2ecf20Sopenharmony_ci		break;
3788c2ecf20Sopenharmony_ci#endif
3798c2ecf20Sopenharmony_ci#ifdef XZ_DEC_SPARC
3808c2ecf20Sopenharmony_ci	case BCJ_SPARC:
3818c2ecf20Sopenharmony_ci		filtered = bcj_sparc(s, buf, size);
3828c2ecf20Sopenharmony_ci		break;
3838c2ecf20Sopenharmony_ci#endif
3848c2ecf20Sopenharmony_ci	default:
3858c2ecf20Sopenharmony_ci		/* Never reached but silence compiler warnings. */
3868c2ecf20Sopenharmony_ci		filtered = 0;
3878c2ecf20Sopenharmony_ci		break;
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	*pos += filtered;
3918c2ecf20Sopenharmony_ci	s->pos += filtered;
3928c2ecf20Sopenharmony_ci}
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci/*
3958c2ecf20Sopenharmony_ci * Flush pending filtered data from temp to the output buffer.
3968c2ecf20Sopenharmony_ci * Move the remaining mixture of possibly filtered and unfiltered
3978c2ecf20Sopenharmony_ci * data to the beginning of temp.
3988c2ecf20Sopenharmony_ci */
3998c2ecf20Sopenharmony_cistatic void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
4008c2ecf20Sopenharmony_ci{
4018c2ecf20Sopenharmony_ci	size_t copy_size;
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
4048c2ecf20Sopenharmony_ci	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
4058c2ecf20Sopenharmony_ci	b->out_pos += copy_size;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	s->temp.filtered -= copy_size;
4088c2ecf20Sopenharmony_ci	s->temp.size -= copy_size;
4098c2ecf20Sopenharmony_ci	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
4108c2ecf20Sopenharmony_ci}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci/*
4138c2ecf20Sopenharmony_ci * The BCJ filter functions are primitive in sense that they process the
4148c2ecf20Sopenharmony_ci * data in chunks of 1-16 bytes. To hide this issue, this function does
4158c2ecf20Sopenharmony_ci * some buffering.
4168c2ecf20Sopenharmony_ci */
4178c2ecf20Sopenharmony_ciXZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
4188c2ecf20Sopenharmony_ci				     struct xz_dec_lzma2 *lzma2,
4198c2ecf20Sopenharmony_ci				     struct xz_buf *b)
4208c2ecf20Sopenharmony_ci{
4218c2ecf20Sopenharmony_ci	size_t out_start;
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_ci	/*
4248c2ecf20Sopenharmony_ci	 * Flush pending already filtered data to the output buffer. Return
4258c2ecf20Sopenharmony_ci	 * immediatelly if we couldn't flush everything, or if the next
4268c2ecf20Sopenharmony_ci	 * filter in the chain had already returned XZ_STREAM_END.
4278c2ecf20Sopenharmony_ci	 */
4288c2ecf20Sopenharmony_ci	if (s->temp.filtered > 0) {
4298c2ecf20Sopenharmony_ci		bcj_flush(s, b);
4308c2ecf20Sopenharmony_ci		if (s->temp.filtered > 0)
4318c2ecf20Sopenharmony_ci			return XZ_OK;
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci		if (s->ret == XZ_STREAM_END)
4348c2ecf20Sopenharmony_ci			return XZ_STREAM_END;
4358c2ecf20Sopenharmony_ci	}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	/*
4388c2ecf20Sopenharmony_ci	 * If we have more output space than what is currently pending in
4398c2ecf20Sopenharmony_ci	 * temp, copy the unfiltered data from temp to the output buffer
4408c2ecf20Sopenharmony_ci	 * and try to fill the output buffer by decoding more data from the
4418c2ecf20Sopenharmony_ci	 * next filter in the chain. Apply the BCJ filter on the new data
4428c2ecf20Sopenharmony_ci	 * in the output buffer. If everything cannot be filtered, copy it
4438c2ecf20Sopenharmony_ci	 * to temp and rewind the output buffer position accordingly.
4448c2ecf20Sopenharmony_ci	 *
4458c2ecf20Sopenharmony_ci	 * This needs to be always run when temp.size == 0 to handle a special
4468c2ecf20Sopenharmony_ci	 * case where the output buffer is full and the next filter has no
4478c2ecf20Sopenharmony_ci	 * more output coming but hasn't returned XZ_STREAM_END yet.
4488c2ecf20Sopenharmony_ci	 */
4498c2ecf20Sopenharmony_ci	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
4508c2ecf20Sopenharmony_ci		out_start = b->out_pos;
4518c2ecf20Sopenharmony_ci		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
4528c2ecf20Sopenharmony_ci		b->out_pos += s->temp.size;
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci		s->ret = xz_dec_lzma2_run(lzma2, b);
4558c2ecf20Sopenharmony_ci		if (s->ret != XZ_STREAM_END
4568c2ecf20Sopenharmony_ci				&& (s->ret != XZ_OK || s->single_call))
4578c2ecf20Sopenharmony_ci			return s->ret;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci		bcj_apply(s, b->out, &out_start, b->out_pos);
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci		/*
4628c2ecf20Sopenharmony_ci		 * As an exception, if the next filter returned XZ_STREAM_END,
4638c2ecf20Sopenharmony_ci		 * we can do that too, since the last few bytes that remain
4648c2ecf20Sopenharmony_ci		 * unfiltered are meant to remain unfiltered.
4658c2ecf20Sopenharmony_ci		 */
4668c2ecf20Sopenharmony_ci		if (s->ret == XZ_STREAM_END)
4678c2ecf20Sopenharmony_ci			return XZ_STREAM_END;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci		s->temp.size = b->out_pos - out_start;
4708c2ecf20Sopenharmony_ci		b->out_pos -= s->temp.size;
4718c2ecf20Sopenharmony_ci		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci		/*
4748c2ecf20Sopenharmony_ci		 * If there wasn't enough input to the next filter to fill
4758c2ecf20Sopenharmony_ci		 * the output buffer with unfiltered data, there's no point
4768c2ecf20Sopenharmony_ci		 * to try decoding more data to temp.
4778c2ecf20Sopenharmony_ci		 */
4788c2ecf20Sopenharmony_ci		if (b->out_pos + s->temp.size < b->out_size)
4798c2ecf20Sopenharmony_ci			return XZ_OK;
4808c2ecf20Sopenharmony_ci	}
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	/*
4838c2ecf20Sopenharmony_ci	 * We have unfiltered data in temp. If the output buffer isn't full
4848c2ecf20Sopenharmony_ci	 * yet, try to fill the temp buffer by decoding more data from the
4858c2ecf20Sopenharmony_ci	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
4868c2ecf20Sopenharmony_ci	 * fill the actual output buffer by copying filtered data from temp.
4878c2ecf20Sopenharmony_ci	 * A mix of filtered and unfiltered data may be left in temp; it will
4888c2ecf20Sopenharmony_ci	 * be taken care on the next call to this function.
4898c2ecf20Sopenharmony_ci	 */
4908c2ecf20Sopenharmony_ci	if (b->out_pos < b->out_size) {
4918c2ecf20Sopenharmony_ci		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
4928c2ecf20Sopenharmony_ci		s->out = b->out;
4938c2ecf20Sopenharmony_ci		s->out_pos = b->out_pos;
4948c2ecf20Sopenharmony_ci		s->out_size = b->out_size;
4958c2ecf20Sopenharmony_ci		b->out = s->temp.buf;
4968c2ecf20Sopenharmony_ci		b->out_pos = s->temp.size;
4978c2ecf20Sopenharmony_ci		b->out_size = sizeof(s->temp.buf);
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci		s->ret = xz_dec_lzma2_run(lzma2, b);
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci		s->temp.size = b->out_pos;
5028c2ecf20Sopenharmony_ci		b->out = s->out;
5038c2ecf20Sopenharmony_ci		b->out_pos = s->out_pos;
5048c2ecf20Sopenharmony_ci		b->out_size = s->out_size;
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
5078c2ecf20Sopenharmony_ci			return s->ret;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci		/*
5128c2ecf20Sopenharmony_ci		 * If the next filter returned XZ_STREAM_END, we mark that
5138c2ecf20Sopenharmony_ci		 * everything is filtered, since the last unfiltered bytes
5148c2ecf20Sopenharmony_ci		 * of the stream are meant to be left as is.
5158c2ecf20Sopenharmony_ci		 */
5168c2ecf20Sopenharmony_ci		if (s->ret == XZ_STREAM_END)
5178c2ecf20Sopenharmony_ci			s->temp.filtered = s->temp.size;
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci		bcj_flush(s, b);
5208c2ecf20Sopenharmony_ci		if (s->temp.filtered > 0)
5218c2ecf20Sopenharmony_ci			return XZ_OK;
5228c2ecf20Sopenharmony_ci	}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci	return s->ret;
5258c2ecf20Sopenharmony_ci}
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ciXZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
5288c2ecf20Sopenharmony_ci{
5298c2ecf20Sopenharmony_ci	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
5308c2ecf20Sopenharmony_ci	if (s != NULL)
5318c2ecf20Sopenharmony_ci		s->single_call = single_call;
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	return s;
5348c2ecf20Sopenharmony_ci}
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ciXZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
5378c2ecf20Sopenharmony_ci{
5388c2ecf20Sopenharmony_ci	switch (id) {
5398c2ecf20Sopenharmony_ci#ifdef XZ_DEC_X86
5408c2ecf20Sopenharmony_ci	case BCJ_X86:
5418c2ecf20Sopenharmony_ci#endif
5428c2ecf20Sopenharmony_ci#ifdef XZ_DEC_POWERPC
5438c2ecf20Sopenharmony_ci	case BCJ_POWERPC:
5448c2ecf20Sopenharmony_ci#endif
5458c2ecf20Sopenharmony_ci#ifdef XZ_DEC_IA64
5468c2ecf20Sopenharmony_ci	case BCJ_IA64:
5478c2ecf20Sopenharmony_ci#endif
5488c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARM
5498c2ecf20Sopenharmony_ci	case BCJ_ARM:
5508c2ecf20Sopenharmony_ci#endif
5518c2ecf20Sopenharmony_ci#ifdef XZ_DEC_ARMTHUMB
5528c2ecf20Sopenharmony_ci	case BCJ_ARMTHUMB:
5538c2ecf20Sopenharmony_ci#endif
5548c2ecf20Sopenharmony_ci#ifdef XZ_DEC_SPARC
5558c2ecf20Sopenharmony_ci	case BCJ_SPARC:
5568c2ecf20Sopenharmony_ci#endif
5578c2ecf20Sopenharmony_ci		break;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	default:
5608c2ecf20Sopenharmony_ci		/* Unsupported Filter ID */
5618c2ecf20Sopenharmony_ci		return XZ_OPTIONS_ERROR;
5628c2ecf20Sopenharmony_ci	}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	s->type = id;
5658c2ecf20Sopenharmony_ci	s->ret = XZ_OK;
5668c2ecf20Sopenharmony_ci	s->pos = 0;
5678c2ecf20Sopenharmony_ci	s->x86_prev_mask = 0;
5688c2ecf20Sopenharmony_ci	s->temp.filtered = 0;
5698c2ecf20Sopenharmony_ci	s->temp.size = 0;
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	return XZ_OK;
5728c2ecf20Sopenharmony_ci}
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci#endif
575