// SPDX-License-Identifier: LGPL-2.1+
/*
 * Copyright 2016 Tom aan de Wiel
 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *
 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
 *
 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
 * R.D. Brown, 1977
 */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/string.h>
138c2ecf20Sopenharmony_ci#include <linux/kernel.h>
148c2ecf20Sopenharmony_ci#include "codec-fwht.h"
158c2ecf20Sopenharmony_ci
/* Status returned by derlc() when it would have to read past its input. */
#define OVERFLOW_BIT	BIT(14)

/*
 * Note: bit 0 of the header must always be 0. Otherwise it cannot
 * be guaranteed that the magic 8 byte sequence (see below) can
 * never occur in the rlc output.
 */

/* Header flag written by rlc() when the block is a P-block. */
#define PFRAME_BIT	BIT(15)
/*
 * Covers bits 1..12.
 * NOTE(review): not referenced in this part of the file - confirm its
 * meaning against the users further down.
 */
#define DUPS_MASK	0x1ffe

/* Block types, as passed to rlc() and returned by decide_blocktype(). */
#define PBLOCK		0
#define IBLOCK		1

/* Run-length value telling the decoder that the rest of the block is zero. */
#define ALL_ZEROS	15
308c2ecf20Sopenharmony_ci
/*
 * Scan order used by the run-length coder: entry i is the row-major index
 * (row * 8 + column) of the i-th coefficient to be coded. Each line below is
 * one anti-diagonal of the 8x8 block, walked from its top-right element to
 * its bottom-left element.
 */
static const uint8_t zigzag[8 * 8] = {
	 0,						/* diagonal  0 */
	 1,  8,						/* diagonal  1 */
	 2,  9, 16,					/* diagonal  2 */
	 3, 10, 17, 24,					/* diagonal  3 */
	 4, 11, 18, 25, 32,				/* diagonal  4 */
	 5, 12, 19, 26, 33, 40,				/* diagonal  5 */
	 6, 13, 20, 27, 34, 41, 48,			/* diagonal  6 */
	 7, 14, 21, 28, 35, 42, 49, 56,			/* diagonal  7 */
	15, 22, 29, 36, 43, 50, 57,			/* diagonal  8 */
	23, 30, 37, 44, 51, 58,				/* diagonal  9 */
	31, 38, 45, 52, 59,				/* diagonal 10 */
	39, 46, 53, 60,					/* diagonal 11 */
	47, 54, 61,					/* diagonal 12 */
	55, 62,						/* diagonal 13 */
	63,						/* diagonal 14 */
};
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci/*
508c2ecf20Sopenharmony_ci * noinline_for_stack to work around
518c2ecf20Sopenharmony_ci * https://bugs.llvm.org/show_bug.cgi?id=38809
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_cistatic int noinline_for_stack
548c2ecf20Sopenharmony_cirlc(const s16 *in, __be16 *output, int blocktype)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	s16 block[8 * 8];
578c2ecf20Sopenharmony_ci	s16 *wp = block;
588c2ecf20Sopenharmony_ci	int i = 0;
598c2ecf20Sopenharmony_ci	int x, y;
608c2ecf20Sopenharmony_ci	int ret = 0;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	/* read in block from framebuffer */
638c2ecf20Sopenharmony_ci	int lastzero_run = 0;
648c2ecf20Sopenharmony_ci	int to_encode;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	for (y = 0; y < 8; y++) {
678c2ecf20Sopenharmony_ci		for (x = 0; x < 8; x++) {
688c2ecf20Sopenharmony_ci			*wp = in[x + y * 8];
698c2ecf20Sopenharmony_ci			wp++;
708c2ecf20Sopenharmony_ci		}
718c2ecf20Sopenharmony_ci	}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	/* keep track of amount of trailing zeros */
748c2ecf20Sopenharmony_ci	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
758c2ecf20Sopenharmony_ci		lastzero_run++;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
788c2ecf20Sopenharmony_ci	ret++;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	i = 0;
838c2ecf20Sopenharmony_ci	while (i < to_encode) {
848c2ecf20Sopenharmony_ci		int cnt = 0;
858c2ecf20Sopenharmony_ci		int tmp;
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci		/* count leading zeros */
888c2ecf20Sopenharmony_ci		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
898c2ecf20Sopenharmony_ci			cnt++;
908c2ecf20Sopenharmony_ci			i++;
918c2ecf20Sopenharmony_ci			if (i == to_encode) {
928c2ecf20Sopenharmony_ci				cnt--;
938c2ecf20Sopenharmony_ci				break;
948c2ecf20Sopenharmony_ci			}
958c2ecf20Sopenharmony_ci		}
968c2ecf20Sopenharmony_ci		/* 4 bits for run, 12 for coefficient (quantization by 4) */
978c2ecf20Sopenharmony_ci		*output++ = htons((cnt | tmp << 4));
988c2ecf20Sopenharmony_ci		i++;
998c2ecf20Sopenharmony_ci		ret++;
1008c2ecf20Sopenharmony_ci	}
1018c2ecf20Sopenharmony_ci	if (lastzero_run > 14) {
1028c2ecf20Sopenharmony_ci		*output = htons(ALL_ZEROS | 0);
1038c2ecf20Sopenharmony_ci		ret++;
1048c2ecf20Sopenharmony_ci	}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	return ret;
1078c2ecf20Sopenharmony_ci}
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci/*
1108c2ecf20Sopenharmony_ci * This function will worst-case increase rlc_in by 65*2 bytes:
1118c2ecf20Sopenharmony_ci * one s16 value for the header and 8 * 8 coefficients of type s16.
1128c2ecf20Sopenharmony_ci */
1138c2ecf20Sopenharmony_cistatic noinline_for_stack u16
1148c2ecf20Sopenharmony_ciderlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
1158c2ecf20Sopenharmony_ci{
1168c2ecf20Sopenharmony_ci	/* header */
1178c2ecf20Sopenharmony_ci	const __be16 *input = *rlc_in;
1188c2ecf20Sopenharmony_ci	u16 stat;
1198c2ecf20Sopenharmony_ci	int dec_count = 0;
1208c2ecf20Sopenharmony_ci	s16 block[8 * 8 + 16];
1218c2ecf20Sopenharmony_ci	s16 *wp = block;
1228c2ecf20Sopenharmony_ci	int i;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	if (input > end_of_input)
1258c2ecf20Sopenharmony_ci		return OVERFLOW_BIT;
1268c2ecf20Sopenharmony_ci	stat = ntohs(*input++);
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	/*
1298c2ecf20Sopenharmony_ci	 * Now de-compress, it expands one byte to up to 15 bytes
1308c2ecf20Sopenharmony_ci	 * (or fills the remainder of the 64 bytes with zeroes if it
1318c2ecf20Sopenharmony_ci	 * is the last byte to expand).
1328c2ecf20Sopenharmony_ci	 *
1338c2ecf20Sopenharmony_ci	 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
1348c2ecf20Sopenharmony_ci	 * allow for overflow if the incoming data was malformed.
1358c2ecf20Sopenharmony_ci	 */
1368c2ecf20Sopenharmony_ci	while (dec_count < 8 * 8) {
1378c2ecf20Sopenharmony_ci		s16 in;
1388c2ecf20Sopenharmony_ci		int length;
1398c2ecf20Sopenharmony_ci		int coeff;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci		if (input > end_of_input)
1428c2ecf20Sopenharmony_ci			return OVERFLOW_BIT;
1438c2ecf20Sopenharmony_ci		in = ntohs(*input++);
1448c2ecf20Sopenharmony_ci		length = in & 0xf;
1458c2ecf20Sopenharmony_ci		coeff = in >> 4;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci		/* fill remainder with zeros */
1488c2ecf20Sopenharmony_ci		if (length == 15) {
1498c2ecf20Sopenharmony_ci			for (i = 0; i < 64 - dec_count; i++)
1508c2ecf20Sopenharmony_ci				*wp++ = 0;
1518c2ecf20Sopenharmony_ci			break;
1528c2ecf20Sopenharmony_ci		}
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci		for (i = 0; i < length; i++)
1558c2ecf20Sopenharmony_ci			*wp++ = 0;
1568c2ecf20Sopenharmony_ci		*wp++ = coeff;
1578c2ecf20Sopenharmony_ci		dec_count += length + 1;
1588c2ecf20Sopenharmony_ci	}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	wp = block;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	for (i = 0; i < 64; i++) {
1638c2ecf20Sopenharmony_ci		int pos = zigzag[i];
1648c2ecf20Sopenharmony_ci		int y = pos / 8;
1658c2ecf20Sopenharmony_ci		int x = pos % 8;
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		dwht_out[x + y * 8] = *wp++;
1688c2ecf20Sopenharmony_ci	}
1698c2ecf20Sopenharmony_ci	*rlc_in = input;
1708c2ecf20Sopenharmony_ci	return stat;
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
/*
 * Per-coefficient shift amounts for intra blocks, in row-major order.
 * quantize_intra() shifts right by these values and dequantize_intra()
 * shifts left by them.
 */
static const int quant_table[8 * 8] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};
1838c2ecf20Sopenharmony_ci
/*
 * Per-coefficient shift amounts for inter (P) blocks, in row-major order.
 * quantize_inter() shifts right by these values and dequantize_inter()
 * shifts left by them.
 */
static const int quant_table_p[8 * 8] = {
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  3,
	3, 3, 3, 3, 3, 3, 3,  6,
	3, 3, 3, 3, 3, 3, 6,  6,
	3, 3, 3, 3, 3, 6, 6,  9,
	3, 3, 3, 3, 6, 6, 9,  9,
	3, 3, 3, 6, 6, 9, 9, 10,
};
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_cistatic void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
1968c2ecf20Sopenharmony_ci{
1978c2ecf20Sopenharmony_ci	const int *quant = quant_table;
1988c2ecf20Sopenharmony_ci	int i, j;
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	for (j = 0; j < 8; j++) {
2018c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
2028c2ecf20Sopenharmony_ci			*coeff >>= *quant;
2038c2ecf20Sopenharmony_ci			if (*coeff >= -qp && *coeff <= qp)
2048c2ecf20Sopenharmony_ci				*coeff = *de_coeff = 0;
2058c2ecf20Sopenharmony_ci			else
2068c2ecf20Sopenharmony_ci				*de_coeff = *coeff << *quant;
2078c2ecf20Sopenharmony_ci		}
2088c2ecf20Sopenharmony_ci	}
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic void dequantize_intra(s16 *coeff)
2128c2ecf20Sopenharmony_ci{
2138c2ecf20Sopenharmony_ci	const int *quant = quant_table;
2148c2ecf20Sopenharmony_ci	int i, j;
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	for (j = 0; j < 8; j++)
2178c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++, quant++, coeff++)
2188c2ecf20Sopenharmony_ci			*coeff <<= *quant;
2198c2ecf20Sopenharmony_ci}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_cistatic void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
2228c2ecf20Sopenharmony_ci{
2238c2ecf20Sopenharmony_ci	const int *quant = quant_table_p;
2248c2ecf20Sopenharmony_ci	int i, j;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	for (j = 0; j < 8; j++) {
2278c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
2288c2ecf20Sopenharmony_ci			*coeff >>= *quant;
2298c2ecf20Sopenharmony_ci			if (*coeff >= -qp && *coeff <= qp)
2308c2ecf20Sopenharmony_ci				*coeff = *de_coeff = 0;
2318c2ecf20Sopenharmony_ci			else
2328c2ecf20Sopenharmony_ci				*de_coeff = *coeff << *quant;
2338c2ecf20Sopenharmony_ci		}
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci}
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_cistatic void dequantize_inter(s16 *coeff)
2388c2ecf20Sopenharmony_ci{
2398c2ecf20Sopenharmony_ci	const int *quant = quant_table_p;
2408c2ecf20Sopenharmony_ci	int i, j;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	for (j = 0; j < 8; j++)
2438c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++, quant++, coeff++)
2448c2ecf20Sopenharmony_ci			*coeff <<= *quant;
2458c2ecf20Sopenharmony_ci}
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_cistatic void noinline_for_stack fwht(const u8 *block, s16 *output_block,
2488c2ecf20Sopenharmony_ci				    unsigned int stride,
2498c2ecf20Sopenharmony_ci				    unsigned int input_step, bool intra)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	/* we'll need more than 8 bits for the transformed coefficients */
2528c2ecf20Sopenharmony_ci	s32 workspace1[8], workspace2[8];
2538c2ecf20Sopenharmony_ci	const u8 *tmp = block;
2548c2ecf20Sopenharmony_ci	s16 *out = output_block;
2558c2ecf20Sopenharmony_ci	int add = intra ? 256 : 0;
2568c2ecf20Sopenharmony_ci	unsigned int i;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	/* stage 1 */
2598c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
2608c2ecf20Sopenharmony_ci		switch (input_step) {
2618c2ecf20Sopenharmony_ci		case 1:
2628c2ecf20Sopenharmony_ci			workspace1[0]  = tmp[0] + tmp[1] - add;
2638c2ecf20Sopenharmony_ci			workspace1[1]  = tmp[0] - tmp[1];
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci			workspace1[2]  = tmp[2] + tmp[3] - add;
2668c2ecf20Sopenharmony_ci			workspace1[3]  = tmp[2] - tmp[3];
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci			workspace1[4]  = tmp[4] + tmp[5] - add;
2698c2ecf20Sopenharmony_ci			workspace1[5]  = tmp[4] - tmp[5];
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci			workspace1[6]  = tmp[6] + tmp[7] - add;
2728c2ecf20Sopenharmony_ci			workspace1[7]  = tmp[6] - tmp[7];
2738c2ecf20Sopenharmony_ci			break;
2748c2ecf20Sopenharmony_ci		case 2:
2758c2ecf20Sopenharmony_ci			workspace1[0]  = tmp[0] + tmp[2] - add;
2768c2ecf20Sopenharmony_ci			workspace1[1]  = tmp[0] - tmp[2];
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci			workspace1[2]  = tmp[4] + tmp[6] - add;
2798c2ecf20Sopenharmony_ci			workspace1[3]  = tmp[4] - tmp[6];
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci			workspace1[4]  = tmp[8] + tmp[10] - add;
2828c2ecf20Sopenharmony_ci			workspace1[5]  = tmp[8] - tmp[10];
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci			workspace1[6]  = tmp[12] + tmp[14] - add;
2858c2ecf20Sopenharmony_ci			workspace1[7]  = tmp[12] - tmp[14];
2868c2ecf20Sopenharmony_ci			break;
2878c2ecf20Sopenharmony_ci		case 3:
2888c2ecf20Sopenharmony_ci			workspace1[0]  = tmp[0] + tmp[3] - add;
2898c2ecf20Sopenharmony_ci			workspace1[1]  = tmp[0] - tmp[3];
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci			workspace1[2]  = tmp[6] + tmp[9] - add;
2928c2ecf20Sopenharmony_ci			workspace1[3]  = tmp[6] - tmp[9];
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci			workspace1[4]  = tmp[12] + tmp[15] - add;
2958c2ecf20Sopenharmony_ci			workspace1[5]  = tmp[12] - tmp[15];
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci			workspace1[6]  = tmp[18] + tmp[21] - add;
2988c2ecf20Sopenharmony_ci			workspace1[7]  = tmp[18] - tmp[21];
2998c2ecf20Sopenharmony_ci			break;
3008c2ecf20Sopenharmony_ci		default:
3018c2ecf20Sopenharmony_ci			workspace1[0]  = tmp[0] + tmp[4] - add;
3028c2ecf20Sopenharmony_ci			workspace1[1]  = tmp[0] - tmp[4];
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci			workspace1[2]  = tmp[8] + tmp[12] - add;
3058c2ecf20Sopenharmony_ci			workspace1[3]  = tmp[8] - tmp[12];
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_ci			workspace1[4]  = tmp[16] + tmp[20] - add;
3088c2ecf20Sopenharmony_ci			workspace1[5]  = tmp[16] - tmp[20];
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci			workspace1[6]  = tmp[24] + tmp[28] - add;
3118c2ecf20Sopenharmony_ci			workspace1[7]  = tmp[24] - tmp[28];
3128c2ecf20Sopenharmony_ci			break;
3138c2ecf20Sopenharmony_ci		}
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci		/* stage 2 */
3168c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
3178c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
3188c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
3198c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
3228c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
3238c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
3248c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
3258c2ecf20Sopenharmony_ci
3268c2ecf20Sopenharmony_ci		/* stage 3 */
3278c2ecf20Sopenharmony_ci		out[0] = workspace2[0] + workspace2[4];
3288c2ecf20Sopenharmony_ci		out[1] = workspace2[0] - workspace2[4];
3298c2ecf20Sopenharmony_ci		out[2] = workspace2[1] - workspace2[5];
3308c2ecf20Sopenharmony_ci		out[3] = workspace2[1] + workspace2[5];
3318c2ecf20Sopenharmony_ci		out[4] = workspace2[2] + workspace2[6];
3328c2ecf20Sopenharmony_ci		out[5] = workspace2[2] - workspace2[6];
3338c2ecf20Sopenharmony_ci		out[6] = workspace2[3] - workspace2[7];
3348c2ecf20Sopenharmony_ci		out[7] = workspace2[3] + workspace2[7];
3358c2ecf20Sopenharmony_ci	}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	out = output_block;
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, out++) {
3408c2ecf20Sopenharmony_ci		/* stage 1 */
3418c2ecf20Sopenharmony_ci		workspace1[0]  = out[0] + out[1 * 8];
3428c2ecf20Sopenharmony_ci		workspace1[1]  = out[0] - out[1 * 8];
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci		workspace1[2]  = out[2 * 8] + out[3 * 8];
3458c2ecf20Sopenharmony_ci		workspace1[3]  = out[2 * 8] - out[3 * 8];
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci		workspace1[4]  = out[4 * 8] + out[5 * 8];
3488c2ecf20Sopenharmony_ci		workspace1[5]  = out[4 * 8] - out[5 * 8];
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci		workspace1[6]  = out[6 * 8] + out[7 * 8];
3518c2ecf20Sopenharmony_ci		workspace1[7]  = out[6 * 8] - out[7 * 8];
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci		/* stage 2 */
3548c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
3558c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
3568c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
3578c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
3608c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
3618c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
3628c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
3638c2ecf20Sopenharmony_ci		/* stage 3 */
3648c2ecf20Sopenharmony_ci		out[0 * 8] = workspace2[0] + workspace2[4];
3658c2ecf20Sopenharmony_ci		out[1 * 8] = workspace2[0] - workspace2[4];
3668c2ecf20Sopenharmony_ci		out[2 * 8] = workspace2[1] - workspace2[5];
3678c2ecf20Sopenharmony_ci		out[3 * 8] = workspace2[1] + workspace2[5];
3688c2ecf20Sopenharmony_ci		out[4 * 8] = workspace2[2] + workspace2[6];
3698c2ecf20Sopenharmony_ci		out[5 * 8] = workspace2[2] - workspace2[6];
3708c2ecf20Sopenharmony_ci		out[6 * 8] = workspace2[3] - workspace2[7];
3718c2ecf20Sopenharmony_ci		out[7 * 8] = workspace2[3] + workspace2[7];
3728c2ecf20Sopenharmony_ci	}
3738c2ecf20Sopenharmony_ci}
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci/*
3768c2ecf20Sopenharmony_ci * Not the nicest way of doing it, but P-blocks get twice the range of
3778c2ecf20Sopenharmony_ci * that of the I-blocks. Therefore we need a type bigger than 8 bits.
3788c2ecf20Sopenharmony_ci * Furthermore values can be negative... This is just a version that
3798c2ecf20Sopenharmony_ci * works with 16 signed data
3808c2ecf20Sopenharmony_ci */
3818c2ecf20Sopenharmony_cistatic void noinline_for_stack
3828c2ecf20Sopenharmony_cifwht16(const s16 *block, s16 *output_block, int stride, int intra)
3838c2ecf20Sopenharmony_ci{
3848c2ecf20Sopenharmony_ci	/* we'll need more than 8 bits for the transformed coefficients */
3858c2ecf20Sopenharmony_ci	s32 workspace1[8], workspace2[8];
3868c2ecf20Sopenharmony_ci	const s16 *tmp = block;
3878c2ecf20Sopenharmony_ci	s16 *out = output_block;
3888c2ecf20Sopenharmony_ci	int i;
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
3918c2ecf20Sopenharmony_ci		/* stage 1 */
3928c2ecf20Sopenharmony_ci		workspace1[0]  = tmp[0] + tmp[1];
3938c2ecf20Sopenharmony_ci		workspace1[1]  = tmp[0] - tmp[1];
3948c2ecf20Sopenharmony_ci
3958c2ecf20Sopenharmony_ci		workspace1[2]  = tmp[2] + tmp[3];
3968c2ecf20Sopenharmony_ci		workspace1[3]  = tmp[2] - tmp[3];
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci		workspace1[4]  = tmp[4] + tmp[5];
3998c2ecf20Sopenharmony_ci		workspace1[5]  = tmp[4] - tmp[5];
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci		workspace1[6]  = tmp[6] + tmp[7];
4028c2ecf20Sopenharmony_ci		workspace1[7]  = tmp[6] - tmp[7];
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci		/* stage 2 */
4058c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
4068c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
4078c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
4088c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
4118c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
4128c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
4138c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci		/* stage 3 */
4168c2ecf20Sopenharmony_ci		out[0] = workspace2[0] + workspace2[4];
4178c2ecf20Sopenharmony_ci		out[1] = workspace2[0] - workspace2[4];
4188c2ecf20Sopenharmony_ci		out[2] = workspace2[1] - workspace2[5];
4198c2ecf20Sopenharmony_ci		out[3] = workspace2[1] + workspace2[5];
4208c2ecf20Sopenharmony_ci		out[4] = workspace2[2] + workspace2[6];
4218c2ecf20Sopenharmony_ci		out[5] = workspace2[2] - workspace2[6];
4228c2ecf20Sopenharmony_ci		out[6] = workspace2[3] - workspace2[7];
4238c2ecf20Sopenharmony_ci		out[7] = workspace2[3] + workspace2[7];
4248c2ecf20Sopenharmony_ci	}
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	out = output_block;
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, out++) {
4298c2ecf20Sopenharmony_ci		/* stage 1 */
4308c2ecf20Sopenharmony_ci		workspace1[0]  = out[0] + out[1*8];
4318c2ecf20Sopenharmony_ci		workspace1[1]  = out[0] - out[1*8];
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci		workspace1[2]  = out[2*8] + out[3*8];
4348c2ecf20Sopenharmony_ci		workspace1[3]  = out[2*8] - out[3*8];
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci		workspace1[4]  = out[4*8] + out[5*8];
4378c2ecf20Sopenharmony_ci		workspace1[5]  = out[4*8] - out[5*8];
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci		workspace1[6]  = out[6*8] + out[7*8];
4408c2ecf20Sopenharmony_ci		workspace1[7]  = out[6*8] - out[7*8];
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_ci		/* stage 2 */
4438c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
4448c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
4458c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
4468c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
4498c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
4508c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
4518c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci		/* stage 3 */
4548c2ecf20Sopenharmony_ci		out[0*8] = workspace2[0] + workspace2[4];
4558c2ecf20Sopenharmony_ci		out[1*8] = workspace2[0] - workspace2[4];
4568c2ecf20Sopenharmony_ci		out[2*8] = workspace2[1] - workspace2[5];
4578c2ecf20Sopenharmony_ci		out[3*8] = workspace2[1] + workspace2[5];
4588c2ecf20Sopenharmony_ci		out[4*8] = workspace2[2] + workspace2[6];
4598c2ecf20Sopenharmony_ci		out[5*8] = workspace2[2] - workspace2[6];
4608c2ecf20Sopenharmony_ci		out[6*8] = workspace2[3] - workspace2[7];
4618c2ecf20Sopenharmony_ci		out[7*8] = workspace2[3] + workspace2[7];
4628c2ecf20Sopenharmony_ci	}
4638c2ecf20Sopenharmony_ci}
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_cistatic noinline_for_stack void
4668c2ecf20Sopenharmony_ciifwht(const s16 *block, s16 *output_block, int intra)
4678c2ecf20Sopenharmony_ci{
4688c2ecf20Sopenharmony_ci	/*
4698c2ecf20Sopenharmony_ci	 * we'll need more than 8 bits for the transformed coefficients
4708c2ecf20Sopenharmony_ci	 * use native unit of cpu
4718c2ecf20Sopenharmony_ci	 */
4728c2ecf20Sopenharmony_ci	int workspace1[8], workspace2[8];
4738c2ecf20Sopenharmony_ci	int inter = intra ? 0 : 1;
4748c2ecf20Sopenharmony_ci	const s16 *tmp = block;
4758c2ecf20Sopenharmony_ci	s16 *out = output_block;
4768c2ecf20Sopenharmony_ci	int i;
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, tmp += 8, out += 8) {
4798c2ecf20Sopenharmony_ci		/* stage 1 */
4808c2ecf20Sopenharmony_ci		workspace1[0]  = tmp[0] + tmp[1];
4818c2ecf20Sopenharmony_ci		workspace1[1]  = tmp[0] - tmp[1];
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci		workspace1[2]  = tmp[2] + tmp[3];
4848c2ecf20Sopenharmony_ci		workspace1[3]  = tmp[2] - tmp[3];
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci		workspace1[4]  = tmp[4] + tmp[5];
4878c2ecf20Sopenharmony_ci		workspace1[5]  = tmp[4] - tmp[5];
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci		workspace1[6]  = tmp[6] + tmp[7];
4908c2ecf20Sopenharmony_ci		workspace1[7]  = tmp[6] - tmp[7];
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci		/* stage 2 */
4938c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
4948c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
4958c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
4968c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
4998c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
5008c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
5018c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci		/* stage 3 */
5048c2ecf20Sopenharmony_ci		out[0] = workspace2[0] + workspace2[4];
5058c2ecf20Sopenharmony_ci		out[1] = workspace2[0] - workspace2[4];
5068c2ecf20Sopenharmony_ci		out[2] = workspace2[1] - workspace2[5];
5078c2ecf20Sopenharmony_ci		out[3] = workspace2[1] + workspace2[5];
5088c2ecf20Sopenharmony_ci		out[4] = workspace2[2] + workspace2[6];
5098c2ecf20Sopenharmony_ci		out[5] = workspace2[2] - workspace2[6];
5108c2ecf20Sopenharmony_ci		out[6] = workspace2[3] - workspace2[7];
5118c2ecf20Sopenharmony_ci		out[7] = workspace2[3] + workspace2[7];
5128c2ecf20Sopenharmony_ci	}
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	out = output_block;
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++, out++) {
5178c2ecf20Sopenharmony_ci		/* stage 1 */
5188c2ecf20Sopenharmony_ci		workspace1[0]  = out[0] + out[1 * 8];
5198c2ecf20Sopenharmony_ci		workspace1[1]  = out[0] - out[1 * 8];
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci		workspace1[2]  = out[2 * 8] + out[3 * 8];
5228c2ecf20Sopenharmony_ci		workspace1[3]  = out[2 * 8] - out[3 * 8];
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci		workspace1[4]  = out[4 * 8] + out[5 * 8];
5258c2ecf20Sopenharmony_ci		workspace1[5]  = out[4 * 8] - out[5 * 8];
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci		workspace1[6]  = out[6 * 8] + out[7 * 8];
5288c2ecf20Sopenharmony_ci		workspace1[7]  = out[6 * 8] - out[7 * 8];
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci		/* stage 2 */
5318c2ecf20Sopenharmony_ci		workspace2[0] = workspace1[0] + workspace1[2];
5328c2ecf20Sopenharmony_ci		workspace2[1] = workspace1[0] - workspace1[2];
5338c2ecf20Sopenharmony_ci		workspace2[2] = workspace1[1] - workspace1[3];
5348c2ecf20Sopenharmony_ci		workspace2[3] = workspace1[1] + workspace1[3];
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci		workspace2[4] = workspace1[4] + workspace1[6];
5378c2ecf20Sopenharmony_ci		workspace2[5] = workspace1[4] - workspace1[6];
5388c2ecf20Sopenharmony_ci		workspace2[6] = workspace1[5] - workspace1[7];
5398c2ecf20Sopenharmony_ci		workspace2[7] = workspace1[5] + workspace1[7];
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci		/* stage 3 */
5428c2ecf20Sopenharmony_ci		if (inter) {
5438c2ecf20Sopenharmony_ci			int d;
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci			out[0 * 8] = workspace2[0] + workspace2[4];
5468c2ecf20Sopenharmony_ci			out[1 * 8] = workspace2[0] - workspace2[4];
5478c2ecf20Sopenharmony_ci			out[2 * 8] = workspace2[1] - workspace2[5];
5488c2ecf20Sopenharmony_ci			out[3 * 8] = workspace2[1] + workspace2[5];
5498c2ecf20Sopenharmony_ci			out[4 * 8] = workspace2[2] + workspace2[6];
5508c2ecf20Sopenharmony_ci			out[5 * 8] = workspace2[2] - workspace2[6];
5518c2ecf20Sopenharmony_ci			out[6 * 8] = workspace2[3] - workspace2[7];
5528c2ecf20Sopenharmony_ci			out[7 * 8] = workspace2[3] + workspace2[7];
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci			for (d = 0; d < 8; d++)
5558c2ecf20Sopenharmony_ci				out[8 * d] >>= 6;
5568c2ecf20Sopenharmony_ci		} else {
5578c2ecf20Sopenharmony_ci			int d;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci			out[0 * 8] = workspace2[0] + workspace2[4];
5608c2ecf20Sopenharmony_ci			out[1 * 8] = workspace2[0] - workspace2[4];
5618c2ecf20Sopenharmony_ci			out[2 * 8] = workspace2[1] - workspace2[5];
5628c2ecf20Sopenharmony_ci			out[3 * 8] = workspace2[1] + workspace2[5];
5638c2ecf20Sopenharmony_ci			out[4 * 8] = workspace2[2] + workspace2[6];
5648c2ecf20Sopenharmony_ci			out[5 * 8] = workspace2[2] - workspace2[6];
5658c2ecf20Sopenharmony_ci			out[6 * 8] = workspace2[3] - workspace2[7];
5668c2ecf20Sopenharmony_ci			out[7 * 8] = workspace2[3] + workspace2[7];
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci			for (d = 0; d < 8; d++) {
5698c2ecf20Sopenharmony_ci				out[8 * d] >>= 6;
5708c2ecf20Sopenharmony_ci				out[8 * d] += 128;
5718c2ecf20Sopenharmony_ci			}
5728c2ecf20Sopenharmony_ci		}
5738c2ecf20Sopenharmony_ci	}
5748c2ecf20Sopenharmony_ci}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_cistatic void fill_encoder_block(const u8 *input, s16 *dst,
5778c2ecf20Sopenharmony_ci			       unsigned int stride, unsigned int input_step)
5788c2ecf20Sopenharmony_ci{
5798c2ecf20Sopenharmony_ci	int i, j;
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++) {
5828c2ecf20Sopenharmony_ci		for (j = 0; j < 8; j++, input += input_step)
5838c2ecf20Sopenharmony_ci			*dst++ = *input;
5848c2ecf20Sopenharmony_ci		input += stride - 8 * input_step;
5858c2ecf20Sopenharmony_ci	}
5868c2ecf20Sopenharmony_ci}
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_cistatic int var_intra(const s16 *input)
5898c2ecf20Sopenharmony_ci{
5908c2ecf20Sopenharmony_ci	int32_t mean = 0;
5918c2ecf20Sopenharmony_ci	int32_t ret = 0;
5928c2ecf20Sopenharmony_ci	const s16 *tmp = input;
5938c2ecf20Sopenharmony_ci	int i;
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ci	for (i = 0; i < 8 * 8; i++, tmp++)
5968c2ecf20Sopenharmony_ci		mean += *tmp;
5978c2ecf20Sopenharmony_ci	mean /= 64;
5988c2ecf20Sopenharmony_ci	tmp = input;
5998c2ecf20Sopenharmony_ci	for (i = 0; i < 8 * 8; i++, tmp++)
6008c2ecf20Sopenharmony_ci		ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
6018c2ecf20Sopenharmony_ci	return ret;
6028c2ecf20Sopenharmony_ci}
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_cistatic int var_inter(const s16 *old, const s16 *new)
6058c2ecf20Sopenharmony_ci{
6068c2ecf20Sopenharmony_ci	int32_t ret = 0;
6078c2ecf20Sopenharmony_ci	int i;
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	for (i = 0; i < 8 * 8; i++, old++, new++)
6108c2ecf20Sopenharmony_ci		ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
6118c2ecf20Sopenharmony_ci	return ret;
6128c2ecf20Sopenharmony_ci}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_cistatic noinline_for_stack int
6158c2ecf20Sopenharmony_cidecide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock,
6168c2ecf20Sopenharmony_ci		 unsigned int stride, unsigned int input_step)
6178c2ecf20Sopenharmony_ci{
6188c2ecf20Sopenharmony_ci	s16 tmp[64];
6198c2ecf20Sopenharmony_ci	s16 old[64];
6208c2ecf20Sopenharmony_ci	s16 *work = tmp;
6218c2ecf20Sopenharmony_ci	unsigned int k, l;
6228c2ecf20Sopenharmony_ci	int vari;
6238c2ecf20Sopenharmony_ci	int vard;
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci	fill_encoder_block(cur, tmp, stride, input_step);
6268c2ecf20Sopenharmony_ci	fill_encoder_block(reference, old, 8, 1);
6278c2ecf20Sopenharmony_ci	vari = var_intra(tmp);
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	for (k = 0; k < 8; k++) {
6308c2ecf20Sopenharmony_ci		for (l = 0; l < 8; l++) {
6318c2ecf20Sopenharmony_ci			*deltablock = *work - *reference;
6328c2ecf20Sopenharmony_ci			deltablock++;
6338c2ecf20Sopenharmony_ci			work++;
6348c2ecf20Sopenharmony_ci			reference++;
6358c2ecf20Sopenharmony_ci		}
6368c2ecf20Sopenharmony_ci	}
6378c2ecf20Sopenharmony_ci	deltablock -= 64;
6388c2ecf20Sopenharmony_ci	vard = var_inter(old, tmp);
6398c2ecf20Sopenharmony_ci	return vari <= vard ? IBLOCK : PBLOCK;
6408c2ecf20Sopenharmony_ci}
6418c2ecf20Sopenharmony_ci
/*
 * Store one 8x8 block of samples into a destination plane.
 *
 * @dst:      top-left destination byte of the block.
 * @input:    64 consecutive samples; each is clamped to 0..255.
 * @stride:   distance in bytes between two destination lines.
 * @dst_step: distance in bytes between two destination samples on a line.
 */
static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
			       unsigned int dst_step)
{
	int row, col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++) {
			s16 v = *input++;

			if (v < 0)
				v = 0;
			else if (v > 255)
				v = 255;
			*dst = v;
			dst += dst_step;
		}
		/* Rewind the 8 steps taken above and move to the next line. */
		dst += stride - (8 * dst_step);
	}
}
6598c2ecf20Sopenharmony_ci
/*
 * Add reference samples to an 8x8 block of deltas, in place.
 *
 * @deltas:   64 consecutive deltas; on return each entry holds
 *            delta + reference sample, clamped to 0..255.
 * @ref:      top-left reference byte of the block.
 * @stride:   distance in bytes between two reference lines.
 * @ref_step: distance in bytes between two reference samples on a line.
 */
static void add_deltas(s16 *deltas, const u8 *ref, int stride,
		       unsigned int ref_step)
{
	int row, col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++, deltas++) {
			s16 px = *deltas + *ref;

			ref += ref_step;
			/*
			 * Due to quantizing, it might be possible that the
			 * decoded coefficients are slightly out of range
			 */
			if (px < 0)
				px = 0;
			else if (px > 255)
				px = 255;
			*deltas = px;
		}
		/* Rewind the 8 steps taken above and move to the next line. */
		ref += stride - (8 * ref_step);
	}
}
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_cistatic u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
6848c2ecf20Sopenharmony_ci			struct fwht_cframe *cf, u32 height, u32 width,
6858c2ecf20Sopenharmony_ci			u32 stride, unsigned int input_step,
6868c2ecf20Sopenharmony_ci			bool is_intra, bool next_is_intra)
6878c2ecf20Sopenharmony_ci{
6888c2ecf20Sopenharmony_ci	u8 *input_start = input;
6898c2ecf20Sopenharmony_ci	__be16 *rlco_start = *rlco;
6908c2ecf20Sopenharmony_ci	s16 deltablock[64];
6918c2ecf20Sopenharmony_ci	__be16 pframe_bit = htons(PFRAME_BIT);
6928c2ecf20Sopenharmony_ci	u32 encoding = 0;
6938c2ecf20Sopenharmony_ci	unsigned int last_size = 0;
6948c2ecf20Sopenharmony_ci	unsigned int i, j;
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_ci	width = round_up(width, 8);
6978c2ecf20Sopenharmony_ci	height = round_up(height, 8);
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	for (j = 0; j < height / 8; j++) {
7008c2ecf20Sopenharmony_ci		input = input_start + j * 8 * stride;
7018c2ecf20Sopenharmony_ci		for (i = 0; i < width / 8; i++) {
7028c2ecf20Sopenharmony_ci			/* intra code, first frame is always intra coded. */
7038c2ecf20Sopenharmony_ci			int blocktype = IBLOCK;
7048c2ecf20Sopenharmony_ci			unsigned int size;
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci			if (!is_intra)
7078c2ecf20Sopenharmony_ci				blocktype = decide_blocktype(input, refp,
7088c2ecf20Sopenharmony_ci					deltablock, stride, input_step);
7098c2ecf20Sopenharmony_ci			if (blocktype == IBLOCK) {
7108c2ecf20Sopenharmony_ci				fwht(input, cf->coeffs, stride, input_step, 1);
7118c2ecf20Sopenharmony_ci				quantize_intra(cf->coeffs, cf->de_coeffs,
7128c2ecf20Sopenharmony_ci					       cf->i_frame_qp);
7138c2ecf20Sopenharmony_ci			} else {
7148c2ecf20Sopenharmony_ci				/* inter code */
7158c2ecf20Sopenharmony_ci				encoding |= FWHT_FRAME_PCODED;
7168c2ecf20Sopenharmony_ci				fwht16(deltablock, cf->coeffs, 8, 0);
7178c2ecf20Sopenharmony_ci				quantize_inter(cf->coeffs, cf->de_coeffs,
7188c2ecf20Sopenharmony_ci					       cf->p_frame_qp);
7198c2ecf20Sopenharmony_ci			}
7208c2ecf20Sopenharmony_ci			if (!next_is_intra) {
7218c2ecf20Sopenharmony_ci				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
7228c2ecf20Sopenharmony_ci
7238c2ecf20Sopenharmony_ci				if (blocktype == PBLOCK)
7248c2ecf20Sopenharmony_ci					add_deltas(cf->de_fwht, refp, 8, 1);
7258c2ecf20Sopenharmony_ci				fill_decoder_block(refp, cf->de_fwht, 8, 1);
7268c2ecf20Sopenharmony_ci			}
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci			input += 8 * input_step;
7298c2ecf20Sopenharmony_ci			refp += 8 * 8;
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci			size = rlc(cf->coeffs, *rlco, blocktype);
7328c2ecf20Sopenharmony_ci			if (last_size == size &&
7338c2ecf20Sopenharmony_ci			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
7348c2ecf20Sopenharmony_ci				__be16 *last_rlco = *rlco - size;
7358c2ecf20Sopenharmony_ci				s16 hdr = ntohs(*last_rlco);
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
7388c2ecf20Sopenharmony_ci				    (hdr & DUPS_MASK) < DUPS_MASK)
7398c2ecf20Sopenharmony_ci					*last_rlco = htons(hdr + 2);
7408c2ecf20Sopenharmony_ci				else
7418c2ecf20Sopenharmony_ci					*rlco += size;
7428c2ecf20Sopenharmony_ci			} else {
7438c2ecf20Sopenharmony_ci				*rlco += size;
7448c2ecf20Sopenharmony_ci			}
7458c2ecf20Sopenharmony_ci			if (*rlco >= rlco_max) {
7468c2ecf20Sopenharmony_ci				encoding |= FWHT_FRAME_UNENCODED;
7478c2ecf20Sopenharmony_ci				goto exit_loop;
7488c2ecf20Sopenharmony_ci			}
7498c2ecf20Sopenharmony_ci			last_size = size;
7508c2ecf20Sopenharmony_ci		}
7518c2ecf20Sopenharmony_ci	}
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ciexit_loop:
7548c2ecf20Sopenharmony_ci	if (encoding & FWHT_FRAME_UNENCODED) {
7558c2ecf20Sopenharmony_ci		u8 *out = (u8 *)rlco_start;
7568c2ecf20Sopenharmony_ci		u8 *p;
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_ci		input = input_start;
7598c2ecf20Sopenharmony_ci		/*
7608c2ecf20Sopenharmony_ci		 * The compressed stream should never contain the magic
7618c2ecf20Sopenharmony_ci		 * header, so when we copy the YUV data we replace 0xff
7628c2ecf20Sopenharmony_ci		 * by 0xfe. Since YUV is limited range such values
7638c2ecf20Sopenharmony_ci		 * shouldn't appear anyway.
7648c2ecf20Sopenharmony_ci		 */
7658c2ecf20Sopenharmony_ci		for (j = 0; j < height; j++) {
7668c2ecf20Sopenharmony_ci			for (i = 0, p = input; i < width; i++, p += input_step)
7678c2ecf20Sopenharmony_ci				*out++ = (*p == 0xff) ? 0xfe : *p;
7688c2ecf20Sopenharmony_ci			input += stride;
7698c2ecf20Sopenharmony_ci		}
7708c2ecf20Sopenharmony_ci		*rlco = (__be16 *)out;
7718c2ecf20Sopenharmony_ci		encoding &= ~FWHT_FRAME_PCODED;
7728c2ecf20Sopenharmony_ci	}
7738c2ecf20Sopenharmony_ci	return encoding;
7748c2ecf20Sopenharmony_ci}
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ciu32 fwht_encode_frame(struct fwht_raw_frame *frm,
7778c2ecf20Sopenharmony_ci		      struct fwht_raw_frame *ref_frm,
7788c2ecf20Sopenharmony_ci		      struct fwht_cframe *cf,
7798c2ecf20Sopenharmony_ci		      bool is_intra, bool next_is_intra,
7808c2ecf20Sopenharmony_ci		      unsigned int width, unsigned int height,
7818c2ecf20Sopenharmony_ci		      unsigned int stride, unsigned int chroma_stride)
7828c2ecf20Sopenharmony_ci{
7838c2ecf20Sopenharmony_ci	unsigned int size = height * width;
7848c2ecf20Sopenharmony_ci	__be16 *rlco = cf->rlc_data;
7858c2ecf20Sopenharmony_ci	__be16 *rlco_max;
7868c2ecf20Sopenharmony_ci	u32 encoding;
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci	rlco_max = rlco + size / 2 - 256;
7898c2ecf20Sopenharmony_ci	encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
7908c2ecf20Sopenharmony_ci				height, width, stride,
7918c2ecf20Sopenharmony_ci				frm->luma_alpha_step, is_intra, next_is_intra);
7928c2ecf20Sopenharmony_ci	if (encoding & FWHT_FRAME_UNENCODED)
7938c2ecf20Sopenharmony_ci		encoding |= FWHT_LUMA_UNENCODED;
7948c2ecf20Sopenharmony_ci	encoding &= ~FWHT_FRAME_UNENCODED;
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	if (frm->components_num >= 3) {
7978c2ecf20Sopenharmony_ci		u32 chroma_h = height / frm->height_div;
7988c2ecf20Sopenharmony_ci		u32 chroma_w = width / frm->width_div;
7998c2ecf20Sopenharmony_ci		unsigned int chroma_size = chroma_h * chroma_w;
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci		rlco_max = rlco + chroma_size / 2 - 256;
8028c2ecf20Sopenharmony_ci		encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max,
8038c2ecf20Sopenharmony_ci					 cf, chroma_h, chroma_w,
8048c2ecf20Sopenharmony_ci					 chroma_stride, frm->chroma_step,
8058c2ecf20Sopenharmony_ci					 is_intra, next_is_intra);
8068c2ecf20Sopenharmony_ci		if (encoding & FWHT_FRAME_UNENCODED)
8078c2ecf20Sopenharmony_ci			encoding |= FWHT_CB_UNENCODED;
8088c2ecf20Sopenharmony_ci		encoding &= ~FWHT_FRAME_UNENCODED;
8098c2ecf20Sopenharmony_ci		rlco_max = rlco + chroma_size / 2 - 256;
8108c2ecf20Sopenharmony_ci		encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max,
8118c2ecf20Sopenharmony_ci					 cf, chroma_h, chroma_w,
8128c2ecf20Sopenharmony_ci					 chroma_stride, frm->chroma_step,
8138c2ecf20Sopenharmony_ci					 is_intra, next_is_intra);
8148c2ecf20Sopenharmony_ci		if (encoding & FWHT_FRAME_UNENCODED)
8158c2ecf20Sopenharmony_ci			encoding |= FWHT_CR_UNENCODED;
8168c2ecf20Sopenharmony_ci		encoding &= ~FWHT_FRAME_UNENCODED;
8178c2ecf20Sopenharmony_ci	}
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci	if (frm->components_num == 4) {
8208c2ecf20Sopenharmony_ci		rlco_max = rlco + size / 2 - 256;
8218c2ecf20Sopenharmony_ci		encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco,
8228c2ecf20Sopenharmony_ci					 rlco_max, cf, height, width,
8238c2ecf20Sopenharmony_ci					 stride, frm->luma_alpha_step,
8248c2ecf20Sopenharmony_ci					 is_intra, next_is_intra);
8258c2ecf20Sopenharmony_ci		if (encoding & FWHT_FRAME_UNENCODED)
8268c2ecf20Sopenharmony_ci			encoding |= FWHT_ALPHA_UNENCODED;
8278c2ecf20Sopenharmony_ci		encoding &= ~FWHT_FRAME_UNENCODED;
8288c2ecf20Sopenharmony_ci	}
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci	cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
8318c2ecf20Sopenharmony_ci	return encoding;
8328c2ecf20Sopenharmony_ci}
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_cistatic bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
8358c2ecf20Sopenharmony_ci			 u32 height, u32 width, const u8 *ref, u32 ref_stride,
8368c2ecf20Sopenharmony_ci			 unsigned int ref_step, u8 *dst,
8378c2ecf20Sopenharmony_ci			 unsigned int dst_stride, unsigned int dst_step,
8388c2ecf20Sopenharmony_ci			 bool uncompressed, const __be16 *end_of_rlco_buf)
8398c2ecf20Sopenharmony_ci{
8408c2ecf20Sopenharmony_ci	unsigned int copies = 0;
8418c2ecf20Sopenharmony_ci	s16 copy[8 * 8];
8428c2ecf20Sopenharmony_ci	u16 stat;
8438c2ecf20Sopenharmony_ci	unsigned int i, j;
8448c2ecf20Sopenharmony_ci	bool is_intra = !ref;
8458c2ecf20Sopenharmony_ci
8468c2ecf20Sopenharmony_ci	width = round_up(width, 8);
8478c2ecf20Sopenharmony_ci	height = round_up(height, 8);
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_ci	if (uncompressed) {
8508c2ecf20Sopenharmony_ci		int i;
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci		if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
8538c2ecf20Sopenharmony_ci			return false;
8548c2ecf20Sopenharmony_ci		for (i = 0; i < height; i++) {
8558c2ecf20Sopenharmony_ci			memcpy(dst, *rlco, width);
8568c2ecf20Sopenharmony_ci			dst += dst_stride;
8578c2ecf20Sopenharmony_ci			*rlco += width / 2;
8588c2ecf20Sopenharmony_ci		}
8598c2ecf20Sopenharmony_ci		return true;
8608c2ecf20Sopenharmony_ci	}
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci	/*
8638c2ecf20Sopenharmony_ci	 * When decoding each macroblock the rlco pointer will be increased
8648c2ecf20Sopenharmony_ci	 * by 65 * 2 bytes worst-case.
8658c2ecf20Sopenharmony_ci	 * To avoid overflow the buffer has to be 65/64th of the actual raw
8668c2ecf20Sopenharmony_ci	 * image size, just in case someone feeds it malicious data.
8678c2ecf20Sopenharmony_ci	 */
8688c2ecf20Sopenharmony_ci	for (j = 0; j < height / 8; j++) {
8698c2ecf20Sopenharmony_ci		for (i = 0; i < width / 8; i++) {
8708c2ecf20Sopenharmony_ci			const u8 *refp = ref + j * 8 * ref_stride +
8718c2ecf20Sopenharmony_ci				i * 8 * ref_step;
8728c2ecf20Sopenharmony_ci			u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci			if (copies) {
8758c2ecf20Sopenharmony_ci				memcpy(cf->de_fwht, copy, sizeof(copy));
8768c2ecf20Sopenharmony_ci				if ((stat & PFRAME_BIT) && !is_intra)
8778c2ecf20Sopenharmony_ci					add_deltas(cf->de_fwht, refp,
8788c2ecf20Sopenharmony_ci						   ref_stride, ref_step);
8798c2ecf20Sopenharmony_ci				fill_decoder_block(dstp, cf->de_fwht,
8808c2ecf20Sopenharmony_ci						   dst_stride, dst_step);
8818c2ecf20Sopenharmony_ci				copies--;
8828c2ecf20Sopenharmony_ci				continue;
8838c2ecf20Sopenharmony_ci			}
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci			stat = derlc(rlco, cf->coeffs, end_of_rlco_buf);
8868c2ecf20Sopenharmony_ci			if (stat & OVERFLOW_BIT)
8878c2ecf20Sopenharmony_ci				return false;
8888c2ecf20Sopenharmony_ci			if ((stat & PFRAME_BIT) && !is_intra)
8898c2ecf20Sopenharmony_ci				dequantize_inter(cf->coeffs);
8908c2ecf20Sopenharmony_ci			else
8918c2ecf20Sopenharmony_ci				dequantize_intra(cf->coeffs);
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci			ifwht(cf->coeffs, cf->de_fwht,
8948c2ecf20Sopenharmony_ci			      ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1);
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci			copies = (stat & DUPS_MASK) >> 1;
8978c2ecf20Sopenharmony_ci			if (copies)
8988c2ecf20Sopenharmony_ci				memcpy(copy, cf->de_fwht, sizeof(copy));
8998c2ecf20Sopenharmony_ci			if ((stat & PFRAME_BIT) && !is_intra)
9008c2ecf20Sopenharmony_ci				add_deltas(cf->de_fwht, refp,
9018c2ecf20Sopenharmony_ci					   ref_stride, ref_step);
9028c2ecf20Sopenharmony_ci			fill_decoder_block(dstp, cf->de_fwht, dst_stride,
9038c2ecf20Sopenharmony_ci					   dst_step);
9048c2ecf20Sopenharmony_ci		}
9058c2ecf20Sopenharmony_ci	}
9068c2ecf20Sopenharmony_ci	return true;
9078c2ecf20Sopenharmony_ci}
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_cibool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags,
9108c2ecf20Sopenharmony_ci		       unsigned int components_num, unsigned int width,
9118c2ecf20Sopenharmony_ci		       unsigned int height, const struct fwht_raw_frame *ref,
9128c2ecf20Sopenharmony_ci		       unsigned int ref_stride, unsigned int ref_chroma_stride,
9138c2ecf20Sopenharmony_ci		       struct fwht_raw_frame *dst, unsigned int dst_stride,
9148c2ecf20Sopenharmony_ci		       unsigned int dst_chroma_stride)
9158c2ecf20Sopenharmony_ci{
9168c2ecf20Sopenharmony_ci	const __be16 *rlco = cf->rlc_data;
9178c2ecf20Sopenharmony_ci	const __be16 *end_of_rlco_buf = cf->rlc_data +
9188c2ecf20Sopenharmony_ci			(cf->size / sizeof(*rlco)) - 1;
9198c2ecf20Sopenharmony_ci
9208c2ecf20Sopenharmony_ci	if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride,
9218c2ecf20Sopenharmony_ci			  ref->luma_alpha_step, dst->luma, dst_stride,
9228c2ecf20Sopenharmony_ci			  dst->luma_alpha_step,
9238c2ecf20Sopenharmony_ci			  hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
9248c2ecf20Sopenharmony_ci			  end_of_rlco_buf))
9258c2ecf20Sopenharmony_ci		return false;
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci	if (components_num >= 3) {
9288c2ecf20Sopenharmony_ci		u32 h = height;
9298c2ecf20Sopenharmony_ci		u32 w = width;
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT))
9328c2ecf20Sopenharmony_ci			h /= 2;
9338c2ecf20Sopenharmony_ci		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH))
9348c2ecf20Sopenharmony_ci			w /= 2;
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ci		if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride,
9378c2ecf20Sopenharmony_ci				  ref->chroma_step, dst->cb, dst_chroma_stride,
9388c2ecf20Sopenharmony_ci				  dst->chroma_step,
9398c2ecf20Sopenharmony_ci				  hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
9408c2ecf20Sopenharmony_ci				  end_of_rlco_buf))
9418c2ecf20Sopenharmony_ci			return false;
9428c2ecf20Sopenharmony_ci		if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride,
9438c2ecf20Sopenharmony_ci				  ref->chroma_step, dst->cr, dst_chroma_stride,
9448c2ecf20Sopenharmony_ci				  dst->chroma_step,
9458c2ecf20Sopenharmony_ci				  hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
9468c2ecf20Sopenharmony_ci				  end_of_rlco_buf))
9478c2ecf20Sopenharmony_ci			return false;
9488c2ecf20Sopenharmony_ci	}
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci	if (components_num == 4)
9518c2ecf20Sopenharmony_ci		if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride,
9528c2ecf20Sopenharmony_ci				  ref->luma_alpha_step, dst->alpha, dst_stride,
9538c2ecf20Sopenharmony_ci				  dst->luma_alpha_step,
9548c2ecf20Sopenharmony_ci				  hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
9558c2ecf20Sopenharmony_ci				  end_of_rlco_buf))
9568c2ecf20Sopenharmony_ci			return false;
9578c2ecf20Sopenharmony_ci	return true;
9588c2ecf20Sopenharmony_ci}
959