162306a36Sopenharmony_ci// SPDX-License-Identifier: LGPL-2.1+ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2016 Tom aan de Wiel 462306a36Sopenharmony_ci * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper: 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms, 962306a36Sopenharmony_ci * R.D. Brown, 1977 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci#include <linux/kernel.h> 1462306a36Sopenharmony_ci#include <linux/videodev2.h> 1562306a36Sopenharmony_ci#include "codec-fwht.h" 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#define OVERFLOW_BIT BIT(14) 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci/* 2062306a36Sopenharmony_ci * Note: bit 0 of the header must always be 0. Otherwise it cannot 2162306a36Sopenharmony_ci * be guaranteed that the magic 8 byte sequence (see below) can 2262306a36Sopenharmony_ci * never occur in the rlc output. 2362306a36Sopenharmony_ci */ 2462306a36Sopenharmony_ci#define PFRAME_BIT BIT(15) 2562306a36Sopenharmony_ci#define DUPS_MASK 0x1ffe 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#define PBLOCK 0 2862306a36Sopenharmony_ci#define IBLOCK 1 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#define ALL_ZEROS 15 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cistatic const uint8_t zigzag[64] = { 3362306a36Sopenharmony_ci 0, 3462306a36Sopenharmony_ci 1, 8, 3562306a36Sopenharmony_ci 2, 9, 16, 3662306a36Sopenharmony_ci 3, 10, 17, 24, 3762306a36Sopenharmony_ci 4, 11, 18, 25, 32, 3862306a36Sopenharmony_ci 5, 12, 19, 26, 33, 40, 3962306a36Sopenharmony_ci 6, 13, 20, 27, 34, 41, 48, 4062306a36Sopenharmony_ci 7, 14, 21, 28, 35, 42, 49, 56, 4162306a36Sopenharmony_ci 15, 22, 29, 36, 43, 50, 57, 4262306a36Sopenharmony_ci 23, 30, 37, 44, 51, 58, 4362306a36Sopenharmony_ci 31, 38, 45, 52, 59, 4462306a36Sopenharmony_ci 39, 46, 53, 60, 4562306a36Sopenharmony_ci 47, 54, 61, 4662306a36Sopenharmony_ci 55, 62, 4762306a36Sopenharmony_ci 63, 4862306a36Sopenharmony_ci}; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci/* 5162306a36Sopenharmony_ci * noinline_for_stack to work around 5262306a36Sopenharmony_ci * https://bugs.llvm.org/show_bug.cgi?id=38809 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_cistatic int noinline_for_stack 5562306a36Sopenharmony_cirlc(const s16 *in, __be16 *output, int blocktype) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci s16 block[8 * 8]; 5862306a36Sopenharmony_ci s16 *wp = block; 5962306a36Sopenharmony_ci int i = 0; 6062306a36Sopenharmony_ci int x, y; 6162306a36Sopenharmony_ci int ret = 0; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci /* read in block from framebuffer */ 6462306a36Sopenharmony_ci int lastzero_run = 0; 6562306a36Sopenharmony_ci int to_encode; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci for (y = 0; y < 8; y++) { 6862306a36Sopenharmony_ci for (x = 0; x < 8; x++) { 6962306a36Sopenharmony_ci *wp = in[x + y * 8]; 7062306a36Sopenharmony_ci wp++; 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci } 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci /* keep track of amount of trailing zeros */ 7562306a36Sopenharmony_ci for (i = 63; i >= 0 && !block[zigzag[i]]; i--) 7662306a36Sopenharmony_ci lastzero_run++; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0); 7962306a36Sopenharmony_ci ret++; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci i = 0; 8462306a36Sopenharmony_ci while (i < to_encode) { 8562306a36Sopenharmony_ci int cnt = 0; 8662306a36Sopenharmony_ci int tmp; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci /* count leading zeros */ 8962306a36Sopenharmony_ci while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) { 9062306a36Sopenharmony_ci cnt++; 9162306a36Sopenharmony_ci i++; 9262306a36Sopenharmony_ci if (i == to_encode) { 9362306a36Sopenharmony_ci cnt--; 9462306a36Sopenharmony_ci break; 9562306a36Sopenharmony_ci } 9662306a36Sopenharmony_ci } 9762306a36Sopenharmony_ci /* 4 bits for run, 12 for coefficient (quantization by 4) */ 9862306a36Sopenharmony_ci *output++ = htons((cnt | tmp << 4)); 9962306a36Sopenharmony_ci i++; 10062306a36Sopenharmony_ci ret++; 10162306a36Sopenharmony_ci } 10262306a36Sopenharmony_ci if (lastzero_run > 14) { 10362306a36Sopenharmony_ci *output = htons(ALL_ZEROS | 0); 10462306a36Sopenharmony_ci ret++; 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci return ret; 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci/* 11162306a36Sopenharmony_ci * This function will worst-case increase rlc_in by 65*2 bytes: 11262306a36Sopenharmony_ci * one s16 value for the header and 8 * 8 coefficients of type s16. 11362306a36Sopenharmony_ci */ 11462306a36Sopenharmony_cistatic noinline_for_stack u16 11562306a36Sopenharmony_ciderlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci /* header */ 11862306a36Sopenharmony_ci const __be16 *input = *rlc_in; 11962306a36Sopenharmony_ci u16 stat; 12062306a36Sopenharmony_ci int dec_count = 0; 12162306a36Sopenharmony_ci s16 block[8 * 8 + 16]; 12262306a36Sopenharmony_ci s16 *wp = block; 12362306a36Sopenharmony_ci int i; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci if (input > end_of_input) 12662306a36Sopenharmony_ci return OVERFLOW_BIT; 12762306a36Sopenharmony_ci stat = ntohs(*input++); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci /* 13062306a36Sopenharmony_ci * Now de-compress, it expands one byte to up to 15 bytes 13162306a36Sopenharmony_ci * (or fills the remainder of the 64 bytes with zeroes if it 13262306a36Sopenharmony_ci * is the last byte to expand). 13362306a36Sopenharmony_ci * 13462306a36Sopenharmony_ci * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to 13562306a36Sopenharmony_ci * allow for overflow if the incoming data was malformed. 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_ci while (dec_count < 8 * 8) { 13862306a36Sopenharmony_ci s16 in; 13962306a36Sopenharmony_ci int length; 14062306a36Sopenharmony_ci int coeff; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci if (input > end_of_input) 14362306a36Sopenharmony_ci return OVERFLOW_BIT; 14462306a36Sopenharmony_ci in = ntohs(*input++); 14562306a36Sopenharmony_ci length = in & 0xf; 14662306a36Sopenharmony_ci coeff = in >> 4; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci /* fill remainder with zeros */ 14962306a36Sopenharmony_ci if (length == 15) { 15062306a36Sopenharmony_ci for (i = 0; i < 64 - dec_count; i++) 15162306a36Sopenharmony_ci *wp++ = 0; 15262306a36Sopenharmony_ci break; 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci for (i = 0; i < length; i++) 15662306a36Sopenharmony_ci *wp++ = 0; 15762306a36Sopenharmony_ci *wp++ = coeff; 15862306a36Sopenharmony_ci dec_count += length + 1; 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci wp = block; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci for (i = 0; i < 64; i++) { 16462306a36Sopenharmony_ci int pos = zigzag[i]; 16562306a36Sopenharmony_ci int y = pos / 8; 16662306a36Sopenharmony_ci int x = pos % 8; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci dwht_out[x + y * 8] = *wp++; 16962306a36Sopenharmony_ci } 17062306a36Sopenharmony_ci *rlc_in = input; 17162306a36Sopenharmony_ci return stat; 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_cistatic const int quant_table[] = { 17562306a36Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 2, 17662306a36Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 2, 17762306a36Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 3, 17862306a36Sopenharmony_ci 2, 2, 2, 2, 2, 2, 3, 6, 17962306a36Sopenharmony_ci 2, 2, 2, 2, 2, 3, 6, 6, 18062306a36Sopenharmony_ci 2, 2, 2, 2, 3, 6, 6, 6, 18162306a36Sopenharmony_ci 2, 2, 2, 3, 6, 6, 6, 6, 18262306a36Sopenharmony_ci 2, 2, 3, 6, 6, 6, 6, 8, 18362306a36Sopenharmony_ci}; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistatic const int quant_table_p[] = { 18662306a36Sopenharmony_ci 3, 3, 3, 3, 3, 3, 3, 3, 18762306a36Sopenharmony_ci 3, 3, 3, 3, 3, 3, 3, 3, 18862306a36Sopenharmony_ci 3, 3, 3, 3, 3, 3, 3, 3, 18962306a36Sopenharmony_ci 3, 3, 3, 3, 3, 3, 3, 6, 19062306a36Sopenharmony_ci 3, 3, 3, 3, 3, 3, 6, 6, 19162306a36Sopenharmony_ci 3, 3, 3, 3, 3, 6, 6, 9, 19262306a36Sopenharmony_ci 3, 3, 3, 3, 6, 6, 9, 9, 19362306a36Sopenharmony_ci 3, 3, 3, 6, 6, 9, 9, 10, 19462306a36Sopenharmony_ci}; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_cistatic void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci const int *quant = quant_table; 19962306a36Sopenharmony_ci int i, j; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci for (j = 0; j < 8; j++) { 20262306a36Sopenharmony_ci for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { 20362306a36Sopenharmony_ci *coeff >>= *quant; 20462306a36Sopenharmony_ci if (*coeff >= -qp && *coeff <= qp) 20562306a36Sopenharmony_ci *coeff = *de_coeff = 0; 20662306a36Sopenharmony_ci else 20762306a36Sopenharmony_ci *de_coeff = *coeff << *quant; 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci} 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_cistatic void dequantize_intra(s16 *coeff) 21362306a36Sopenharmony_ci{ 21462306a36Sopenharmony_ci const int *quant = quant_table; 21562306a36Sopenharmony_ci int i, j; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci for (j = 0; j < 8; j++) 21862306a36Sopenharmony_ci for (i = 0; i < 8; i++, quant++, coeff++) 21962306a36Sopenharmony_ci *coeff <<= *quant; 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_cistatic void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci const int *quant = quant_table_p; 22562306a36Sopenharmony_ci int i, j; 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci for (j = 0; j < 8; j++) { 22862306a36Sopenharmony_ci for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { 22962306a36Sopenharmony_ci *coeff >>= *quant; 23062306a36Sopenharmony_ci if (*coeff >= -qp && *coeff <= qp) 23162306a36Sopenharmony_ci *coeff = *de_coeff = 0; 23262306a36Sopenharmony_ci else 23362306a36Sopenharmony_ci *de_coeff = *coeff << *quant; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_cistatic void dequantize_inter(s16 *coeff) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci const int *quant = quant_table_p; 24162306a36Sopenharmony_ci int i, j; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci for (j = 0; j < 8; j++) 24462306a36Sopenharmony_ci for (i = 0; i < 8; i++, quant++, coeff++) 24562306a36Sopenharmony_ci *coeff <<= *quant; 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_cistatic void noinline_for_stack fwht(const u8 *block, s16 *output_block, 24962306a36Sopenharmony_ci unsigned int stride, 25062306a36Sopenharmony_ci unsigned int input_step, bool intra) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci /* we'll need more than 8 bits for the transformed coefficients */ 25362306a36Sopenharmony_ci s32 workspace1[8], workspace2[8]; 25462306a36Sopenharmony_ci const u8 *tmp = block; 25562306a36Sopenharmony_ci s16 *out = output_block; 25662306a36Sopenharmony_ci int add = intra ? 256 : 0; 25762306a36Sopenharmony_ci unsigned int i; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci /* stage 1 */ 26062306a36Sopenharmony_ci for (i = 0; i < 8; i++, tmp += stride, out += 8) { 26162306a36Sopenharmony_ci switch (input_step) { 26262306a36Sopenharmony_ci case 1: 26362306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[1] - add; 26462306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[1]; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci workspace1[2] = tmp[2] + tmp[3] - add; 26762306a36Sopenharmony_ci workspace1[3] = tmp[2] - tmp[3]; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci workspace1[4] = tmp[4] + tmp[5] - add; 27062306a36Sopenharmony_ci workspace1[5] = tmp[4] - tmp[5]; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci workspace1[6] = tmp[6] + tmp[7] - add; 27362306a36Sopenharmony_ci workspace1[7] = tmp[6] - tmp[7]; 27462306a36Sopenharmony_ci break; 27562306a36Sopenharmony_ci case 2: 27662306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[2] - add; 27762306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[2]; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci workspace1[2] = tmp[4] + tmp[6] - add; 28062306a36Sopenharmony_ci workspace1[3] = tmp[4] - tmp[6]; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci workspace1[4] = tmp[8] + tmp[10] - add; 28362306a36Sopenharmony_ci workspace1[5] = tmp[8] - tmp[10]; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci workspace1[6] = tmp[12] + tmp[14] - add; 28662306a36Sopenharmony_ci workspace1[7] = tmp[12] - tmp[14]; 28762306a36Sopenharmony_ci break; 28862306a36Sopenharmony_ci case 3: 28962306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[3] - add; 29062306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[3]; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci workspace1[2] = tmp[6] + tmp[9] - add; 29362306a36Sopenharmony_ci workspace1[3] = tmp[6] - tmp[9]; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci workspace1[4] = tmp[12] + tmp[15] - add; 29662306a36Sopenharmony_ci workspace1[5] = tmp[12] - tmp[15]; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci workspace1[6] = tmp[18] + tmp[21] - add; 29962306a36Sopenharmony_ci workspace1[7] = tmp[18] - tmp[21]; 30062306a36Sopenharmony_ci break; 30162306a36Sopenharmony_ci default: 30262306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[4] - add; 30362306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[4]; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci workspace1[2] = tmp[8] + tmp[12] - add; 30662306a36Sopenharmony_ci workspace1[3] = tmp[8] - tmp[12]; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci workspace1[4] = tmp[16] + tmp[20] - add; 30962306a36Sopenharmony_ci workspace1[5] = tmp[16] - tmp[20]; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci workspace1[6] = tmp[24] + tmp[28] - add; 31262306a36Sopenharmony_ci workspace1[7] = tmp[24] - tmp[28]; 31362306a36Sopenharmony_ci break; 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* stage 2 */ 31762306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 31862306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 31962306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 32062306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 32362306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 32462306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 32562306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci /* stage 3 */ 32862306a36Sopenharmony_ci out[0] = workspace2[0] + workspace2[4]; 32962306a36Sopenharmony_ci out[1] = workspace2[0] - workspace2[4]; 33062306a36Sopenharmony_ci out[2] = workspace2[1] - workspace2[5]; 33162306a36Sopenharmony_ci out[3] = workspace2[1] + workspace2[5]; 33262306a36Sopenharmony_ci out[4] = workspace2[2] + workspace2[6]; 33362306a36Sopenharmony_ci out[5] = workspace2[2] - workspace2[6]; 33462306a36Sopenharmony_ci out[6] = workspace2[3] - workspace2[7]; 33562306a36Sopenharmony_ci out[7] = workspace2[3] + workspace2[7]; 33662306a36Sopenharmony_ci } 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci out = output_block; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci for (i = 0; i < 8; i++, out++) { 34162306a36Sopenharmony_ci /* stage 1 */ 34262306a36Sopenharmony_ci workspace1[0] = out[0] + out[1 * 8]; 34362306a36Sopenharmony_ci workspace1[1] = out[0] - out[1 * 8]; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci workspace1[2] = out[2 * 8] + out[3 * 8]; 34662306a36Sopenharmony_ci workspace1[3] = out[2 * 8] - out[3 * 8]; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci workspace1[4] = out[4 * 8] + out[5 * 8]; 34962306a36Sopenharmony_ci workspace1[5] = out[4 * 8] - out[5 * 8]; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci workspace1[6] = out[6 * 8] + out[7 * 8]; 35262306a36Sopenharmony_ci workspace1[7] = out[6 * 8] - out[7 * 8]; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci /* stage 2 */ 35562306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 35662306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 35762306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 35862306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 36162306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 36262306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 36362306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 36462306a36Sopenharmony_ci /* stage 3 */ 36562306a36Sopenharmony_ci out[0 * 8] = workspace2[0] + workspace2[4]; 36662306a36Sopenharmony_ci out[1 * 8] = workspace2[0] - workspace2[4]; 36762306a36Sopenharmony_ci out[2 * 8] = workspace2[1] - workspace2[5]; 36862306a36Sopenharmony_ci out[3 * 8] = workspace2[1] + workspace2[5]; 36962306a36Sopenharmony_ci out[4 * 8] = workspace2[2] + workspace2[6]; 37062306a36Sopenharmony_ci out[5 * 8] = workspace2[2] - workspace2[6]; 37162306a36Sopenharmony_ci out[6 * 8] = workspace2[3] - workspace2[7]; 37262306a36Sopenharmony_ci out[7 * 8] = workspace2[3] + workspace2[7]; 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci/* 37762306a36Sopenharmony_ci * Not the nicest way of doing it, but P-blocks get twice the range of 37862306a36Sopenharmony_ci * that of the I-blocks. Therefore we need a type bigger than 8 bits. 37962306a36Sopenharmony_ci * Furthermore values can be negative... This is just a version that 38062306a36Sopenharmony_ci * works with 16 signed data 38162306a36Sopenharmony_ci */ 38262306a36Sopenharmony_cistatic void noinline_for_stack 38362306a36Sopenharmony_cifwht16(const s16 *block, s16 *output_block, int stride, int intra) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci /* we'll need more than 8 bits for the transformed coefficients */ 38662306a36Sopenharmony_ci s32 workspace1[8], workspace2[8]; 38762306a36Sopenharmony_ci const s16 *tmp = block; 38862306a36Sopenharmony_ci s16 *out = output_block; 38962306a36Sopenharmony_ci int i; 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci for (i = 0; i < 8; i++, tmp += stride, out += 8) { 39262306a36Sopenharmony_ci /* stage 1 */ 39362306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[1]; 39462306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[1]; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci workspace1[2] = tmp[2] + tmp[3]; 39762306a36Sopenharmony_ci workspace1[3] = tmp[2] - tmp[3]; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci workspace1[4] = tmp[4] + tmp[5]; 40062306a36Sopenharmony_ci workspace1[5] = tmp[4] - tmp[5]; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci workspace1[6] = tmp[6] + tmp[7]; 40362306a36Sopenharmony_ci workspace1[7] = tmp[6] - tmp[7]; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci /* stage 2 */ 40662306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 40762306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 40862306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 40962306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 41262306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 41362306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 41462306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci /* stage 3 */ 41762306a36Sopenharmony_ci out[0] = workspace2[0] + workspace2[4]; 41862306a36Sopenharmony_ci out[1] = workspace2[0] - workspace2[4]; 41962306a36Sopenharmony_ci out[2] = workspace2[1] - workspace2[5]; 42062306a36Sopenharmony_ci out[3] = workspace2[1] + workspace2[5]; 42162306a36Sopenharmony_ci out[4] = workspace2[2] + workspace2[6]; 42262306a36Sopenharmony_ci out[5] = workspace2[2] - workspace2[6]; 42362306a36Sopenharmony_ci out[6] = workspace2[3] - workspace2[7]; 42462306a36Sopenharmony_ci out[7] = workspace2[3] + workspace2[7]; 42562306a36Sopenharmony_ci } 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci out = output_block; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci for (i = 0; i < 8; i++, out++) { 43062306a36Sopenharmony_ci /* stage 1 */ 43162306a36Sopenharmony_ci workspace1[0] = out[0] + out[1*8]; 43262306a36Sopenharmony_ci workspace1[1] = out[0] - out[1*8]; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci workspace1[2] = out[2*8] + out[3*8]; 43562306a36Sopenharmony_ci workspace1[3] = out[2*8] - out[3*8]; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci workspace1[4] = out[4*8] + out[5*8]; 43862306a36Sopenharmony_ci workspace1[5] = out[4*8] - out[5*8]; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci workspace1[6] = out[6*8] + out[7*8]; 44162306a36Sopenharmony_ci workspace1[7] = out[6*8] - out[7*8]; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci /* stage 2 */ 44462306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 44562306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 44662306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 44762306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 45062306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 45162306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 45262306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci /* stage 3 */ 45562306a36Sopenharmony_ci out[0*8] = workspace2[0] + workspace2[4]; 45662306a36Sopenharmony_ci out[1*8] = workspace2[0] - workspace2[4]; 45762306a36Sopenharmony_ci out[2*8] = workspace2[1] - workspace2[5]; 45862306a36Sopenharmony_ci out[3*8] = workspace2[1] + workspace2[5]; 45962306a36Sopenharmony_ci out[4*8] = workspace2[2] + workspace2[6]; 46062306a36Sopenharmony_ci out[5*8] = workspace2[2] - workspace2[6]; 46162306a36Sopenharmony_ci out[6*8] = workspace2[3] - workspace2[7]; 46262306a36Sopenharmony_ci out[7*8] = workspace2[3] + workspace2[7]; 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_cistatic noinline_for_stack void 46762306a36Sopenharmony_ciifwht(const s16 *block, s16 *output_block, int intra) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci /* 47062306a36Sopenharmony_ci * we'll need more than 8 bits for the transformed coefficients 47162306a36Sopenharmony_ci * use native unit of cpu 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci int workspace1[8], workspace2[8]; 47462306a36Sopenharmony_ci int inter = intra ? 0 : 1; 47562306a36Sopenharmony_ci const s16 *tmp = block; 47662306a36Sopenharmony_ci s16 *out = output_block; 47762306a36Sopenharmony_ci int i; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci for (i = 0; i < 8; i++, tmp += 8, out += 8) { 48062306a36Sopenharmony_ci /* stage 1 */ 48162306a36Sopenharmony_ci workspace1[0] = tmp[0] + tmp[1]; 48262306a36Sopenharmony_ci workspace1[1] = tmp[0] - tmp[1]; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci workspace1[2] = tmp[2] + tmp[3]; 48562306a36Sopenharmony_ci workspace1[3] = tmp[2] - tmp[3]; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci workspace1[4] = tmp[4] + tmp[5]; 48862306a36Sopenharmony_ci workspace1[5] = tmp[4] - tmp[5]; 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci workspace1[6] = tmp[6] + tmp[7]; 49162306a36Sopenharmony_ci workspace1[7] = tmp[6] - tmp[7]; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci /* stage 2 */ 49462306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 49562306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 49662306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 49762306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 50062306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 50162306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 50262306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci /* stage 3 */ 50562306a36Sopenharmony_ci out[0] = workspace2[0] + workspace2[4]; 50662306a36Sopenharmony_ci out[1] = workspace2[0] - workspace2[4]; 50762306a36Sopenharmony_ci out[2] = workspace2[1] - workspace2[5]; 50862306a36Sopenharmony_ci out[3] = workspace2[1] + workspace2[5]; 50962306a36Sopenharmony_ci out[4] = workspace2[2] + workspace2[6]; 51062306a36Sopenharmony_ci out[5] = workspace2[2] - workspace2[6]; 51162306a36Sopenharmony_ci out[6] = workspace2[3] - workspace2[7]; 51262306a36Sopenharmony_ci out[7] = workspace2[3] + workspace2[7]; 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci out = output_block; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci for (i = 0; i < 8; i++, out++) { 51862306a36Sopenharmony_ci /* stage 1 */ 51962306a36Sopenharmony_ci workspace1[0] = out[0] + out[1 * 8]; 52062306a36Sopenharmony_ci workspace1[1] = out[0] - out[1 * 8]; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci workspace1[2] = out[2 * 8] + out[3 * 8]; 52362306a36Sopenharmony_ci workspace1[3] = out[2 * 8] - out[3 * 8]; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci workspace1[4] = out[4 * 8] + out[5 * 8]; 52662306a36Sopenharmony_ci workspace1[5] = out[4 * 8] - out[5 * 8]; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci workspace1[6] = out[6 * 8] + out[7 * 8]; 52962306a36Sopenharmony_ci workspace1[7] = out[6 * 8] - out[7 * 8]; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci /* stage 2 */ 53262306a36Sopenharmony_ci workspace2[0] = workspace1[0] + workspace1[2]; 53362306a36Sopenharmony_ci workspace2[1] = workspace1[0] - workspace1[2]; 53462306a36Sopenharmony_ci workspace2[2] = workspace1[1] - workspace1[3]; 53562306a36Sopenharmony_ci workspace2[3] = workspace1[1] + workspace1[3]; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci workspace2[4] = workspace1[4] + workspace1[6]; 53862306a36Sopenharmony_ci workspace2[5] = workspace1[4] - workspace1[6]; 53962306a36Sopenharmony_ci workspace2[6] = workspace1[5] - workspace1[7]; 54062306a36Sopenharmony_ci workspace2[7] = workspace1[5] + workspace1[7]; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci /* stage 3 */ 54362306a36Sopenharmony_ci if (inter) { 54462306a36Sopenharmony_ci int d; 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci out[0 * 8] = workspace2[0] + workspace2[4]; 54762306a36Sopenharmony_ci out[1 * 8] = workspace2[0] - workspace2[4]; 54862306a36Sopenharmony_ci out[2 * 8] = workspace2[1] - workspace2[5]; 54962306a36Sopenharmony_ci out[3 * 8] = workspace2[1] + workspace2[5]; 55062306a36Sopenharmony_ci out[4 * 8] = workspace2[2] + workspace2[6]; 55162306a36Sopenharmony_ci out[5 * 8] = workspace2[2] - workspace2[6]; 55262306a36Sopenharmony_ci out[6 * 8] = workspace2[3] - workspace2[7]; 55362306a36Sopenharmony_ci out[7 * 8] = workspace2[3] + workspace2[7]; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci for (d = 0; d < 8; d++) 55662306a36Sopenharmony_ci out[8 * d] >>= 6; 55762306a36Sopenharmony_ci } else { 55862306a36Sopenharmony_ci int d; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci out[0 * 8] = workspace2[0] + workspace2[4]; 56162306a36Sopenharmony_ci out[1 * 8] = workspace2[0] - workspace2[4]; 56262306a36Sopenharmony_ci out[2 * 8] = workspace2[1] - workspace2[5]; 56362306a36Sopenharmony_ci out[3 * 8] = workspace2[1] + workspace2[5]; 56462306a36Sopenharmony_ci out[4 * 8] = workspace2[2] + workspace2[6]; 56562306a36Sopenharmony_ci out[5 * 8] = workspace2[2] - workspace2[6]; 56662306a36Sopenharmony_ci out[6 * 8] = workspace2[3] - workspace2[7]; 56762306a36Sopenharmony_ci out[7 * 8] = workspace2[3] + workspace2[7]; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci for (d = 0; d < 8; d++) { 57062306a36Sopenharmony_ci out[8 * d] >>= 6; 57162306a36Sopenharmony_ci out[8 * d] += 128; 57262306a36Sopenharmony_ci } 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci} 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_cistatic void fill_encoder_block(const u8 *input, s16 *dst, 57862306a36Sopenharmony_ci unsigned int stride, unsigned int input_step) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci int i, j; 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci for (i = 0; i < 8; i++) { 58362306a36Sopenharmony_ci for (j = 0; j < 8; j++, input += input_step) 58462306a36Sopenharmony_ci *dst++ = *input; 58562306a36Sopenharmony_ci input += stride - 8 * input_step; 58662306a36Sopenharmony_ci } 58762306a36Sopenharmony_ci} 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_cistatic int var_intra(const s16 *input) 59062306a36Sopenharmony_ci{ 59162306a36Sopenharmony_ci int32_t mean = 0; 59262306a36Sopenharmony_ci int32_t ret = 0; 59362306a36Sopenharmony_ci const s16 *tmp = input; 59462306a36Sopenharmony_ci int i; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci for (i = 0; i < 8 * 8; i++, tmp++) 59762306a36Sopenharmony_ci mean += *tmp; 59862306a36Sopenharmony_ci mean /= 64; 59962306a36Sopenharmony_ci tmp = input; 60062306a36Sopenharmony_ci for (i = 0; i < 8 * 8; i++, tmp++) 60162306a36Sopenharmony_ci ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean); 60262306a36Sopenharmony_ci return ret; 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_cistatic int var_inter(const s16 *old, const s16 *new) 60662306a36Sopenharmony_ci{ 60762306a36Sopenharmony_ci int32_t ret = 0; 60862306a36Sopenharmony_ci int i; 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci for (i = 0; i < 8 * 8; i++, old++, new++) 61162306a36Sopenharmony_ci ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new); 61262306a36Sopenharmony_ci return ret; 61362306a36Sopenharmony_ci} 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_cistatic noinline_for_stack int 61662306a36Sopenharmony_cidecide_blocktype(const u8 *cur, const u8 *reference, s16 *deltablock, 61762306a36Sopenharmony_ci unsigned int stride, unsigned int input_step) 61862306a36Sopenharmony_ci{ 61962306a36Sopenharmony_ci s16 tmp[64]; 62062306a36Sopenharmony_ci s16 old[64]; 62162306a36Sopenharmony_ci s16 *work = tmp; 62262306a36Sopenharmony_ci unsigned int k, l; 62362306a36Sopenharmony_ci int vari; 62462306a36Sopenharmony_ci int vard; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci fill_encoder_block(cur, tmp, stride, input_step); 62762306a36Sopenharmony_ci fill_encoder_block(reference, old, 8, 1); 62862306a36Sopenharmony_ci vari = var_intra(tmp); 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci for (k = 0; k < 8; k++) { 63162306a36Sopenharmony_ci for (l = 0; l < 8; l++) { 63262306a36Sopenharmony_ci *deltablock = *work - *reference; 63362306a36Sopenharmony_ci deltablock++; 63462306a36Sopenharmony_ci work++; 63562306a36Sopenharmony_ci reference++; 63662306a36Sopenharmony_ci } 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci deltablock -= 64; 63962306a36Sopenharmony_ci vard = var_inter(old, tmp); 64062306a36Sopenharmony_ci return vari <= vard ? IBLOCK : PBLOCK; 64162306a36Sopenharmony_ci} 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_cistatic void fill_decoder_block(u8 *dst, const s16 *input, int stride, 64462306a36Sopenharmony_ci unsigned int dst_step) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci int i, j; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci for (i = 0; i < 8; i++) { 64962306a36Sopenharmony_ci for (j = 0; j < 8; j++, input++, dst += dst_step) { 65062306a36Sopenharmony_ci if (*input < 0) 65162306a36Sopenharmony_ci *dst = 0; 65262306a36Sopenharmony_ci else if (*input > 255) 65362306a36Sopenharmony_ci *dst = 255; 65462306a36Sopenharmony_ci else 65562306a36Sopenharmony_ci *dst = *input; 65662306a36Sopenharmony_ci } 65762306a36Sopenharmony_ci dst += stride - (8 * dst_step); 65862306a36Sopenharmony_ci } 65962306a36Sopenharmony_ci} 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_cistatic void add_deltas(s16 *deltas, const u8 *ref, int stride, 66262306a36Sopenharmony_ci unsigned int ref_step) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci int k, l; 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci for (k = 0; k < 8; k++) { 66762306a36Sopenharmony_ci for (l = 0; l < 8; l++) { 66862306a36Sopenharmony_ci *deltas += *ref; 66962306a36Sopenharmony_ci ref += ref_step; 67062306a36Sopenharmony_ci /* 67162306a36Sopenharmony_ci * Due to quantizing, it might possible that the 67262306a36Sopenharmony_ci * decoded coefficients are slightly out of range 67362306a36Sopenharmony_ci */ 67462306a36Sopenharmony_ci if (*deltas < 0) 67562306a36Sopenharmony_ci *deltas = 0; 67662306a36Sopenharmony_ci else if (*deltas > 255) 67762306a36Sopenharmony_ci *deltas = 255; 67862306a36Sopenharmony_ci deltas++; 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci ref += stride - (8 * ref_step); 68162306a36Sopenharmony_ci } 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max, 68562306a36Sopenharmony_ci struct fwht_cframe *cf, u32 height, u32 width, 68662306a36Sopenharmony_ci u32 stride, unsigned int input_step, 68762306a36Sopenharmony_ci bool is_intra, bool next_is_intra) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci u8 *input_start = input; 69062306a36Sopenharmony_ci __be16 *rlco_start = *rlco; 69162306a36Sopenharmony_ci s16 deltablock[64]; 69262306a36Sopenharmony_ci __be16 pframe_bit = htons(PFRAME_BIT); 69362306a36Sopenharmony_ci u32 encoding = 0; 69462306a36Sopenharmony_ci unsigned int last_size = 0; 69562306a36Sopenharmony_ci unsigned int i, j; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci width = round_up(width, 8); 69862306a36Sopenharmony_ci height = round_up(height, 8); 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci for (j = 0; j < height / 8; j++) { 70162306a36Sopenharmony_ci input = input_start + j * 8 * stride; 70262306a36Sopenharmony_ci for (i = 0; i < width / 8; i++) { 70362306a36Sopenharmony_ci /* intra code, first frame is always intra coded. */ 70462306a36Sopenharmony_ci int blocktype = IBLOCK; 70562306a36Sopenharmony_ci unsigned int size; 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci if (!is_intra) 70862306a36Sopenharmony_ci blocktype = decide_blocktype(input, refp, 70962306a36Sopenharmony_ci deltablock, stride, input_step); 71062306a36Sopenharmony_ci if (blocktype == IBLOCK) { 71162306a36Sopenharmony_ci fwht(input, cf->coeffs, stride, input_step, 1); 71262306a36Sopenharmony_ci quantize_intra(cf->coeffs, cf->de_coeffs, 71362306a36Sopenharmony_ci cf->i_frame_qp); 71462306a36Sopenharmony_ci } else { 71562306a36Sopenharmony_ci /* inter code */ 71662306a36Sopenharmony_ci encoding |= FWHT_FRAME_PCODED; 71762306a36Sopenharmony_ci fwht16(deltablock, cf->coeffs, 8, 0); 71862306a36Sopenharmony_ci quantize_inter(cf->coeffs, cf->de_coeffs, 71962306a36Sopenharmony_ci cf->p_frame_qp); 72062306a36Sopenharmony_ci } 72162306a36Sopenharmony_ci if (!next_is_intra) { 72262306a36Sopenharmony_ci ifwht(cf->de_coeffs, cf->de_fwht, blocktype); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci if (blocktype == PBLOCK) 72562306a36Sopenharmony_ci add_deltas(cf->de_fwht, refp, 8, 1); 72662306a36Sopenharmony_ci fill_decoder_block(refp, cf->de_fwht, 8, 1); 72762306a36Sopenharmony_ci } 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci input += 8 * input_step; 73062306a36Sopenharmony_ci refp += 8 * 8; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci size = rlc(cf->coeffs, *rlco, blocktype); 73362306a36Sopenharmony_ci if (last_size == size && 73462306a36Sopenharmony_ci !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) { 73562306a36Sopenharmony_ci __be16 *last_rlco = *rlco - size; 73662306a36Sopenharmony_ci s16 hdr = ntohs(*last_rlco); 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci if (!((*last_rlco ^ **rlco) & pframe_bit) && 73962306a36Sopenharmony_ci (hdr & DUPS_MASK) < DUPS_MASK) 74062306a36Sopenharmony_ci *last_rlco = htons(hdr + 2); 74162306a36Sopenharmony_ci else 74262306a36Sopenharmony_ci *rlco += size; 74362306a36Sopenharmony_ci } else { 74462306a36Sopenharmony_ci *rlco += size; 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci if (*rlco >= rlco_max) { 74762306a36Sopenharmony_ci encoding |= FWHT_FRAME_UNENCODED; 74862306a36Sopenharmony_ci goto exit_loop; 74962306a36Sopenharmony_ci } 75062306a36Sopenharmony_ci last_size = size; 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ciexit_loop: 75562306a36Sopenharmony_ci if (encoding & FWHT_FRAME_UNENCODED) { 75662306a36Sopenharmony_ci u8 *out = (u8 *)rlco_start; 75762306a36Sopenharmony_ci u8 *p; 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci input = input_start; 76062306a36Sopenharmony_ci /* 76162306a36Sopenharmony_ci * The compressed stream should never contain the magic 76262306a36Sopenharmony_ci * header, so when we copy the YUV data we replace 0xff 76362306a36Sopenharmony_ci * by 0xfe. Since YUV is limited range such values 76462306a36Sopenharmony_ci * shouldn't appear anyway. 76562306a36Sopenharmony_ci */ 76662306a36Sopenharmony_ci for (j = 0; j < height; j++) { 76762306a36Sopenharmony_ci for (i = 0, p = input; i < width; i++, p += input_step) 76862306a36Sopenharmony_ci *out++ = (*p == 0xff) ? 0xfe : *p; 76962306a36Sopenharmony_ci input += stride; 77062306a36Sopenharmony_ci } 77162306a36Sopenharmony_ci *rlco = (__be16 *)out; 77262306a36Sopenharmony_ci encoding &= ~FWHT_FRAME_PCODED; 77362306a36Sopenharmony_ci } 77462306a36Sopenharmony_ci return encoding; 77562306a36Sopenharmony_ci} 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ciu32 fwht_encode_frame(struct fwht_raw_frame *frm, 77862306a36Sopenharmony_ci struct fwht_raw_frame *ref_frm, 77962306a36Sopenharmony_ci struct fwht_cframe *cf, 78062306a36Sopenharmony_ci bool is_intra, bool next_is_intra, 78162306a36Sopenharmony_ci unsigned int width, unsigned int height, 78262306a36Sopenharmony_ci unsigned int stride, unsigned int chroma_stride) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci unsigned int size = height * width; 78562306a36Sopenharmony_ci __be16 *rlco = cf->rlc_data; 78662306a36Sopenharmony_ci __be16 *rlco_max; 78762306a36Sopenharmony_ci u32 encoding; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci rlco_max = rlco + size / 2 - 256; 79062306a36Sopenharmony_ci encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf, 79162306a36Sopenharmony_ci height, width, stride, 79262306a36Sopenharmony_ci frm->luma_alpha_step, is_intra, next_is_intra); 79362306a36Sopenharmony_ci if (encoding & FWHT_FRAME_UNENCODED) 79462306a36Sopenharmony_ci encoding |= FWHT_LUMA_UNENCODED; 79562306a36Sopenharmony_ci encoding &= ~FWHT_FRAME_UNENCODED; 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci if (frm->components_num >= 3) { 79862306a36Sopenharmony_ci u32 chroma_h = height / frm->height_div; 79962306a36Sopenharmony_ci u32 chroma_w = width / frm->width_div; 80062306a36Sopenharmony_ci unsigned int chroma_size = chroma_h * chroma_w; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci rlco_max = rlco + chroma_size / 2 - 256; 80362306a36Sopenharmony_ci encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, 80462306a36Sopenharmony_ci cf, chroma_h, chroma_w, 80562306a36Sopenharmony_ci chroma_stride, frm->chroma_step, 80662306a36Sopenharmony_ci is_intra, next_is_intra); 80762306a36Sopenharmony_ci if (encoding & FWHT_FRAME_UNENCODED) 80862306a36Sopenharmony_ci encoding |= FWHT_CB_UNENCODED; 80962306a36Sopenharmony_ci encoding &= ~FWHT_FRAME_UNENCODED; 81062306a36Sopenharmony_ci rlco_max = rlco + chroma_size / 2 - 256; 81162306a36Sopenharmony_ci encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, 81262306a36Sopenharmony_ci cf, chroma_h, chroma_w, 81362306a36Sopenharmony_ci chroma_stride, frm->chroma_step, 81462306a36Sopenharmony_ci is_intra, next_is_intra); 81562306a36Sopenharmony_ci if (encoding & FWHT_FRAME_UNENCODED) 81662306a36Sopenharmony_ci encoding |= FWHT_CR_UNENCODED; 81762306a36Sopenharmony_ci encoding &= ~FWHT_FRAME_UNENCODED; 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (frm->components_num == 4) { 82162306a36Sopenharmony_ci rlco_max = rlco + size / 2 - 256; 82262306a36Sopenharmony_ci encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco, 82362306a36Sopenharmony_ci rlco_max, cf, height, width, 82462306a36Sopenharmony_ci stride, frm->luma_alpha_step, 82562306a36Sopenharmony_ci is_intra, next_is_intra); 82662306a36Sopenharmony_ci if (encoding & FWHT_FRAME_UNENCODED) 82762306a36Sopenharmony_ci encoding |= FWHT_ALPHA_UNENCODED; 82862306a36Sopenharmony_ci encoding &= ~FWHT_FRAME_UNENCODED; 82962306a36Sopenharmony_ci } 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci cf->size = (rlco - cf->rlc_data) * sizeof(*rlco); 83262306a36Sopenharmony_ci return encoding; 83362306a36Sopenharmony_ci} 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_cistatic bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, 83662306a36Sopenharmony_ci u32 height, u32 width, const u8 *ref, u32 ref_stride, 83762306a36Sopenharmony_ci unsigned int ref_step, u8 *dst, 83862306a36Sopenharmony_ci unsigned int dst_stride, unsigned int dst_step, 83962306a36Sopenharmony_ci bool uncompressed, const __be16 *end_of_rlco_buf) 84062306a36Sopenharmony_ci{ 84162306a36Sopenharmony_ci unsigned int copies = 0; 84262306a36Sopenharmony_ci s16 copy[8 * 8]; 84362306a36Sopenharmony_ci u16 stat; 84462306a36Sopenharmony_ci unsigned int i, j; 84562306a36Sopenharmony_ci bool is_intra = !ref; 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci width = round_up(width, 8); 84862306a36Sopenharmony_ci height = round_up(height, 8); 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci if (uncompressed) { 85162306a36Sopenharmony_ci int i; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci if (end_of_rlco_buf + 1 < *rlco + width * height / 2) 85462306a36Sopenharmony_ci return false; 85562306a36Sopenharmony_ci for (i = 0; i < height; i++) { 85662306a36Sopenharmony_ci memcpy(dst, *rlco, width); 85762306a36Sopenharmony_ci dst += dst_stride; 85862306a36Sopenharmony_ci *rlco += width / 2; 85962306a36Sopenharmony_ci } 86062306a36Sopenharmony_ci return true; 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci /* 86462306a36Sopenharmony_ci * When decoding each macroblock the rlco pointer will be increased 86562306a36Sopenharmony_ci * by 65 * 2 bytes worst-case. 86662306a36Sopenharmony_ci * To avoid overflow the buffer has to be 65/64th of the actual raw 86762306a36Sopenharmony_ci * image size, just in case someone feeds it malicious data. 86862306a36Sopenharmony_ci */ 86962306a36Sopenharmony_ci for (j = 0; j < height / 8; j++) { 87062306a36Sopenharmony_ci for (i = 0; i < width / 8; i++) { 87162306a36Sopenharmony_ci const u8 *refp = ref + j * 8 * ref_stride + 87262306a36Sopenharmony_ci i * 8 * ref_step; 87362306a36Sopenharmony_ci u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci if (copies) { 87662306a36Sopenharmony_ci memcpy(cf->de_fwht, copy, sizeof(copy)); 87762306a36Sopenharmony_ci if ((stat & PFRAME_BIT) && !is_intra) 87862306a36Sopenharmony_ci add_deltas(cf->de_fwht, refp, 87962306a36Sopenharmony_ci ref_stride, ref_step); 88062306a36Sopenharmony_ci fill_decoder_block(dstp, cf->de_fwht, 88162306a36Sopenharmony_ci dst_stride, dst_step); 88262306a36Sopenharmony_ci copies--; 88362306a36Sopenharmony_ci continue; 88462306a36Sopenharmony_ci } 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci stat = derlc(rlco, cf->coeffs, end_of_rlco_buf); 88762306a36Sopenharmony_ci if (stat & OVERFLOW_BIT) 88862306a36Sopenharmony_ci return false; 88962306a36Sopenharmony_ci if ((stat & PFRAME_BIT) && !is_intra) 89062306a36Sopenharmony_ci dequantize_inter(cf->coeffs); 89162306a36Sopenharmony_ci else 89262306a36Sopenharmony_ci dequantize_intra(cf->coeffs); 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci ifwht(cf->coeffs, cf->de_fwht, 89562306a36Sopenharmony_ci ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1); 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci copies = (stat & DUPS_MASK) >> 1; 89862306a36Sopenharmony_ci if (copies) 89962306a36Sopenharmony_ci memcpy(copy, cf->de_fwht, sizeof(copy)); 90062306a36Sopenharmony_ci if ((stat & PFRAME_BIT) && !is_intra) 90162306a36Sopenharmony_ci add_deltas(cf->de_fwht, refp, 90262306a36Sopenharmony_ci ref_stride, ref_step); 90362306a36Sopenharmony_ci fill_decoder_block(dstp, cf->de_fwht, dst_stride, 90462306a36Sopenharmony_ci dst_step); 90562306a36Sopenharmony_ci } 90662306a36Sopenharmony_ci } 90762306a36Sopenharmony_ci return true; 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_cibool fwht_decode_frame(struct fwht_cframe *cf, u32 hdr_flags, 91162306a36Sopenharmony_ci unsigned int components_num, unsigned int width, 91262306a36Sopenharmony_ci unsigned int height, const struct fwht_raw_frame *ref, 91362306a36Sopenharmony_ci unsigned int ref_stride, unsigned int ref_chroma_stride, 91462306a36Sopenharmony_ci struct fwht_raw_frame *dst, unsigned int dst_stride, 91562306a36Sopenharmony_ci unsigned int dst_chroma_stride) 91662306a36Sopenharmony_ci{ 91762306a36Sopenharmony_ci const __be16 *rlco = cf->rlc_data; 91862306a36Sopenharmony_ci const __be16 *end_of_rlco_buf = cf->rlc_data + 91962306a36Sopenharmony_ci (cf->size / sizeof(*rlco)) - 1; 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride, 92262306a36Sopenharmony_ci ref->luma_alpha_step, dst->luma, dst_stride, 92362306a36Sopenharmony_ci dst->luma_alpha_step, 92462306a36Sopenharmony_ci hdr_flags & V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED, 92562306a36Sopenharmony_ci end_of_rlco_buf)) 92662306a36Sopenharmony_ci return false; 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci if (components_num >= 3) { 92962306a36Sopenharmony_ci u32 h = height; 93062306a36Sopenharmony_ci u32 w = width; 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci if (!(hdr_flags & V4L2_FWHT_FL_CHROMA_FULL_HEIGHT)) 93362306a36Sopenharmony_ci h /= 2; 93462306a36Sopenharmony_ci if (!(hdr_flags & V4L2_FWHT_FL_CHROMA_FULL_WIDTH)) 93562306a36Sopenharmony_ci w /= 2; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride, 93862306a36Sopenharmony_ci ref->chroma_step, dst->cb, dst_chroma_stride, 93962306a36Sopenharmony_ci dst->chroma_step, 94062306a36Sopenharmony_ci hdr_flags & V4L2_FWHT_FL_CB_IS_UNCOMPRESSED, 94162306a36Sopenharmony_ci end_of_rlco_buf)) 94262306a36Sopenharmony_ci return false; 94362306a36Sopenharmony_ci if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride, 94462306a36Sopenharmony_ci ref->chroma_step, dst->cr, dst_chroma_stride, 94562306a36Sopenharmony_ci dst->chroma_step, 94662306a36Sopenharmony_ci hdr_flags & V4L2_FWHT_FL_CR_IS_UNCOMPRESSED, 94762306a36Sopenharmony_ci end_of_rlco_buf)) 94862306a36Sopenharmony_ci return false; 94962306a36Sopenharmony_ci } 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (components_num == 4) 95262306a36Sopenharmony_ci if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride, 95362306a36Sopenharmony_ci ref->luma_alpha_step, dst->alpha, dst_stride, 95462306a36Sopenharmony_ci dst->luma_alpha_step, 95562306a36Sopenharmony_ci hdr_flags & V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED, 95662306a36Sopenharmony_ci end_of_rlco_buf)) 95762306a36Sopenharmony_ci return false; 95862306a36Sopenharmony_ci return true; 95962306a36Sopenharmony_ci} 960