1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * MPEG video MMX templates 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include <stdint.h> 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "libavutil/internal.h" 26cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 27cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 28cabdff1aSopenharmony_ci#include "libavcodec/mpegutils.h" 29cabdff1aSopenharmony_ci#include "libavcodec/mpegvideo.h" 30cabdff1aSopenharmony_ci#include "fdct.h" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#undef MMREG_WIDTH 33cabdff1aSopenharmony_ci#undef MM 34cabdff1aSopenharmony_ci#undef MOVQ 35cabdff1aSopenharmony_ci#undef SPREADW 36cabdff1aSopenharmony_ci#undef PMAXW 37cabdff1aSopenharmony_ci#undef PMAX 38cabdff1aSopenharmony_ci#undef SAVE_SIGN 39cabdff1aSopenharmony_ci#undef RESTORE_SIGN 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_ci#if COMPILE_TEMPLATE_SSE2 42cabdff1aSopenharmony_ci#define MMREG_WIDTH "16" 43cabdff1aSopenharmony_ci#define MM "%%xmm" 44cabdff1aSopenharmony_ci#define MOVQ "movdqa" 45cabdff1aSopenharmony_ci#define SPREADW(a) \ 46cabdff1aSopenharmony_ci "pshuflw $0, "a", "a" \n\t"\ 47cabdff1aSopenharmony_ci "punpcklwd "a", "a" \n\t" 48cabdff1aSopenharmony_ci#define PMAXW(a,b) "pmaxsw "a", "b" \n\t" 49cabdff1aSopenharmony_ci#define PMAX(a,b) \ 50cabdff1aSopenharmony_ci "movhlps "a", "b" \n\t"\ 51cabdff1aSopenharmony_ci PMAXW(b, a)\ 52cabdff1aSopenharmony_ci "pshuflw $0x0E, "a", "b" \n\t"\ 53cabdff1aSopenharmony_ci PMAXW(b, a)\ 54cabdff1aSopenharmony_ci "pshuflw $0x01, "a", "b" \n\t"\ 55cabdff1aSopenharmony_ci PMAXW(b, a) 56cabdff1aSopenharmony_ci#else 57cabdff1aSopenharmony_ci#define MMREG_WIDTH "8" 58cabdff1aSopenharmony_ci#define MM "%%mm" 59cabdff1aSopenharmony_ci#define MOVQ "movq" 60cabdff1aSopenharmony_ci#define SPREADW(a) \ 61cabdff1aSopenharmony_ci "punpcklwd "a", "a" \n\t"\ 62cabdff1aSopenharmony_ci "punpcklwd "a", "a" \n\t" 63cabdff1aSopenharmony_ci#define PMAXW(a,b) \ 64cabdff1aSopenharmony_ci "psubusw "a", "b" \n\t"\ 65cabdff1aSopenharmony_ci "paddw "a", "b" \n\t" 66cabdff1aSopenharmony_ci#define PMAX(a,b) \ 67cabdff1aSopenharmony_ci "movq "a", "b" \n\t"\ 68cabdff1aSopenharmony_ci "psrlq $32, "a" \n\t"\ 69cabdff1aSopenharmony_ci PMAXW(b, a)\ 70cabdff1aSopenharmony_ci "movq "a", "b" \n\t"\ 71cabdff1aSopenharmony_ci "psrlq $16, "a" \n\t"\ 72cabdff1aSopenharmony_ci PMAXW(b, a) 73cabdff1aSopenharmony_ci 74cabdff1aSopenharmony_ci#endif 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci#if COMPILE_TEMPLATE_SSSE3 77cabdff1aSopenharmony_ci#define SAVE_SIGN(a,b) \ 78cabdff1aSopenharmony_ci "movdqa "b", "a" \n\t"\ 79cabdff1aSopenharmony_ci "pabsw "b", "b" \n\t" 80cabdff1aSopenharmony_ci#define RESTORE_SIGN(a,b) \ 81cabdff1aSopenharmony_ci "psignw "a", "b" \n\t" 82cabdff1aSopenharmony_ci#else 83cabdff1aSopenharmony_ci#define SAVE_SIGN(a,b) \ 84cabdff1aSopenharmony_ci "pxor "a", "a" \n\t"\ 85cabdff1aSopenharmony_ci "pcmpgtw "b", "a" \n\t" /* block[i] <= 0 ? 0xFF : 0x00 */\ 86cabdff1aSopenharmony_ci "pxor "a", "b" \n\t"\ 87cabdff1aSopenharmony_ci "psubw "a", "b" \n\t" /* ABS(block[i]) */ 88cabdff1aSopenharmony_ci#define RESTORE_SIGN(a,b) \ 89cabdff1aSopenharmony_ci "pxor "a", "b" \n\t"\ 90cabdff1aSopenharmony_ci "psubw "a", "b" \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) 91cabdff1aSopenharmony_ci#endif 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_cistatic int RENAME(dct_quantize)(MpegEncContext *s, 94cabdff1aSopenharmony_ci int16_t *block, int n, 95cabdff1aSopenharmony_ci int qscale, int *overflow) 96cabdff1aSopenharmony_ci{ 97cabdff1aSopenharmony_ci x86_reg last_non_zero_p1; 98cabdff1aSopenharmony_ci int level=0, q; //=0 is because gcc says uninitialized ... 99cabdff1aSopenharmony_ci const uint16_t *qmat, *bias; 100cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp_block, [64]); 101cabdff1aSopenharmony_ci 102cabdff1aSopenharmony_ci av_assert2((7&(uintptr_t)(&temp_block[0])) == 0); //did gcc align it correctly? 103cabdff1aSopenharmony_ci 104cabdff1aSopenharmony_ci //s->fdct (block); 105cabdff1aSopenharmony_ci RENAME_FDCT(ff_fdct)(block); // cannot be anything else ... 106cabdff1aSopenharmony_ci 107cabdff1aSopenharmony_ci if(s->dct_error_sum) 108cabdff1aSopenharmony_ci s->denoise_dct(s, block); 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_ci if (s->mb_intra) { 111cabdff1aSopenharmony_ci int dummy; 112cabdff1aSopenharmony_ci if (n < 4){ 113cabdff1aSopenharmony_ci q = s->y_dc_scale; 114cabdff1aSopenharmony_ci bias = s->q_intra_matrix16[qscale][1]; 115cabdff1aSopenharmony_ci qmat = s->q_intra_matrix16[qscale][0]; 116cabdff1aSopenharmony_ci }else{ 117cabdff1aSopenharmony_ci q = s->c_dc_scale; 118cabdff1aSopenharmony_ci bias = s->q_chroma_intra_matrix16[qscale][1]; 119cabdff1aSopenharmony_ci qmat = s->q_chroma_intra_matrix16[qscale][0]; 120cabdff1aSopenharmony_ci } 121cabdff1aSopenharmony_ci /* note: block[0] is assumed to be positive */ 122cabdff1aSopenharmony_ci if (!s->h263_aic) { 123cabdff1aSopenharmony_ci __asm__ volatile ( 124cabdff1aSopenharmony_ci "mul %%ecx \n\t" 125cabdff1aSopenharmony_ci : "=d" (level), "=a"(dummy) 126cabdff1aSopenharmony_ci : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1]) 127cabdff1aSopenharmony_ci ); 128cabdff1aSopenharmony_ci } else 129cabdff1aSopenharmony_ci /* For AIC we skip quant/dequant of INTRADC */ 130cabdff1aSopenharmony_ci level = (block[0] + 4)>>3; 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci block[0]=0; //avoid fake overflow 133cabdff1aSopenharmony_ci// temp_block[0] = (block[0] + (q >> 1)) / q; 134cabdff1aSopenharmony_ci last_non_zero_p1 = 1; 135cabdff1aSopenharmony_ci } else { 136cabdff1aSopenharmony_ci last_non_zero_p1 = 0; 137cabdff1aSopenharmony_ci bias = s->q_inter_matrix16[qscale][1]; 138cabdff1aSopenharmony_ci qmat = s->q_inter_matrix16[qscale][0]; 139cabdff1aSopenharmony_ci } 140cabdff1aSopenharmony_ci 141cabdff1aSopenharmony_ci if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ 142cabdff1aSopenharmony_ci 143cabdff1aSopenharmony_ci __asm__ volatile( 144cabdff1aSopenharmony_ci "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 145cabdff1aSopenharmony_ci SPREADW(MM"3") 146cabdff1aSopenharmony_ci "pxor "MM"7, "MM"7 \n\t" // 0 147cabdff1aSopenharmony_ci "pxor "MM"4, "MM"4 \n\t" // 0 148cabdff1aSopenharmony_ci MOVQ" (%2), "MM"5 \n\t" // qmat[0] 149cabdff1aSopenharmony_ci "pxor "MM"6, "MM"6 \n\t" 150cabdff1aSopenharmony_ci "psubw (%3), "MM"6 \n\t" // -bias[0] 151cabdff1aSopenharmony_ci "mov $-128, %%"FF_REG_a" \n\t" 152cabdff1aSopenharmony_ci ".p2align 4 \n\t" 153cabdff1aSopenharmony_ci "1: \n\t" 154cabdff1aSopenharmony_ci MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] 155cabdff1aSopenharmony_ci SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) 156cabdff1aSopenharmony_ci "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] 157cabdff1aSopenharmony_ci "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 158cabdff1aSopenharmony_ci "por "MM"0, "MM"4 \n\t" 159cabdff1aSopenharmony_ci RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) 160cabdff1aSopenharmony_ci MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" 161cabdff1aSopenharmony_ci "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 162cabdff1aSopenharmony_ci MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" 163cabdff1aSopenharmony_ci MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 164cabdff1aSopenharmony_ci "pandn "MM"1, "MM"0 \n\t" 165cabdff1aSopenharmony_ci PMAXW(MM"0", MM"3") 166cabdff1aSopenharmony_ci "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" 167cabdff1aSopenharmony_ci " js 1b \n\t" 168cabdff1aSopenharmony_ci PMAX(MM"3", MM"0") 169cabdff1aSopenharmony_ci "movd "MM"3, %%"FF_REG_a" \n\t" 170cabdff1aSopenharmony_ci "movzbl %%al, %%eax \n\t" // last_non_zero_p1 171cabdff1aSopenharmony_ci : "+a" (last_non_zero_p1) 172cabdff1aSopenharmony_ci : "r" (block+64), "r" (qmat), "r" (bias), 173cabdff1aSopenharmony_ci "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64) 174cabdff1aSopenharmony_ci XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", 175cabdff1aSopenharmony_ci "%xmm4", "%xmm5", "%xmm6", "%xmm7") 176cabdff1aSopenharmony_ci ); 177cabdff1aSopenharmony_ci }else{ // FMT_H263 178cabdff1aSopenharmony_ci __asm__ volatile( 179cabdff1aSopenharmony_ci "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 180cabdff1aSopenharmony_ci SPREADW(MM"3") 181cabdff1aSopenharmony_ci "pxor "MM"7, "MM"7 \n\t" // 0 182cabdff1aSopenharmony_ci "pxor "MM"4, "MM"4 \n\t" // 0 183cabdff1aSopenharmony_ci "mov $-128, %%"FF_REG_a" \n\t" 184cabdff1aSopenharmony_ci ".p2align 4 \n\t" 185cabdff1aSopenharmony_ci "1: \n\t" 186cabdff1aSopenharmony_ci MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] 187cabdff1aSopenharmony_ci SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) 188cabdff1aSopenharmony_ci MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0] 189cabdff1aSopenharmony_ci "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] 190cabdff1aSopenharmony_ci MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i] 191cabdff1aSopenharmony_ci "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 192cabdff1aSopenharmony_ci "por "MM"0, "MM"4 \n\t" 193cabdff1aSopenharmony_ci RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) 194cabdff1aSopenharmony_ci MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" 195cabdff1aSopenharmony_ci "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 196cabdff1aSopenharmony_ci MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" 197cabdff1aSopenharmony_ci MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 198cabdff1aSopenharmony_ci "pandn "MM"1, "MM"0 \n\t" 199cabdff1aSopenharmony_ci PMAXW(MM"0", MM"3") 200cabdff1aSopenharmony_ci "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" 201cabdff1aSopenharmony_ci " js 1b \n\t" 202cabdff1aSopenharmony_ci PMAX(MM"3", MM"0") 203cabdff1aSopenharmony_ci "movd "MM"3, %%"FF_REG_a" \n\t" 204cabdff1aSopenharmony_ci "movzbl %%al, %%eax \n\t" // last_non_zero_p1 205cabdff1aSopenharmony_ci : "+a" (last_non_zero_p1) 206cabdff1aSopenharmony_ci : "r" (block+64), "r" (qmat+64), "r" (bias+64), 207cabdff1aSopenharmony_ci "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64) 208cabdff1aSopenharmony_ci XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", 209cabdff1aSopenharmony_ci "%xmm4", "%xmm5", "%xmm6", "%xmm7") 210cabdff1aSopenharmony_ci ); 211cabdff1aSopenharmony_ci } 212cabdff1aSopenharmony_ci __asm__ volatile( 213cabdff1aSopenharmony_ci "movd %1, "MM"1 \n\t" // max_qcoeff 214cabdff1aSopenharmony_ci SPREADW(MM"1") 215cabdff1aSopenharmony_ci "psubusw "MM"1, "MM"4 \n\t" 216cabdff1aSopenharmony_ci "packuswb "MM"4, "MM"4 \n\t" 217cabdff1aSopenharmony_ci#if COMPILE_TEMPLATE_SSE2 218cabdff1aSopenharmony_ci "packsswb "MM"4, "MM"4 \n\t" 219cabdff1aSopenharmony_ci#endif 220cabdff1aSopenharmony_ci "movd "MM"4, %0 \n\t" // *overflow 221cabdff1aSopenharmony_ci : "=g" (*overflow) 222cabdff1aSopenharmony_ci : "g" (s->max_qcoeff) 223cabdff1aSopenharmony_ci ); 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci if(s->mb_intra) block[0]= level; 226cabdff1aSopenharmony_ci else block[0]= temp_block[0]; 227cabdff1aSopenharmony_ci 228cabdff1aSopenharmony_ci if (s->idsp.perm_type == FF_IDCT_PERM_SIMPLE) { 229cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 1) goto end; 230cabdff1aSopenharmony_ci block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; 231cabdff1aSopenharmony_ci block[0x20] = temp_block[0x10]; 232cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 4) goto end; 233cabdff1aSopenharmony_ci block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; 234cabdff1aSopenharmony_ci block[0x09] = temp_block[0x03]; 235cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 7) goto end; 236cabdff1aSopenharmony_ci block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; 237cabdff1aSopenharmony_ci block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; 238cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 11) goto end; 239cabdff1aSopenharmony_ci block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; 240cabdff1aSopenharmony_ci block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; 241cabdff1aSopenharmony_ci block[0x0C] = temp_block[0x05]; 242cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 16) goto end; 243cabdff1aSopenharmony_ci block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; 244cabdff1aSopenharmony_ci block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; 245cabdff1aSopenharmony_ci block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; 246cabdff1aSopenharmony_ci block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; 247cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 24) goto end; 248cabdff1aSopenharmony_ci block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; 249cabdff1aSopenharmony_ci block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; 250cabdff1aSopenharmony_ci block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E]; 251cabdff1aSopenharmony_ci block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; 252cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 32) goto end; 253cabdff1aSopenharmony_ci block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; 254cabdff1aSopenharmony_ci block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; 255cabdff1aSopenharmony_ci block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; 256cabdff1aSopenharmony_ci block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; 257cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 40) goto end; 258cabdff1aSopenharmony_ci block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; 259cabdff1aSopenharmony_ci block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; 260cabdff1aSopenharmony_ci block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; 261cabdff1aSopenharmony_ci block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; 262cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 48) goto end; 263cabdff1aSopenharmony_ci block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; 264cabdff1aSopenharmony_ci block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; 265cabdff1aSopenharmony_ci block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 266cabdff1aSopenharmony_ci block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; 267cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 56) goto end; 268cabdff1aSopenharmony_ci block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; 269cabdff1aSopenharmony_ci block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; 270cabdff1aSopenharmony_ci block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; 271cabdff1aSopenharmony_ci block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; 272cabdff1aSopenharmony_ci }else if(s->idsp.perm_type == FF_IDCT_PERM_LIBMPEG2){ 273cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 1) goto end; 274cabdff1aSopenharmony_ci block[0x04] = temp_block[0x01]; 275cabdff1aSopenharmony_ci block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; 276cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 4) goto end; 277cabdff1aSopenharmony_ci block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; 278cabdff1aSopenharmony_ci block[0x05] = temp_block[0x03]; 279cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 7) goto end; 280cabdff1aSopenharmony_ci block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; 281cabdff1aSopenharmony_ci block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; 282cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 11) goto end; 283cabdff1aSopenharmony_ci block[0x1C] = temp_block[0x19]; 284cabdff1aSopenharmony_ci block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; 285cabdff1aSopenharmony_ci block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05]; 286cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 16) goto end; 287cabdff1aSopenharmony_ci block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; 288cabdff1aSopenharmony_ci block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; 289cabdff1aSopenharmony_ci block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; 290cabdff1aSopenharmony_ci block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; 291cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 24) goto end; 292cabdff1aSopenharmony_ci block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; 293cabdff1aSopenharmony_ci block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; 294cabdff1aSopenharmony_ci block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; 295cabdff1aSopenharmony_ci block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; 296cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 32) goto end; 297cabdff1aSopenharmony_ci block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; 298cabdff1aSopenharmony_ci block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; 299cabdff1aSopenharmony_ci block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; 300cabdff1aSopenharmony_ci block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; 301cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 40) goto end; 302cabdff1aSopenharmony_ci block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; 303cabdff1aSopenharmony_ci block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; 304cabdff1aSopenharmony_ci block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; 305cabdff1aSopenharmony_ci block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; 306cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 48) goto end; 307cabdff1aSopenharmony_ci block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; 308cabdff1aSopenharmony_ci block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; 309cabdff1aSopenharmony_ci block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 310cabdff1aSopenharmony_ci block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; 311cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 56) goto end; 312cabdff1aSopenharmony_ci block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; 313cabdff1aSopenharmony_ci block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; 314cabdff1aSopenharmony_ci block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; 315cabdff1aSopenharmony_ci block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; 316cabdff1aSopenharmony_ci } else if (s->idsp.perm_type == FF_IDCT_PERM_NONE) { 317cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 1) goto end; 318cabdff1aSopenharmony_ci block[0x01] = temp_block[0x01]; 319cabdff1aSopenharmony_ci block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; 320cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 4) goto end; 321cabdff1aSopenharmony_ci block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; 322cabdff1aSopenharmony_ci block[0x03] = temp_block[0x03]; 323cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 7) goto end; 324cabdff1aSopenharmony_ci block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; 325cabdff1aSopenharmony_ci block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; 326cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 11) goto end; 327cabdff1aSopenharmony_ci block[0x19] = temp_block[0x19]; 328cabdff1aSopenharmony_ci block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; 329cabdff1aSopenharmony_ci block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; 330cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 16) goto end; 331cabdff1aSopenharmony_ci block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; 332cabdff1aSopenharmony_ci block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; 333cabdff1aSopenharmony_ci block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; 334cabdff1aSopenharmony_ci block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; 335cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 24) goto end; 336cabdff1aSopenharmony_ci block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; 337cabdff1aSopenharmony_ci block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; 338cabdff1aSopenharmony_ci block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; 339cabdff1aSopenharmony_ci block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; 340cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 32) goto end; 341cabdff1aSopenharmony_ci block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; 342cabdff1aSopenharmony_ci block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; 343cabdff1aSopenharmony_ci block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; 344cabdff1aSopenharmony_ci block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; 345cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 40) goto end; 346cabdff1aSopenharmony_ci block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; 347cabdff1aSopenharmony_ci block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; 348cabdff1aSopenharmony_ci block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; 349cabdff1aSopenharmony_ci block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; 350cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 48) goto end; 351cabdff1aSopenharmony_ci block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; 352cabdff1aSopenharmony_ci block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; 353cabdff1aSopenharmony_ci block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 354cabdff1aSopenharmony_ci block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E]; 355cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 56) goto end; 356cabdff1aSopenharmony_ci block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; 357cabdff1aSopenharmony_ci block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; 358cabdff1aSopenharmony_ci block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; 359cabdff1aSopenharmony_ci block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; 360cabdff1aSopenharmony_ci } else if (s->idsp.perm_type == FF_IDCT_PERM_TRANSPOSE) { 361cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 1) goto end; 362cabdff1aSopenharmony_ci block[0x08] = temp_block[0x01]; 363cabdff1aSopenharmony_ci block[0x01] = temp_block[0x08]; block[0x02] = temp_block[0x10]; 364cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 4) goto end; 365cabdff1aSopenharmony_ci block[0x09] = temp_block[0x09]; block[0x10] = temp_block[0x02]; 366cabdff1aSopenharmony_ci block[0x18] = temp_block[0x03]; 367cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 7) goto end; 368cabdff1aSopenharmony_ci block[0x11] = temp_block[0x0A]; block[0x0A] = temp_block[0x11]; 369cabdff1aSopenharmony_ci block[0x03] = temp_block[0x18]; block[0x04] = temp_block[0x20]; 370cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 11) goto end; 371cabdff1aSopenharmony_ci block[0x0B] = temp_block[0x19]; 372cabdff1aSopenharmony_ci block[0x12] = temp_block[0x12]; block[0x19] = temp_block[0x0B]; 373cabdff1aSopenharmony_ci block[0x20] = temp_block[0x04]; block[0x28] = temp_block[0x05]; 374cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 16) goto end; 375cabdff1aSopenharmony_ci block[0x21] = temp_block[0x0C]; block[0x1A] = temp_block[0x13]; 376cabdff1aSopenharmony_ci block[0x13] = temp_block[0x1A]; block[0x0C] = temp_block[0x21]; 377cabdff1aSopenharmony_ci block[0x05] = temp_block[0x28]; block[0x06] = temp_block[0x30]; 378cabdff1aSopenharmony_ci block[0x0D] = temp_block[0x29]; block[0x14] = temp_block[0x22]; 379cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 24) goto end; 380cabdff1aSopenharmony_ci block[0x1B] = temp_block[0x1B]; block[0x22] = temp_block[0x14]; 381cabdff1aSopenharmony_ci block[0x29] = temp_block[0x0D]; block[0x30] = temp_block[0x06]; 382cabdff1aSopenharmony_ci block[0x38] = temp_block[0x07]; block[0x31] = temp_block[0x0E]; 383cabdff1aSopenharmony_ci block[0x2A] = temp_block[0x15]; block[0x23] = temp_block[0x1C]; 384cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 32) goto end; 385cabdff1aSopenharmony_ci block[0x1C] = temp_block[0x23]; block[0x15] = temp_block[0x2A]; 386cabdff1aSopenharmony_ci block[0x0E] = temp_block[0x31]; block[0x07] = temp_block[0x38]; 387cabdff1aSopenharmony_ci block[0x0F] = temp_block[0x39]; block[0x16] = temp_block[0x32]; 388cabdff1aSopenharmony_ci block[0x1D] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; 389cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 40) goto end; 390cabdff1aSopenharmony_ci block[0x2B] = temp_block[0x1D]; block[0x32] = temp_block[0x16]; 391cabdff1aSopenharmony_ci block[0x39] = temp_block[0x0F]; block[0x3A] = temp_block[0x17]; 392cabdff1aSopenharmony_ci block[0x33] = temp_block[0x1E]; block[0x2C] = temp_block[0x25]; 393cabdff1aSopenharmony_ci block[0x25] = temp_block[0x2C]; block[0x1E] = temp_block[0x33]; 394cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 48) goto end; 395cabdff1aSopenharmony_ci block[0x17] = temp_block[0x3A]; block[0x1F] = temp_block[0x3B]; 396cabdff1aSopenharmony_ci block[0x26] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; 397cabdff1aSopenharmony_ci block[0x34] = temp_block[0x26]; block[0x3B] = temp_block[0x1F]; 398cabdff1aSopenharmony_ci block[0x3C] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; 399cabdff1aSopenharmony_ci if(last_non_zero_p1 <= 56) goto end; 400cabdff1aSopenharmony_ci block[0x2E] = temp_block[0x35]; block[0x27] = temp_block[0x3C]; 401cabdff1aSopenharmony_ci block[0x2F] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; 402cabdff1aSopenharmony_ci block[0x3D] = temp_block[0x2F]; block[0x3E] = temp_block[0x37]; 403cabdff1aSopenharmony_ci block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; 404cabdff1aSopenharmony_ci } else { 405cabdff1aSopenharmony_ci av_log(s, AV_LOG_DEBUG, "s->idsp.perm_type: %d\n", 406cabdff1aSopenharmony_ci (int)s->idsp.perm_type); 407cabdff1aSopenharmony_ci av_assert0(s->idsp.perm_type == FF_IDCT_PERM_NONE || 408cabdff1aSopenharmony_ci s->idsp.perm_type == FF_IDCT_PERM_LIBMPEG2 || 409cabdff1aSopenharmony_ci s->idsp.perm_type == FF_IDCT_PERM_SIMPLE || 410cabdff1aSopenharmony_ci s->idsp.perm_type == FF_IDCT_PERM_TRANSPOSE); 411cabdff1aSopenharmony_ci } 412cabdff1aSopenharmony_ci end: 413cabdff1aSopenharmony_ci return last_non_zero_p1 - 1; 414cabdff1aSopenharmony_ci} 415