1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Loongson SIMD optimized xvid idct 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2015 Loongson Technology Corporation Limited 5cabdff1aSopenharmony_ci * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "idctdsp_mips.h" 27cabdff1aSopenharmony_ci#include "xvididct_mips.h" 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#define BITS_INV_ACC 5 // 4 or 5 for IEEE 30cabdff1aSopenharmony_ci#define SHIFT_INV_ROW (16 - BITS_INV_ACC) //11 31cabdff1aSopenharmony_ci#define SHIFT_INV_COL (1 + BITS_INV_ACC) //6 32cabdff1aSopenharmony_ci#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) 33cabdff1aSopenharmony_ci#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) 34cabdff1aSopenharmony_ci#define RND_INV_CORR (RND_INV_COL - 1) 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci#define BITS_FRW_ACC 3 // 2 or 3 for accuracy 37cabdff1aSopenharmony_ci#define SHIFT_FRW_COL BITS_FRW_ACC 38cabdff1aSopenharmony_ci#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) 39cabdff1aSopenharmony_ci#define RND_FRW_ROW (262144*(BITS_FRW_ACC - 1)) 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static const int16_t, tg_1_16)[4*4] = { 42cabdff1aSopenharmony_ci 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 43cabdff1aSopenharmony_ci 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 44cabdff1aSopenharmony_ci -21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5 45cabdff1aSopenharmony_ci 23170, 23170, 23170, 23170 // cos * (2<<15) + 0.5 46cabdff1aSopenharmony_ci}; 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static const int32_t, rounder_0)[2*8] = { 49cabdff1aSopenharmony_ci 65536,65536, 50cabdff1aSopenharmony_ci 3597, 3597, 51cabdff1aSopenharmony_ci 2260, 2260, 52cabdff1aSopenharmony_ci 1203, 1203, 53cabdff1aSopenharmony_ci 0, 0, 54cabdff1aSopenharmony_ci 120, 120, 55cabdff1aSopenharmony_ci 512, 512, 56cabdff1aSopenharmony_ci 512, 512 57cabdff1aSopenharmony_ci}; 58cabdff1aSopenharmony_ci 59cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmi)[32*4] = { 60cabdff1aSopenharmony_ci 16384, 21407, 16384, 8867, // w05 w04 w01 w00 61cabdff1aSopenharmony_ci 16384, 8867,-16384,-21407, // w07 w06 w03 w02 62cabdff1aSopenharmony_ci 16384, -8867, 16384,-21407, // w13 w12 w09 w08 63cabdff1aSopenharmony_ci -16384, 21407, 16384, -8867, // w15 w14 w11 w10 64cabdff1aSopenharmony_ci 22725, 19266, 19266, -4520, // w21 w20 w17 w16 65cabdff1aSopenharmony_ci 12873, 4520,-22725,-12873, // w23 w22 w19 w18 66cabdff1aSopenharmony_ci 12873,-22725, 4520,-12873, // w29 w28 w25 w24 67cabdff1aSopenharmony_ci 4520, 19266, 19266,-22725, // w31 w30 w27 w26 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci 22725, 29692, 22725, 12299, // w05 w04 w01 w00 70cabdff1aSopenharmony_ci 22725, 12299,-22725,-29692, // w07 w06 w03 w02 71cabdff1aSopenharmony_ci 22725,-12299, 22725,-29692, // w13 w12 w09 w08 72cabdff1aSopenharmony_ci -22725, 29692, 22725,-12299, // w15 w14 w11 w10 73cabdff1aSopenharmony_ci 31521, 26722, 26722, -6270, // w21 w20 w17 w16 74cabdff1aSopenharmony_ci 17855, 6270,-31521,-17855, // w23 w22 w19 w18 75cabdff1aSopenharmony_ci 17855,-31521, 6270,-17855, // w29 w28 w25 w24 76cabdff1aSopenharmony_ci 6270, 26722, 26722,-31521, // w31 w30 w27 w26 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_ci 21407, 27969, 21407, 11585, // w05 w04 w01 w00 79cabdff1aSopenharmony_ci 21407, 11585,-21407,-27969, // w07 w06 w03 w02 80cabdff1aSopenharmony_ci 21407,-11585, 21407,-27969, // w13 w12 w09 w08 81cabdff1aSopenharmony_ci -21407, 27969, 21407,-11585, // w15 w14 w11 w10 82cabdff1aSopenharmony_ci 29692, 25172, 25172, -5906, // w21 w20 w17 w16 83cabdff1aSopenharmony_ci 16819, 5906,-29692,-16819, // w23 w22 w19 w18 84cabdff1aSopenharmony_ci 16819,-29692, 5906,-16819, // w29 w28 w25 w24 85cabdff1aSopenharmony_ci 5906, 25172, 25172,-29692, // w31 w30 w27 w26 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_ci 19266, 25172, 19266, 10426, // w05 w04 w01 w00 88cabdff1aSopenharmony_ci 19266, 10426,-19266,-25172, // w07 w06 w03 w02 89cabdff1aSopenharmony_ci 19266,-10426, 19266,-25172, // w13 w12 w09 w08 90cabdff1aSopenharmony_ci -19266, 25172, 19266,-10426, // w15 w14 w11 w10 91cabdff1aSopenharmony_ci 26722, 22654, 22654, -5315, // w21 w20 w17 w16 92cabdff1aSopenharmony_ci 15137, 5315,-26722,-15137, // w23 w22 w19 w18 93cabdff1aSopenharmony_ci 15137,-26722, 5315,-15137, // w29 w28 w25 w24 94cabdff1aSopenharmony_ci 5315, 22654, 22654,-26722, // w31 w30 w27 w26 95cabdff1aSopenharmony_ci}; 96cabdff1aSopenharmony_ci 97cabdff1aSopenharmony_ci#define DCT_8_INV_ROW_MMI(A1,A2,A3,A4) \ 98cabdff1aSopenharmony_ci "dli $10, 0x88 \n\t" \ 99cabdff1aSopenharmony_ci "ldc1 $f4, "#A1" \n\t" /* 0; x3 x2 x1 x0 */\ 100cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" \ 101cabdff1aSopenharmony_ci "ldc1 $f10, 8+"#A1" \n\t" /* 1; x7 x6 x5 x4 */\ 102cabdff1aSopenharmony_ci "ldc1 $f6, "#A3" \n\t" /* 3; w05 w04 w01 w00 */\ 103cabdff1aSopenharmony_ci "pshufh $f0, $f4, $f16 \n\t" /* x2 x0 x2 x0 */\ 104cabdff1aSopenharmony_ci "ldc1 $f8, 8+"#A3" \n\t" /* 4; w07 w06 w03 w02 */\ 105cabdff1aSopenharmony_ci "ldc1 $f12, 32+"#A3" \n\t" /* 6; w21 w20 w17 w16 */\ 106cabdff1aSopenharmony_ci "pmaddhw $f6, $f6, $f0 \n\t" /* x2*w05+x0*w04 x2*w01+x0*w00 */\ 107cabdff1aSopenharmony_ci "dli $10, 0xdd \n\t" \ 108cabdff1aSopenharmony_ci "pshufh $f2, $f10, $f16 \n\t" /* x6 x4 x6 x4 */\ 109cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" \ 110cabdff1aSopenharmony_ci "pmaddhw $f8, $f8, $f2 \n\t" /* x6*w07+x4*w06 x6*w03+x4*w02 */\ 111cabdff1aSopenharmony_ci "ldc1 $f14, 40+"#A3" \n\t" /* 7; w23 w22 w19 w18 */\ 112cabdff1aSopenharmony_ci "pshufh $f4, $f4, $f16 \n\t" /* x3 x1 x3 x1 */\ 113cabdff1aSopenharmony_ci "pmaddhw $f12, $f12, $f4 \n\t" /* x3*w21+x1*w20 x3*w17+x1*w16 */\ 114cabdff1aSopenharmony_ci "pshufh $f10, $f10, $f16 \n\t" /* x7 x5 x7 x5 */\ 115cabdff1aSopenharmony_ci "ldc1 $f18, "#A4" \n\t" \ 116cabdff1aSopenharmony_ci "pmaddhw $f14, $f14, $f10 \n\t" /* x7*w23+x5*w22 x7*w19+x5*w18 */\ 117cabdff1aSopenharmony_ci "paddw $f6, $f6, $f18 \n\t" /* +%4 */\ 118cabdff1aSopenharmony_ci "ldc1 $f16, 16+"#A3" \n\t" \ 119cabdff1aSopenharmony_ci "pmaddhw $f0, $f0, $f16 \n\t" /* x2*w13+x0*w12 x2*w09+x0*w08 */\ 120cabdff1aSopenharmony_ci "ldc1 $f16, 24+"#A3" \n\t" \ 121cabdff1aSopenharmony_ci "paddw $f6, $f6, $f8 \n\t" /* 4; a1=sum(even1) a0=sum(even0) */\ 122cabdff1aSopenharmony_ci "pmaddhw $f2, $f2, $f16 \n\t" /* x6*w15+x4*w14 x6*w11+x4*w10 */\ 123cabdff1aSopenharmony_ci "ldc1 $f16, 48+"#A3" \n\t" \ 124cabdff1aSopenharmony_ci "pmaddhw $f4, $f4, $f16 \n\t" /* x3*w29+x1*w28 x3*w25+x1*w24 */\ 125cabdff1aSopenharmony_ci "ldc1 $f16, 56+"#A3" \n\t" \ 126cabdff1aSopenharmony_ci "paddw $f12, $f12, $f14 \n\t" /* 7; b1=sum(odd1) b0=sum(odd0) */\ 127cabdff1aSopenharmony_ci "dli $10, 11 \n\t" \ 128cabdff1aSopenharmony_ci "pmaddhw $f10, $f10, $f16 \n\t" /* x7*w31+x5*w30 x7*w27+x5*w26 */\ 129cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" \ 130cabdff1aSopenharmony_ci "psubw $f8, $f6, $f12 \n\t" /* 6; a1-b1 a0-b0 */\ 131cabdff1aSopenharmony_ci "paddw $f6, $f6, $f12 \n\t" /* a1+b1 a0+b0 */\ 132cabdff1aSopenharmony_ci "paddw $f0, $f0, $f18 \n\t" /* +%4 */\ 133cabdff1aSopenharmony_ci "psraw $f6, $f6, $f16 \n\t" /* y1=a1+b1 y0=a0+b0 */\ 134cabdff1aSopenharmony_ci "paddw $f0, $f0, $f2 \n\t" /* 1; a3=sum(even3) a2=sum(even2) */\ 135cabdff1aSopenharmony_ci "paddw $f4, $f4, $f10 \n\t" /* 5; b3=sum(odd3) b2=sum(odd2) */\ 136cabdff1aSopenharmony_ci "psraw $f8, $f8, $f16 \n\t" /* y6=a1-b1 y7=a0-b0 */\ 137cabdff1aSopenharmony_ci "psubw $f14, $f0, $f4 \n\t" /* 2; a3-b3 a2-b2 */\ 138cabdff1aSopenharmony_ci "paddw $f0, $f0, $f4 \n\t" /* a3+b3 a2+b2 */\ 139cabdff1aSopenharmony_ci "psraw $f0, $f0, $f16 \n\t" /* y3=a3+b3 y2=a2+b2 */\ 140cabdff1aSopenharmony_ci "psraw $f14, $f14, $f16 \n\t" /* y4=a3-b3 y5=a2-b2 */\ 141cabdff1aSopenharmony_ci "dli $10, 0xb1 \n\t" \ 142cabdff1aSopenharmony_ci "packsswh $f6, $f6, $f0 \n\t" /* 0; y3 y2 y1 y0 */\ 143cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" \ 144cabdff1aSopenharmony_ci "packsswh $f14, $f14, $f8 \n\t" /* 4; y6 y7 y4 y5 */\ 145cabdff1aSopenharmony_ci "sdc1 $f6, "#A2" \n\t" /* 3; save y3 y2 y1 y0 */\ 146cabdff1aSopenharmony_ci "pshufh $f14, $f14, $f16 \n\t" /* y7 y6 y5 y4 */\ 147cabdff1aSopenharmony_ci "sdc1 $f14, 8+"#A2" \n\t" /* 7; save y7 y6 y5 y4 */\ 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci#define DCT_8_INV_COL(A1,A2) \ 151cabdff1aSopenharmony_ci "ldc1 $f2, 2*8(%3) \n\t" \ 152cabdff1aSopenharmony_ci "ldc1 $f6, 16*3+"#A1" \n\t" \ 153cabdff1aSopenharmony_ci "ldc1 $f10, 16*5+"#A1" \n\t" \ 154cabdff1aSopenharmony_ci "pmulhh $f0, $f2, $f6 \n\t" /* x3*(tg_3_16-1) */\ 155cabdff1aSopenharmony_ci "ldc1 $f4, 0(%3) \n\t" \ 156cabdff1aSopenharmony_ci "pmulhh $f2, $f2, $f10 \n\t" /* x5*(tg_3_16-1) */\ 157cabdff1aSopenharmony_ci "ldc1 $f14, 16*7+"#A1" \n\t" \ 158cabdff1aSopenharmony_ci "ldc1 $f12, 16*1+"#A1" \n\t" \ 159cabdff1aSopenharmony_ci "pmulhh $f8, $f4, $f14 \n\t" /* x7*tg_1_16 */\ 160cabdff1aSopenharmony_ci "paddsh $f0, $f0, $f6 \n\t" /* x3*tg_3_16 */\ 161cabdff1aSopenharmony_ci "pmulhh $f4, $f4, $f12 \n\t" /* x1*tg_1_16 */\ 162cabdff1aSopenharmony_ci "paddsh $f2, $f2, $f6 \n\t" /* x3+x5*(tg_3_16-1) */\ 163cabdff1aSopenharmony_ci "psubsh $f0, $f0, $f10 \n\t" /* x3*tg_3_16-x5 = tm35 */\ 164cabdff1aSopenharmony_ci "ldc1 $f6, 3*8(%3) \n\t" \ 165cabdff1aSopenharmony_ci "paddsh $f2, $f2, $f10 \n\t" /* x3+x5*tg_3_16 = tp35 */\ 166cabdff1aSopenharmony_ci "paddsh $f8, $f8, $f12 \n\t" /* x1+tg_1_16*x7 = tp17 */\ 167cabdff1aSopenharmony_ci "psubsh $f4, $f4, $f14 \n\t" /* x1*tg_1_16-x7 = tm17 */\ 168cabdff1aSopenharmony_ci "paddsh $f10, $f8, $f2 \n\t" /* tp17+tp35 = b0 */\ 169cabdff1aSopenharmony_ci "psubsh $f12, $f4, $f0 \n\t" /* tm17-tm35 = b3 */\ 170cabdff1aSopenharmony_ci "psubsh $f8, $f8, $f2 \n\t" /* tp17-tp35 = t1 */\ 171cabdff1aSopenharmony_ci "paddsh $f4, $f4, $f0 \n\t" /* tm17+tm35 = t2 */\ 172cabdff1aSopenharmony_ci "ldc1 $f14, 1*8(%3) \n\t" \ 173cabdff1aSopenharmony_ci "sdc1 $f10, 3*16+"#A2" \n\t" /* save b0 */\ 174cabdff1aSopenharmony_ci "paddsh $f2, $f8, $f4 \n\t" /* t1+t2 */\ 175cabdff1aSopenharmony_ci "sdc1 $f12, 5*16+"#A2" \n\t" /* save b3 */\ 176cabdff1aSopenharmony_ci "psubsh $f8, $f8, $f4 \n\t" /* t1-t2 */\ 177cabdff1aSopenharmony_ci "ldc1 $f10, 2*16+"#A1" \n\t" \ 178cabdff1aSopenharmony_ci "ldc1 $f12, 6*16+"#A1" \n\t" \ 179cabdff1aSopenharmony_ci "pmulhh $f0, $f14, $f10 \n\t" /* x2*tg_2_16 */\ 180cabdff1aSopenharmony_ci "pmulhh $f14, $f14, $f12 \n\t" /* x6*tg_2_16 */\ 181cabdff1aSopenharmony_ci "pmulhh $f2, $f2, $f6 \n\t" /* ocos_4_16*(t1+t2) = b1/2 */\ 182cabdff1aSopenharmony_ci "ldc1 $f4, 0*16+"#A1" \n\t" \ 183cabdff1aSopenharmony_ci "pmulhh $f8, $f8, $f6 \n\t" /* ocos_4_16*(t1-t2) = b2/2 */\ 184cabdff1aSopenharmony_ci "psubsh $f0, $f0, $f12 \n\t" /* t2*tg_2_16-x6 = tm26 */\ 185cabdff1aSopenharmony_ci "ldc1 $f12, 4*16+"#A1" \n\t" \ 186cabdff1aSopenharmony_ci "paddsh $f14, $f14, $f10 \n\t" /* x2+x6*tg_2_16 = tp26 */\ 187cabdff1aSopenharmony_ci "psubsh $f6, $f4, $f12 \n\t" /* x0-x4 = tm04 */\ 188cabdff1aSopenharmony_ci "paddsh $f4, $f4, $f12 \n\t" /* x0+x4 = tp04 */\ 189cabdff1aSopenharmony_ci "paddsh $f10, $f4, $f14 \n\t" /* tp04+tp26 = a0 */\ 190cabdff1aSopenharmony_ci "psubsh $f12, $f6, $f0 \n\t" /* tm04-tm26 = a2 */\ 191cabdff1aSopenharmony_ci "psubsh $f4, $f4, $f14 \n\t" /* tp04-tp26 = a3 */\ 192cabdff1aSopenharmony_ci "paddsh $f6, $f6, $f0 \n\t" /* tm04+tm26 = a1 */\ 193cabdff1aSopenharmony_ci "paddsh $f2, $f2, $f2 \n\t" /* b1 */\ 194cabdff1aSopenharmony_ci "paddsh $f8, $f8, $f8 \n\t" /* b2 */\ 195cabdff1aSopenharmony_ci "psubsh $f14, $f6, $f2 \n\t" /* a1-b1 */\ 196cabdff1aSopenharmony_ci "dli $10, 6 \n\t" \ 197cabdff1aSopenharmony_ci "paddsh $f6, $f6, $f2 \n\t" /* a1+b1 */\ 198cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" \ 199cabdff1aSopenharmony_ci "psubsh $f0, $f12, $f8 \n\t" /* a2-b2 */\ 200cabdff1aSopenharmony_ci "paddsh $f12, $f12, $f8 \n\t" /* a2+b2 */\ 201cabdff1aSopenharmony_ci "psrah $f6, $f6, $f16 \n\t" /* dst1 */\ 202cabdff1aSopenharmony_ci "psrah $f12, $f12, $f16 \n\t" /* dst2 */\ 203cabdff1aSopenharmony_ci "ldc1 $f2, 3*16+"#A2" \n\t" /* load b0 */\ 204cabdff1aSopenharmony_ci "psrah $f14, $f14, $f16 \n\t" /* dst6 */\ 205cabdff1aSopenharmony_ci "psrah $f0, $f0, $f16 \n\t" /* dst5 */\ 206cabdff1aSopenharmony_ci "sdc1 $f6, 1*16+"#A2" \n\t" \ 207cabdff1aSopenharmony_ci "psubsh $f8, $f10, $f2 \n\t" /* a0-b0 */\ 208cabdff1aSopenharmony_ci "paddsh $f10, $f10, $f2 \n\t" /* a0+b0 */\ 209cabdff1aSopenharmony_ci "sdc1 $f12, 2*16+"#A2" \n\t" \ 210cabdff1aSopenharmony_ci "ldc1 $f6, 5*16+"#A2" \n\t" /* load b3 */\ 211cabdff1aSopenharmony_ci "psrah $f10, $f10, $f16 \n\t" /* dst0 */\ 212cabdff1aSopenharmony_ci "psrah $f8, $f8, $f16 \n\t" /* dst7 */\ 213cabdff1aSopenharmony_ci "sdc1 $f0, 5*16+"#A2" \n\t" \ 214cabdff1aSopenharmony_ci "psubsh $f12, $f4, $f6 \n\t" /* a3-b3 */\ 215cabdff1aSopenharmony_ci "paddsh $f4, $f4, $f6 \n\t" /* a3+b3 */\ 216cabdff1aSopenharmony_ci "sdc1 $f14, 6*16+"#A2" \n\t" \ 217cabdff1aSopenharmony_ci "sdc1 $f10, 0*16+"#A2" \n\t" \ 218cabdff1aSopenharmony_ci "psrah $f4, $f4, $f16 \n\t" /* dst3 */\ 219cabdff1aSopenharmony_ci "sdc1 $f8, 7*16+"#A2" \n\t" \ 220cabdff1aSopenharmony_ci "psrah $f12, $f12, $f16 \n\t" /* dst4 */\ 221cabdff1aSopenharmony_ci "sdc1 $f4, 3*16+"#A2" \n\t" \ 222cabdff1aSopenharmony_ci "sdc1 $f12, 4*16+"#A2" \n\t" \ 223cabdff1aSopenharmony_ci 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_civoid ff_xvid_idct_mmi(int16_t *block) 226cabdff1aSopenharmony_ci{ 227cabdff1aSopenharmony_ci __asm__ volatile ( 228cabdff1aSopenharmony_ci //# Process each row 229cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) 230cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) 231cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1)) 232cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1)) 233cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1)) 234cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1)) 235cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1)) 236cabdff1aSopenharmony_ci DCT_8_INV_ROW_MMI(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1)) 237cabdff1aSopenharmony_ci //# Process the columns (4 at a time) 238cabdff1aSopenharmony_ci DCT_8_INV_COL(0(%0), 0(%0)) 239cabdff1aSopenharmony_ci DCT_8_INV_COL(8(%0), 8(%0)) 240cabdff1aSopenharmony_ci ::"r"(block),"r"(rounder_0),"r"(tab_i_04_mmi),"r"(tg_1_16) 241cabdff1aSopenharmony_ci : "$10" 242cabdff1aSopenharmony_ci ); 243cabdff1aSopenharmony_ci} 244cabdff1aSopenharmony_ci 245cabdff1aSopenharmony_civoid ff_xvid_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 246cabdff1aSopenharmony_ci{ 247cabdff1aSopenharmony_ci ff_xvid_idct_mmi(block); 248cabdff1aSopenharmony_ci ff_put_pixels_clamped_mmi(block, dest, line_size); 249cabdff1aSopenharmony_ci} 250cabdff1aSopenharmony_ci 251cabdff1aSopenharmony_civoid ff_xvid_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 252cabdff1aSopenharmony_ci{ 253cabdff1aSopenharmony_ci ff_xvid_idct_mmi(block); 254cabdff1aSopenharmony_ci ff_add_pixels_clamped_mmi(block, dest, line_size); 255cabdff1aSopenharmony_ci} 256