1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Loongson SIMD optimized simple idct 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2015 Loongson Technology Corporation Limited 5cabdff1aSopenharmony_ci * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6cabdff1aSopenharmony_ci * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn> 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * This file is part of FFmpeg. 9cabdff1aSopenharmony_ci * 10cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 11cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 12cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 13cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 14cabdff1aSopenharmony_ci * 15cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 16cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 17cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18cabdff1aSopenharmony_ci * Lesser General Public License for more details. 19cabdff1aSopenharmony_ci * 20cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 21cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 22cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23cabdff1aSopenharmony_ci */ 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "idctdsp_mips.h" 26cabdff1aSopenharmony_ci#include "constants.h" 27cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h" 28cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 29cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 32cabdff1aSopenharmony_ci#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 33cabdff1aSopenharmony_ci#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 34cabdff1aSopenharmony_ci#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 35cabdff1aSopenharmony_ci#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 36cabdff1aSopenharmony_ci#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 37cabdff1aSopenharmony_ci#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_ci#define ROW_SHIFT 11 40cabdff1aSopenharmony_ci#define COL_SHIFT 20 41cabdff1aSopenharmony_ci#define DC_SHIFT 3 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_ciDECLARE_ALIGNED(16, const int16_t, W_arr)[46] = { 44cabdff1aSopenharmony_ci W4, W2, W4, W6, 45cabdff1aSopenharmony_ci W1, W3, W5, W7, 46cabdff1aSopenharmony_ci W4, W6, -W4, -W2, 47cabdff1aSopenharmony_ci W3, -W7, -W1, -W5, 48cabdff1aSopenharmony_ci W4, -W6, -W4, W2, 49cabdff1aSopenharmony_ci W5, -W1, W7, W3, 50cabdff1aSopenharmony_ci W4, -W2, W4, -W6, 51cabdff1aSopenharmony_ci W7, -W5, W3, -W1, 52cabdff1aSopenharmony_ci 1024, 0, 1024, 0, //ff_p32_1024 = 0x0000040000000400ULL 53cabdff1aSopenharmony_ci 0, -1, -1, -1, //mask = 0xffffffffffff0000ULL 54cabdff1aSopenharmony_ci 32, 32, 32, 32 //ff_p16_32 = 0x0020002000200020ULL 55cabdff1aSopenharmony_ci}; 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_civoid ff_simple_idct_8_mmi(int16_t *block) 58cabdff1aSopenharmony_ci{ 59cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci BACKUP_REG 62cabdff1aSopenharmony_ci __asm__ volatile ( 63cabdff1aSopenharmony_ci 64cabdff1aSopenharmony_ci#define IDCT_ROW_COND_DC(src1, src2) \ 65cabdff1aSopenharmony_ci "dmfc1 $11, "#src1" \n\t" \ 66cabdff1aSopenharmony_ci "dmfc1 $12, "#src2" \n\t" \ 67cabdff1aSopenharmony_ci "and $11, $11, $9 \n\t" \ 68cabdff1aSopenharmony_ci "or $10, $11, $12 \n\t" \ 69cabdff1aSopenharmony_ci "beqz $10, 1f \n\t" \ 70cabdff1aSopenharmony_ci \ 71cabdff1aSopenharmony_ci "punpcklhw $f30, "#src1", "#src2" \n\t" \ 72cabdff1aSopenharmony_ci "punpckhhw $f31, "#src1", "#src2" \n\t" \ 73cabdff1aSopenharmony_ci /* s6, s4, s2, s0 */ \ 74cabdff1aSopenharmony_ci "punpcklhw "#src1", $f30, $f31 \n\t" \ 75cabdff1aSopenharmony_ci /* s7, s5, s3, s1 */ \ 76cabdff1aSopenharmony_ci "punpckhhw "#src2", $f30, $f31 \n\t" \ 77cabdff1aSopenharmony_ci \ 78cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f18 \n\t" \ 79cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f19 \n\t" \ 80cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 81cabdff1aSopenharmony_ci "psubw $f29, $f30, $f31 \n\t" \ 82cabdff1aSopenharmony_ci "punpcklwd $f30, $f28, $f29 \n\t" \ 83cabdff1aSopenharmony_ci "punpckhwd $f31, $f28, $f29 \n\t" \ 84cabdff1aSopenharmony_ci "paddw $f26, $f30, $f31 \n\t" \ 85cabdff1aSopenharmony_ci "paddw $f26, $f26, $f16 \n\t" \ 86cabdff1aSopenharmony_ci /* $f26: src[7], src[0] */ \ 87cabdff1aSopenharmony_ci "psraw $f26, $f26, $f17 \n\t" \ 88cabdff1aSopenharmony_ci \ 89cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f20 \n\t" \ 90cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f21 \n\t" \ 91cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 92cabdff1aSopenharmony_ci "psubw $f29, $f30, $f31 \n\t" \ 93cabdff1aSopenharmony_ci "punpcklwd $f30, $f28, $f29 \n\t" \ 94cabdff1aSopenharmony_ci "punpckhwd $f31, $f28, $f29 \n\t" \ 95cabdff1aSopenharmony_ci "paddw $f27, $f30, $f31 \n\t" \ 96cabdff1aSopenharmony_ci "paddw $f27, $f27, $f16 \n\t" \ 97cabdff1aSopenharmony_ci /* $f27: src[6], src[1] */ \ 98cabdff1aSopenharmony_ci "psraw $f27, $f27, $f17 \n\t" \ 99cabdff1aSopenharmony_ci \ 100cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f22 \n\t" \ 101cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f23 \n\t" \ 102cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 103cabdff1aSopenharmony_ci "psubw $f29, $f30, $f31 \n\t" \ 104cabdff1aSopenharmony_ci "punpcklwd $f30, $f28, $f29 \n\t" \ 105cabdff1aSopenharmony_ci "punpckhwd $f31, $f28, $f29 \n\t" \ 106cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 107cabdff1aSopenharmony_ci "paddw $f28, $f28, $f16 \n\t" \ 108cabdff1aSopenharmony_ci /* $f28: src[5], src[2] */ \ 109cabdff1aSopenharmony_ci "psraw $f28, $f28, $f17 \n\t" \ 110cabdff1aSopenharmony_ci \ 111cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f24 \n\t" \ 112cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f25 \n\t" \ 113cabdff1aSopenharmony_ci "paddw "#src1", $f30, $f31 \n\t" \ 114cabdff1aSopenharmony_ci "psubw "#src2", $f30, $f31 \n\t" \ 115cabdff1aSopenharmony_ci "punpcklwd $f30, "#src1", "#src2" \n\t" \ 116cabdff1aSopenharmony_ci "punpckhwd $f31, "#src1", "#src2" \n\t" \ 117cabdff1aSopenharmony_ci "paddw $f29, $f30, $f31 \n\t" \ 118cabdff1aSopenharmony_ci "paddw $f29, $f29, $f16 \n\t" \ 119cabdff1aSopenharmony_ci /* $f29: src[4], src[3] */ \ 120cabdff1aSopenharmony_ci "psraw $f29, $f29, $f17 \n\t" \ 121cabdff1aSopenharmony_ci \ 122cabdff1aSopenharmony_ci "punpcklhw "#src1", $f26, $f27 \n\t" \ 123cabdff1aSopenharmony_ci "punpckhhw $f30, $f27, $f26 \n\t" \ 124cabdff1aSopenharmony_ci "punpcklhw $f31, $f28, $f29 \n\t" \ 125cabdff1aSopenharmony_ci "punpckhhw "#src2", $f29, $f28 \n\t" \ 126cabdff1aSopenharmony_ci /* src[3], src[2], src[1], src[0] */ \ 127cabdff1aSopenharmony_ci "punpcklwd "#src1", "#src1", $f31 \n\t" \ 128cabdff1aSopenharmony_ci /* src[7], src[6], src[5], src[4] */ \ 129cabdff1aSopenharmony_ci "punpcklwd "#src2", "#src2", $f30 \n\t" \ 130cabdff1aSopenharmony_ci "j 2f \n\t" \ 131cabdff1aSopenharmony_ci \ 132cabdff1aSopenharmony_ci "1: \n\t" \ 133cabdff1aSopenharmony_ci "li $10, 3 \n\t" \ 134cabdff1aSopenharmony_ci "dmtc1 $10, $f30 \n\t" \ 135cabdff1aSopenharmony_ci "psllh $f28, "#src1", $f30 \n\t" \ 136cabdff1aSopenharmony_ci "dmtc1 $9, $f31 \n\t" \ 137cabdff1aSopenharmony_ci "punpcklhw $f29, $f28, $f28 \n\t" \ 138cabdff1aSopenharmony_ci "pand $f29, $f29, $f31 \n\t" \ 139cabdff1aSopenharmony_ci "paddw $f28, $f28, $f29 \n\t" \ 140cabdff1aSopenharmony_ci "punpcklwd "#src1", $f28, $f28 \n\t" \ 141cabdff1aSopenharmony_ci "punpcklwd "#src2", $f28, $f28 \n\t" \ 142cabdff1aSopenharmony_ci "2: \n\t" \ 143cabdff1aSopenharmony_ci 144cabdff1aSopenharmony_ci /* idctRowCondDC row0~8 */ 145cabdff1aSopenharmony_ci 146cabdff1aSopenharmony_ci /* load W */ 147cabdff1aSopenharmony_ci MMI_LQC1($f19, $f18, %[w_arr], 0x00) 148cabdff1aSopenharmony_ci MMI_LQC1($f21, $f20, %[w_arr], 0x10) 149cabdff1aSopenharmony_ci MMI_LQC1($f23, $f22, %[w_arr], 0x20) 150cabdff1aSopenharmony_ci MMI_LQC1($f25, $f24, %[w_arr], 0x30) 151cabdff1aSopenharmony_ci MMI_LQC1($f17, $f16, %[w_arr], 0x40) 152cabdff1aSopenharmony_ci /* load source in block */ 153cabdff1aSopenharmony_ci MMI_LQC1($f1, $f0, %[block], 0x00) 154cabdff1aSopenharmony_ci MMI_LQC1($f3, $f2, %[block], 0x10) 155cabdff1aSopenharmony_ci MMI_LQC1($f5, $f4, %[block], 0x20) 156cabdff1aSopenharmony_ci MMI_LQC1($f7, $f6, %[block], 0x30) 157cabdff1aSopenharmony_ci MMI_LQC1($f9, $f8, %[block], 0x40) 158cabdff1aSopenharmony_ci MMI_LQC1($f11, $f10, %[block], 0x50) 159cabdff1aSopenharmony_ci MMI_LQC1($f13, $f12, %[block], 0x60) 160cabdff1aSopenharmony_ci MMI_LQC1($f15, $f14, %[block], 0x70) 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci /* $9: mask ; $f17: ROW_SHIFT */ 163cabdff1aSopenharmony_ci "dmfc1 $9, $f17 \n\t" 164cabdff1aSopenharmony_ci "li $10, 11 \n\t" 165cabdff1aSopenharmony_ci "mtc1 $10, $f17 \n\t" 166cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f0,$f1) 167cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f2,$f3) 168cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f4,$f5) 169cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f6,$f7) 170cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f8,$f9) 171cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f10,$f11) 172cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f12,$f13) 173cabdff1aSopenharmony_ci IDCT_ROW_COND_DC($f14,$f15) 174cabdff1aSopenharmony_ci 175cabdff1aSopenharmony_ci#define IDCT_COL_CASE1(src, out1, out2) \ 176cabdff1aSopenharmony_ci "pmaddhw $f26, "#src", $f18 \n\t" \ 177cabdff1aSopenharmony_ci "pmaddhw $f27, "#src", $f20 \n\t" \ 178cabdff1aSopenharmony_ci "pmaddhw $f28, "#src", $f22 \n\t" \ 179cabdff1aSopenharmony_ci "pmaddhw $f29, "#src", $f24 \n\t" \ 180cabdff1aSopenharmony_ci \ 181cabdff1aSopenharmony_ci "punpcklwd $f30, $f26, $f26 \n\t" \ 182cabdff1aSopenharmony_ci "punpckhwd $f31, $f26, $f26 \n\t" \ 183cabdff1aSopenharmony_ci /* $f26: src[0], src[56] */ \ 184cabdff1aSopenharmony_ci "paddw $f26, $f30, $f31 \n\t" \ 185cabdff1aSopenharmony_ci "punpcklwd $f30, $f27, $f27 \n\t" \ 186cabdff1aSopenharmony_ci "punpckhwd $f31, $f27, $f27 \n\t" \ 187cabdff1aSopenharmony_ci /* $f27: src[8], src[48] */ \ 188cabdff1aSopenharmony_ci "paddw $f27, $f30, $f31 \n\t" \ 189cabdff1aSopenharmony_ci "punpcklwd $f30, $f28, $f28 \n\t" \ 190cabdff1aSopenharmony_ci "punpckhwd $f31, $f28, $f28 \n\t" \ 191cabdff1aSopenharmony_ci /* $f28: src[16], src[40] */ \ 192cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 193cabdff1aSopenharmony_ci "punpcklwd $f30, $f29, $f29 \n\t" \ 194cabdff1aSopenharmony_ci "punpckhwd $f31, $f29, $f29 \n\t" \ 195cabdff1aSopenharmony_ci /* $f29: src[24], src[32] */ \ 196cabdff1aSopenharmony_ci "paddw $f29, $f30, $f31 \n\t" \ 197cabdff1aSopenharmony_ci \ 198cabdff1aSopenharmony_ci /* out1: src[24], src[16], src[8], src[0] */ \ 199cabdff1aSopenharmony_ci /* out2: src[56], src[48], src[40], src[32] */ \ 200cabdff1aSopenharmony_ci "punpcklhw $f30, $f26, $f27 \n\t" \ 201cabdff1aSopenharmony_ci "punpcklhw $f31, $f28, $f29 \n\t" \ 202cabdff1aSopenharmony_ci "punpckhwd "#out1", $f30, $f31 \n\t" \ 203cabdff1aSopenharmony_ci "psrah "#out1", "#out1", $f16 \n\t" \ 204cabdff1aSopenharmony_ci "punpcklhw $f30, $f27, $f26 \n\t" \ 205cabdff1aSopenharmony_ci "punpcklhw $f31, $f29, $f28 \n\t" \ 206cabdff1aSopenharmony_ci "punpckhwd "#out2", $f31, $f30 \n\t" \ 207cabdff1aSopenharmony_ci "psrah "#out2", "#out2", $f16 \n\t" 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_ci#define IDCT_COL_CASE2(src1, src2, out1, out2) \ 210cabdff1aSopenharmony_ci "pmaddhw $f28, "#src1", $f18 \n\t" \ 211cabdff1aSopenharmony_ci "pmaddhw $f29, "#src2", $f19 \n\t" \ 212cabdff1aSopenharmony_ci "paddw $f30, $f28, $f29 \n\t" \ 213cabdff1aSopenharmony_ci "psubw $f31, $f28, $f29 \n\t" \ 214cabdff1aSopenharmony_ci "punpcklwd $f28, $f30, $f31 \n\t" \ 215cabdff1aSopenharmony_ci "punpckhwd $f29, $f30, $f31 \n\t" \ 216cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f20 \n\t" \ 217cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f21 \n\t" \ 218cabdff1aSopenharmony_ci /* $f26: src[0], src[56] */ \ 219cabdff1aSopenharmony_ci "paddw $f26, $f28, $f29 \n\t" \ 220cabdff1aSopenharmony_ci "paddw $f28, $f30, $f31 \n\t" \ 221cabdff1aSopenharmony_ci "psubw $f29, $f30, $f31 \n\t" \ 222cabdff1aSopenharmony_ci "punpcklwd $f30, $f28, $f29 \n\t" \ 223cabdff1aSopenharmony_ci "punpckhwd $f31, $f28, $f29 \n\t" \ 224cabdff1aSopenharmony_ci "pmaddhw $f28, "#src1", $f22 \n\t" \ 225cabdff1aSopenharmony_ci "pmaddhw $f29, "#src2", $f23 \n\t" \ 226cabdff1aSopenharmony_ci /* $f27: src[8], src[48] */ \ 227cabdff1aSopenharmony_ci "paddw $f27, $f30, $f31 \n\t" \ 228cabdff1aSopenharmony_ci "paddw $f30, $f28, $f29 \n\t" \ 229cabdff1aSopenharmony_ci "psubw $f31, $f28, $f29 \n\t" \ 230cabdff1aSopenharmony_ci "punpcklwd $f28, $f30, $f31 \n\t" \ 231cabdff1aSopenharmony_ci "punpckhwd $f29, $f30, $f31 \n\t" \ 232cabdff1aSopenharmony_ci "pmaddhw $f30, "#src1", $f24 \n\t" \ 233cabdff1aSopenharmony_ci "pmaddhw $f31, "#src2", $f25 \n\t" \ 234cabdff1aSopenharmony_ci /* $f28: src[16], src[40] */ \ 235cabdff1aSopenharmony_ci "paddw $f28, $f28, $f29 \n\t" \ 236cabdff1aSopenharmony_ci "paddw "#out1", $f30, $f31 \n\t" \ 237cabdff1aSopenharmony_ci "psubw "#out2", $f30, $f31 \n\t" \ 238cabdff1aSopenharmony_ci "punpcklwd $f30, "#out1", "#out2" \n\t" \ 239cabdff1aSopenharmony_ci "punpckhwd $f31, "#out1", "#out2" \n\t" \ 240cabdff1aSopenharmony_ci /* $f29: src[24], src[32] */ \ 241cabdff1aSopenharmony_ci "paddw $f29, $f30, $f31 \n\t" \ 242cabdff1aSopenharmony_ci \ 243cabdff1aSopenharmony_ci /* out1: src[24], src[16], src[8], src[0] */ \ 244cabdff1aSopenharmony_ci /* out2: src[56], src[48], src[40], src[32] */ \ 245cabdff1aSopenharmony_ci "punpcklhw "#out1", $f26, $f27 \n\t" \ 246cabdff1aSopenharmony_ci "punpckhhw "#out2", $f27, $f26 \n\t" \ 247cabdff1aSopenharmony_ci "punpcklhw $f30, $f28, $f29 \n\t" \ 248cabdff1aSopenharmony_ci "punpckhhw $f31, $f29, $f28 \n\t" \ 249cabdff1aSopenharmony_ci "punpckhwd "#out1", "#out1", $f30 \n\t" \ 250cabdff1aSopenharmony_ci "punpckhwd "#out2", $f31, "#out2" \n\t" \ 251cabdff1aSopenharmony_ci "psrah "#out1", "#out1", $f16 \n\t" \ 252cabdff1aSopenharmony_ci "psrah "#out2", "#out2", $f16 \n\t" 253cabdff1aSopenharmony_ci 254cabdff1aSopenharmony_ci 255cabdff1aSopenharmony_ci /* idctSparseCol col0~3 */ 256cabdff1aSopenharmony_ci 257cabdff1aSopenharmony_ci /* $f17: ff_p16_32; $f16: COL_SHIFT-16 */ 258cabdff1aSopenharmony_ci MMI_ULDC1($f17, %[w_arr], 0x50) 259cabdff1aSopenharmony_ci "li $10, 4 \n\t" 260cabdff1aSopenharmony_ci "dmtc1 $10, $f16 \n\t" 261cabdff1aSopenharmony_ci "paddh $f0, $f0, $f17 \n\t" 262cabdff1aSopenharmony_ci /* Transpose row[0,2,4,6] */ 263cabdff1aSopenharmony_ci "punpcklhw $f26, $f0, $f4 \n\t" 264cabdff1aSopenharmony_ci "punpckhhw $f27, $f0, $f4 \n\t" 265cabdff1aSopenharmony_ci "punpcklhw $f28, $f8, $f12 \n\t" 266cabdff1aSopenharmony_ci "punpckhhw $f29, $f8, $f12 \n\t" 267cabdff1aSopenharmony_ci "punpcklwd $f0, $f26, $f28 \n\t" 268cabdff1aSopenharmony_ci "punpckhwd $f4, $f26, $f28 \n\t" 269cabdff1aSopenharmony_ci "punpcklwd $f8, $f27, $f29 \n\t" 270cabdff1aSopenharmony_ci "punpckhwd $f12, $f27, $f29 \n\t" 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci "por $f26, $f2, $f6 \n\t" 273cabdff1aSopenharmony_ci "por $f26, $f26, $f10 \n\t" 274cabdff1aSopenharmony_ci "por $f26, $f26, $f14 \n\t" 275cabdff1aSopenharmony_ci "dmfc1 $10, $f26 \n\t" 276cabdff1aSopenharmony_ci "bnez $10, 1f \n\t" 277cabdff1aSopenharmony_ci /* case1: In this case, row[1,3,5,7] are all zero */ 278cabdff1aSopenharmony_ci /* col0: $f0: col[24,16,8,0]; $f2: col[56,48,40,32] */ 279cabdff1aSopenharmony_ci IDCT_COL_CASE1($f0, $f0, $f2) 280cabdff1aSopenharmony_ci /* col1: $f4: col[25,17,9,1]; $f6: col[57,49,41,33] */ 281cabdff1aSopenharmony_ci IDCT_COL_CASE1($f4, $f4, $f6) 282cabdff1aSopenharmony_ci /* col2: $f8: col[26,18,10,2]; $f10: col[58,50,42,34] */ 283cabdff1aSopenharmony_ci IDCT_COL_CASE1($f8, $f8, $f10) 284cabdff1aSopenharmony_ci /* col3: $f12: col[27,19,11,3]; $f14: col[59,51,43,35] */ 285cabdff1aSopenharmony_ci IDCT_COL_CASE1($f12, $f12, $f14) 286cabdff1aSopenharmony_ci "j 2f \n\t" 287cabdff1aSopenharmony_ci 288cabdff1aSopenharmony_ci "1: \n\t" 289cabdff1aSopenharmony_ci /* case2: row[1,3,5,7] are not all zero */ 290cabdff1aSopenharmony_ci /* Transpose */ 291cabdff1aSopenharmony_ci "punpcklhw $f26, $f2, $f6 \n\t" 292cabdff1aSopenharmony_ci "punpckhhw $f27, $f2, $f6 \n\t" 293cabdff1aSopenharmony_ci "punpcklhw $f28, $f10, $f14 \n\t" 294cabdff1aSopenharmony_ci "punpckhhw $f29, $f10, $f14 \n\t" 295cabdff1aSopenharmony_ci "punpcklwd $f2, $f26, $f28 \n\t" 296cabdff1aSopenharmony_ci "punpckhwd $f6, $f26, $f28 \n\t" 297cabdff1aSopenharmony_ci "punpcklwd $f10, $f27, $f29 \n\t" 298cabdff1aSopenharmony_ci "punpckhwd $f14, $f27, $f29 \n\t" 299cabdff1aSopenharmony_ci 300cabdff1aSopenharmony_ci /* col0: $f0: col[24,16,8,0]; $f2: col[56,48,40,32] */ 301cabdff1aSopenharmony_ci IDCT_COL_CASE2($f0, $f2, $f0, $f2) 302cabdff1aSopenharmony_ci /* col1: $f4: col[25,17,9,1]; $f6: col[57,49,41,33] */ 303cabdff1aSopenharmony_ci IDCT_COL_CASE2($f4, $f6, $f4, $f6) 304cabdff1aSopenharmony_ci /* col2: $f8: col[26,18,10,2]; $f10: col[58,50,42,34] */ 305cabdff1aSopenharmony_ci IDCT_COL_CASE2($f8, $f10, $f8, $f10) 306cabdff1aSopenharmony_ci /* col3: $f12: col[27,19,11,3]; $f14: col[59,51,43,35] */ 307cabdff1aSopenharmony_ci IDCT_COL_CASE2($f12, $f14, $f12, $f14) 308cabdff1aSopenharmony_ci 309cabdff1aSopenharmony_ci "2: \n\t" 310cabdff1aSopenharmony_ci /* Transpose */ 311cabdff1aSopenharmony_ci "punpcklhw $f26, $f0, $f4 \n\t" 312cabdff1aSopenharmony_ci "punpckhhw $f27, $f0, $f4 \n\t" 313cabdff1aSopenharmony_ci "punpcklhw $f28, $f8, $f12 \n\t" 314cabdff1aSopenharmony_ci "punpckhhw $f29, $f8, $f12 \n\t" 315cabdff1aSopenharmony_ci "punpcklwd $f0, $f26, $f28 \n\t" 316cabdff1aSopenharmony_ci "punpckhwd $f4, $f26, $f28 \n\t" 317cabdff1aSopenharmony_ci "punpcklwd $f8, $f27, $f29 \n\t" 318cabdff1aSopenharmony_ci "punpckhwd $f12, $f27, $f29 \n\t" 319cabdff1aSopenharmony_ci /* Transpose */ 320cabdff1aSopenharmony_ci "punpcklhw $f26, $f2, $f6 \n\t" 321cabdff1aSopenharmony_ci "punpckhhw $f27, $f2, $f6 \n\t" 322cabdff1aSopenharmony_ci "punpcklhw $f28, $f10, $f14 \n\t" 323cabdff1aSopenharmony_ci "punpckhhw $f29, $f10, $f14 \n\t" 324cabdff1aSopenharmony_ci "punpcklwd $f2, $f26, $f28 \n\t" 325cabdff1aSopenharmony_ci "punpckhwd $f6, $f26, $f28 \n\t" 326cabdff1aSopenharmony_ci "punpcklwd $f10, $f27, $f29 \n\t" 327cabdff1aSopenharmony_ci "punpckhwd $f14, $f27, $f29 \n\t" 328cabdff1aSopenharmony_ci 329cabdff1aSopenharmony_ci /* idctSparseCol col4~7 */ 330cabdff1aSopenharmony_ci 331cabdff1aSopenharmony_ci "paddh $f1, $f1, $f17 \n\t" 332cabdff1aSopenharmony_ci /* Transpose */ 333cabdff1aSopenharmony_ci "punpcklhw $f26, $f1, $f5 \n\t" 334cabdff1aSopenharmony_ci "punpckhhw $f27, $f1, $f5 \n\t" 335cabdff1aSopenharmony_ci "punpcklhw $f28, $f9, $f13 \n\t" 336cabdff1aSopenharmony_ci "punpckhhw $f29, $f9, $f13 \n\t" 337cabdff1aSopenharmony_ci "punpcklwd $f1, $f26, $f28 \n\t" 338cabdff1aSopenharmony_ci "punpckhwd $f5, $f26, $f28 \n\t" 339cabdff1aSopenharmony_ci "punpcklwd $f9, $f27, $f29 \n\t" 340cabdff1aSopenharmony_ci "punpckhwd $f13, $f27, $f29 \n\t" 341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci "por $f26, $f3, $f7 \n\t" 343cabdff1aSopenharmony_ci "por $f26, $f26, $f11 \n\t" 344cabdff1aSopenharmony_ci "por $f26, $f26, $f15 \n\t" 345cabdff1aSopenharmony_ci "dmfc1 $10, $f26 \n\t" 346cabdff1aSopenharmony_ci "bnez $10, 1f \n\t" 347cabdff1aSopenharmony_ci /* case1: In this case, row[1,3,5,7] are all zero */ 348cabdff1aSopenharmony_ci /* col4: $f1: col[24,16,8,0]; $f3: col[56,48,40,32] */ 349cabdff1aSopenharmony_ci IDCT_COL_CASE1($f1, $f1, $f3) 350cabdff1aSopenharmony_ci /* col5: $f5: col[25,17,9,1]; $f7: col[57,49,41,33] */ 351cabdff1aSopenharmony_ci IDCT_COL_CASE1($f5, $f5, $f7) 352cabdff1aSopenharmony_ci /* col6: $f9: col[26,18,10,2]; $f11: col[58,50,42,34] */ 353cabdff1aSopenharmony_ci IDCT_COL_CASE1($f9, $f9, $f11) 354cabdff1aSopenharmony_ci /* col7: $f13: col[27,19,11,3]; $f15: col[59,51,43,35] */ 355cabdff1aSopenharmony_ci IDCT_COL_CASE1($f13, $f13, $f15) 356cabdff1aSopenharmony_ci "j 2f \n\t" 357cabdff1aSopenharmony_ci 358cabdff1aSopenharmony_ci "1: \n\t" 359cabdff1aSopenharmony_ci /* case2: row[1,3,5,7] are not all zero */ 360cabdff1aSopenharmony_ci /* Transpose */ 361cabdff1aSopenharmony_ci "punpcklhw $f26, $f3, $f7 \n\t" 362cabdff1aSopenharmony_ci "punpckhhw $f27, $f3, $f7 \n\t" 363cabdff1aSopenharmony_ci "punpcklhw $f28, $f11, $f15 \n\t" 364cabdff1aSopenharmony_ci "punpckhhw $f29, $f11, $f15 \n\t" 365cabdff1aSopenharmony_ci "punpcklwd $f3, $f26, $f28 \n\t" 366cabdff1aSopenharmony_ci "punpckhwd $f7, $f26, $f28 \n\t" 367cabdff1aSopenharmony_ci "punpcklwd $f11, $f27, $f29 \n\t" 368cabdff1aSopenharmony_ci "punpckhwd $f15, $f27, $f29 \n\t" 369cabdff1aSopenharmony_ci 370cabdff1aSopenharmony_ci /* col4: $f1: col[24,16,8,0]; $f3: col[56,48,40,32] */ 371cabdff1aSopenharmony_ci IDCT_COL_CASE2($f1, $f3, $f1, $f3) 372cabdff1aSopenharmony_ci /* col5: $f5: col[25,17,9,1]; $f7: col[57,49,41,33] */ 373cabdff1aSopenharmony_ci IDCT_COL_CASE2($f5, $f7, $f5, $f7) 374cabdff1aSopenharmony_ci /* col6: $f9: col[26,18,10,2]; $f11: col[58,50,42,34] */ 375cabdff1aSopenharmony_ci IDCT_COL_CASE2($f9, $f11, $f9, $f11) 376cabdff1aSopenharmony_ci /* col7: $f13: col[27,19,11,3]; $f15: col[59,51,43,35] */ 377cabdff1aSopenharmony_ci IDCT_COL_CASE2($f13, $f15, $f13, $f15) 378cabdff1aSopenharmony_ci 379cabdff1aSopenharmony_ci "2: \n\t" 380cabdff1aSopenharmony_ci /* Transpose */ 381cabdff1aSopenharmony_ci "punpcklhw $f26, $f1, $f5 \n\t" 382cabdff1aSopenharmony_ci "punpckhhw $f27, $f1, $f5 \n\t" 383cabdff1aSopenharmony_ci "punpcklhw $f28, $f9, $f13 \n\t" 384cabdff1aSopenharmony_ci "punpckhhw $f29, $f9, $f13 \n\t" 385cabdff1aSopenharmony_ci "punpcklwd $f1, $f26, $f28 \n\t" 386cabdff1aSopenharmony_ci "punpckhwd $f5, $f26, $f28 \n\t" 387cabdff1aSopenharmony_ci "punpcklwd $f9, $f27, $f29 \n\t" 388cabdff1aSopenharmony_ci "punpckhwd $f13, $f27, $f29 \n\t" 389cabdff1aSopenharmony_ci /* Transpose */ 390cabdff1aSopenharmony_ci "punpcklhw $f26, $f3, $f7 \n\t" 391cabdff1aSopenharmony_ci "punpckhhw $f27, $f3, $f7 \n\t" 392cabdff1aSopenharmony_ci "punpcklhw $f28, $f11, $f15 \n\t" 393cabdff1aSopenharmony_ci "punpckhhw $f29, $f11, $f15 \n\t" 394cabdff1aSopenharmony_ci "punpcklwd $f3, $f26, $f28 \n\t" 395cabdff1aSopenharmony_ci "punpckhwd $f7, $f26, $f28 \n\t" 396cabdff1aSopenharmony_ci "punpcklwd $f11, $f27, $f29 \n\t" 397cabdff1aSopenharmony_ci "punpckhwd $f15, $f27, $f29 \n\t" 398cabdff1aSopenharmony_ci /* Store */ 399cabdff1aSopenharmony_ci MMI_SQC1($f1, $f0, %[block], 0x00) 400cabdff1aSopenharmony_ci MMI_SQC1($f5, $f4, %[block], 0x10) 401cabdff1aSopenharmony_ci MMI_SQC1($f9, $f8, %[block], 0x20) 402cabdff1aSopenharmony_ci MMI_SQC1($f13, $f12, %[block], 0x30) 403cabdff1aSopenharmony_ci MMI_SQC1($f3, $f2, %[block], 0x40) 404cabdff1aSopenharmony_ci MMI_SQC1($f7, $f6, %[block], 0x50) 405cabdff1aSopenharmony_ci MMI_SQC1($f11, $f10, %[block], 0x60) 406cabdff1aSopenharmony_ci MMI_SQC1($f15, $f14, %[block], 0x70) 407cabdff1aSopenharmony_ci 408cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 [block]"+&r"(block) 409cabdff1aSopenharmony_ci : [w_arr]"r"(W_arr) 410cabdff1aSopenharmony_ci : "memory" 411cabdff1aSopenharmony_ci ); 412cabdff1aSopenharmony_ci 413cabdff1aSopenharmony_ci RECOVER_REG 414cabdff1aSopenharmony_ci} 415cabdff1aSopenharmony_ci 416cabdff1aSopenharmony_civoid ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 417cabdff1aSopenharmony_ci{ 418cabdff1aSopenharmony_ci ff_simple_idct_8_mmi(block); 419cabdff1aSopenharmony_ci ff_put_pixels_clamped_mmi(block, dest, line_size); 420cabdff1aSopenharmony_ci} 421cabdff1aSopenharmony_civoid ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 422cabdff1aSopenharmony_ci{ 423cabdff1aSopenharmony_ci ff_simple_idct_8_mmi(block); 424cabdff1aSopenharmony_ci ff_add_pixels_clamped_mmi(block, dest, line_size); 425cabdff1aSopenharmony_ci} 426