1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Loongson SIMD optimized vp8dsp 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2016 Loongson Technology Corporation Limited 5cabdff1aSopenharmony_ci * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 
18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "vp8dsp_mips.h" 25cabdff1aSopenharmony_ci#include "constants.h" 26cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 27cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 28cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 29cabdff1aSopenharmony_ci 30cabdff1aSopenharmony_ci#define DECLARE_DOUBLE_1 double db_1 31cabdff1aSopenharmony_ci#define DECLARE_DOUBLE_2 double db_2 32cabdff1aSopenharmony_ci#define DECLARE_UINT32_T uint32_t it_1 33cabdff1aSopenharmony_ci#define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1) 34cabdff1aSopenharmony_ci#define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2) 35cabdff1aSopenharmony_ci#define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1) 36cabdff1aSopenharmony_ci 37cabdff1aSopenharmony_ci#define MMI_PCMPGTUB(dst, src1, src2) \ 38cabdff1aSopenharmony_ci "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \ 39cabdff1aSopenharmony_ci "pmaxub %[db_2], "#src1", "#src2" \n\t" \ 40cabdff1aSopenharmony_ci "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \ 41cabdff1aSopenharmony_ci "pxor "#dst", %[db_2], %[db_1] \n\t" 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_ci#define MMI_BTOH(dst_l, dst_r, src) \ 44cabdff1aSopenharmony_ci "pxor %[db_1], %[db_1], %[db_1] \n\t" \ 45cabdff1aSopenharmony_ci "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \ 46cabdff1aSopenharmony_ci "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \ 47cabdff1aSopenharmony_ci "punpckhbh "#dst_l", "#src", %[db_2] \n\t" 48cabdff1aSopenharmony_ci 49cabdff1aSopenharmony_ci#define MMI_VP8_LOOP_FILTER \ 50cabdff1aSopenharmony_ci /* Calculation of hev */ \ 51cabdff1aSopenharmony_ci "dmtc1 %[thresh], %[ftmp3] \n\t" 
\ 52cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 53cabdff1aSopenharmony_ci "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 54cabdff1aSopenharmony_ci "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 55cabdff1aSopenharmony_ci "pasubub %[ftmp0], %[p1], %[p0] \n\t" \ 56cabdff1aSopenharmony_ci "pasubub %[ftmp1], %[q1], %[q0] \n\t" \ 57cabdff1aSopenharmony_ci "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \ 58cabdff1aSopenharmony_ci MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \ 59cabdff1aSopenharmony_ci /* Calculation of mask */ \ 60cabdff1aSopenharmony_ci "pasubub %[ftmp1], %[p0], %[q0] \n\t" \ 61cabdff1aSopenharmony_ci "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 62cabdff1aSopenharmony_ci "pasubub %[ftmp2], %[p1], %[q1] \n\t" \ 63cabdff1aSopenharmony_ci "li %[tmp0], 0x09 \n\t" \ 64cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp3] \n\t" \ 65cabdff1aSopenharmony_ci PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \ 66cabdff1aSopenharmony_ci "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 67cabdff1aSopenharmony_ci "dmtc1 %[e], %[ftmp3] \n\t" \ 68cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 69cabdff1aSopenharmony_ci "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 70cabdff1aSopenharmony_ci "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 71cabdff1aSopenharmony_ci MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \ 72cabdff1aSopenharmony_ci "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \ 73cabdff1aSopenharmony_ci "pasubub %[ftmp1], %[p3], %[p2] \n\t" \ 74cabdff1aSopenharmony_ci "pasubub %[ftmp2], %[p2], %[p1] \n\t" \ 75cabdff1aSopenharmony_ci "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 76cabdff1aSopenharmony_ci "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \ 77cabdff1aSopenharmony_ci "pasubub %[ftmp1], %[q3], %[q2] \n\t" \ 78cabdff1aSopenharmony_ci "pasubub %[ftmp2], %[q2], %[q1] \n\t" \ 79cabdff1aSopenharmony_ci "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 80cabdff1aSopenharmony_ci "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \ 
81cabdff1aSopenharmony_ci "dmtc1 %[i], %[ftmp3] \n\t" \ 82cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 83cabdff1aSopenharmony_ci "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 84cabdff1aSopenharmony_ci "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 85cabdff1aSopenharmony_ci MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \ 86cabdff1aSopenharmony_ci "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 87cabdff1aSopenharmony_ci "pxor %[mask], %[mask], %[ftmp3] \n\t" \ 88cabdff1aSopenharmony_ci /* VP8_MBFILTER */ \ 89cabdff1aSopenharmony_ci "li %[tmp0], 0x80808080 \n\t" \ 90cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp7] \n\t" \ 91cabdff1aSopenharmony_ci "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \ 92cabdff1aSopenharmony_ci "pxor %[p2], %[p2], %[ftmp7] \n\t" \ 93cabdff1aSopenharmony_ci "pxor %[p1], %[p1], %[ftmp7] \n\t" \ 94cabdff1aSopenharmony_ci "pxor %[p0], %[p0], %[ftmp7] \n\t" \ 95cabdff1aSopenharmony_ci "pxor %[q0], %[q0], %[ftmp7] \n\t" \ 96cabdff1aSopenharmony_ci "pxor %[q1], %[q1], %[ftmp7] \n\t" \ 97cabdff1aSopenharmony_ci "pxor %[q2], %[q2], %[ftmp7] \n\t" \ 98cabdff1aSopenharmony_ci "psubsb %[ftmp4], %[p1], %[q1] \n\t" \ 99cabdff1aSopenharmony_ci "psubb %[ftmp5], %[q0], %[p0] \n\t" \ 100cabdff1aSopenharmony_ci MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \ 101cabdff1aSopenharmony_ci MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \ 102cabdff1aSopenharmony_ci /* Right part */ \ 103cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \ 104cabdff1aSopenharmony_ci "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \ 105cabdff1aSopenharmony_ci "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \ 106cabdff1aSopenharmony_ci /* Left part */ \ 107cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \ 108cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \ 109cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \ 110cabdff1aSopenharmony_ci /* Combine left and right part */ \ 111cabdff1aSopenharmony_ci "packsshb %[ftmp1], 
%[ftmp0], %[ftmp1] \n\t" \ 112cabdff1aSopenharmony_ci "pand %[ftmp1], %[ftmp1], %[mask] \n\t" \ 113cabdff1aSopenharmony_ci "pand %[ftmp2], %[ftmp1], %[hev] \n\t" \ 114cabdff1aSopenharmony_ci "li %[tmp0], 0x04040404 \n\t" \ 115cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 116cabdff1aSopenharmony_ci "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 117cabdff1aSopenharmony_ci "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \ 118cabdff1aSopenharmony_ci "li %[tmp0], 0x0B \n\t" \ 119cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp4] \n\t" \ 120cabdff1aSopenharmony_ci PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \ 121cabdff1aSopenharmony_ci "li %[tmp0], 0x03030303 \n\t" \ 122cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 123cabdff1aSopenharmony_ci "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 124cabdff1aSopenharmony_ci "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \ 125cabdff1aSopenharmony_ci "li %[tmp0], 0x0B \n\t" \ 126cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp2] \n\t" \ 127cabdff1aSopenharmony_ci PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \ 128cabdff1aSopenharmony_ci "psubsb %[q0], %[q0], %[ftmp3] \n\t" \ 129cabdff1aSopenharmony_ci "paddsb %[p0], %[p0], %[ftmp4] \n\t" \ 130cabdff1aSopenharmony_ci /* filt_val &= ~hev */ \ 131cabdff1aSopenharmony_ci "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 132cabdff1aSopenharmony_ci "pxor %[hev], %[hev], %[ftmp0] \n\t" \ 133cabdff1aSopenharmony_ci "pand %[ftmp1], %[ftmp1], %[hev] \n\t" \ 134cabdff1aSopenharmony_ci MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \ 135cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" \ 136cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp2] \n\t" \ 137cabdff1aSopenharmony_ci "li %[tmp0], 0x001b001b \n\t" \ 138cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 139cabdff1aSopenharmony_ci "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 140cabdff1aSopenharmony_ci "li %[tmp0], 0x003f003f \n\t" \ 141cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 
142cabdff1aSopenharmony_ci "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 143cabdff1aSopenharmony_ci /* Right part */ \ 144cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 145cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 146cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 147cabdff1aSopenharmony_ci /* Left part */ \ 148cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \ 149cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 150cabdff1aSopenharmony_ci "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 151cabdff1aSopenharmony_ci /* Combine left and right part */ \ 152cabdff1aSopenharmony_ci "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 153cabdff1aSopenharmony_ci "psubsb %[q0], %[q0], %[ftmp4] \n\t" \ 154cabdff1aSopenharmony_ci "pxor %[q0], %[q0], %[ftmp7] \n\t" \ 155cabdff1aSopenharmony_ci "paddsb %[p0], %[p0], %[ftmp4] \n\t" \ 156cabdff1aSopenharmony_ci "pxor %[p0], %[p0], %[ftmp7] \n\t" \ 157cabdff1aSopenharmony_ci "li %[tmp0], 0x00120012 \n\t" \ 158cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 159cabdff1aSopenharmony_ci "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 160cabdff1aSopenharmony_ci /* Right part */ \ 161cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 162cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 163cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 164cabdff1aSopenharmony_ci /* Left part */ \ 165cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \ 166cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 167cabdff1aSopenharmony_ci "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 168cabdff1aSopenharmony_ci /* Combine left and right part */ \ 169cabdff1aSopenharmony_ci "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 170cabdff1aSopenharmony_ci "psubsb %[q1], %[q1], %[ftmp4] \n\t" \ 171cabdff1aSopenharmony_ci "pxor %[q1], %[q1], %[ftmp7] \n\t" \ 172cabdff1aSopenharmony_ci 
"paddsb %[p1], %[p1], %[ftmp4] \n\t" \ 173cabdff1aSopenharmony_ci "pxor %[p1], %[p1], %[ftmp7] \n\t" \ 174cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" \ 175cabdff1aSopenharmony_ci "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 176cabdff1aSopenharmony_ci /* Right part */ \ 177cabdff1aSopenharmony_ci "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 178cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \ 179cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 180cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 181cabdff1aSopenharmony_ci /* Left part */ \ 182cabdff1aSopenharmony_ci "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \ 183cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 184cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 185cabdff1aSopenharmony_ci "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 186cabdff1aSopenharmony_ci /* Combine left and right part */ \ 187cabdff1aSopenharmony_ci "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 188cabdff1aSopenharmony_ci "psubsb %[q2], %[q2], %[ftmp4] \n\t" \ 189cabdff1aSopenharmony_ci "pxor %[q2], %[q2], %[ftmp7] \n\t" \ 190cabdff1aSopenharmony_ci "paddsb %[p2], %[p2], %[ftmp4] \n\t" \ 191cabdff1aSopenharmony_ci "pxor %[p2], %[p2], %[ftmp7] \n\t" 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci#define PUT_VP8_EPEL4_H6_MMI(src, dst) \ 194cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 195cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 196cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 197cabdff1aSopenharmony_ci \ 198cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, -0x01) \ 199cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 200cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 201cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 202cabdff1aSopenharmony_ci \ 203cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, -0x02) \ 
204cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 205cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 206cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 207cabdff1aSopenharmony_ci \ 208cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x01) \ 209cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 210cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 211cabdff1aSopenharmony_ci \ 212cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x02) \ 213cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 214cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 215cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 216cabdff1aSopenharmony_ci \ 217cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x03) \ 218cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 219cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 220cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 221cabdff1aSopenharmony_ci \ 222cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 223cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 224cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 225cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 226cabdff1aSopenharmony_ci \ 227cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci 230cabdff1aSopenharmony_ci#define PUT_VP8_EPEL4_H4_MMI(src, dst) \ 231cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 232cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 233cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 234cabdff1aSopenharmony_ci \ 235cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, -0x01) \ 236cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 
237cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 238cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 239cabdff1aSopenharmony_ci \ 240cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x01) \ 241cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 242cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 243cabdff1aSopenharmony_ci \ 244cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x02) \ 245cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 246cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 247cabdff1aSopenharmony_ci "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 248cabdff1aSopenharmony_ci \ 249cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 250cabdff1aSopenharmony_ci \ 251cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 252cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 253cabdff1aSopenharmony_ci \ 254cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 255cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 256cabdff1aSopenharmony_ci 257cabdff1aSopenharmony_ci 258cabdff1aSopenharmony_ci#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \ 259cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 260cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 261cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 262cabdff1aSopenharmony_ci \ 263cabdff1aSopenharmony_ci PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 264cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 265cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 266cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 267cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 268cabdff1aSopenharmony_ci \ 269cabdff1aSopenharmony_ci PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \ 270cabdff1aSopenharmony_ci 
MMI_ULWC1(%[ftmp1], src1, 0x00) \ 271cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 272cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 273cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 274cabdff1aSopenharmony_ci \ 275cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 276cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 277cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 278cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 279cabdff1aSopenharmony_ci \ 280cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 281cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 282cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 283cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 284cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 285cabdff1aSopenharmony_ci \ 286cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 287cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 288cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 289cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 290cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 291cabdff1aSopenharmony_ci \ 292cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 293cabdff1aSopenharmony_ci \ 294cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 295cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 296cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 297cabdff1aSopenharmony_ci \ 298cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 299cabdff1aSopenharmony_ci 300cabdff1aSopenharmony_ci 301cabdff1aSopenharmony_ci#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \ 302cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 
303cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 304cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 305cabdff1aSopenharmony_ci \ 306cabdff1aSopenharmony_ci PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 307cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 308cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 309cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 310cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 311cabdff1aSopenharmony_ci \ 312cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 313cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 314cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 315cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 316cabdff1aSopenharmony_ci \ 317cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 318cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 319cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 320cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 321cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 322cabdff1aSopenharmony_ci \ 323cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 324cabdff1aSopenharmony_ci \ 325cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 326cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 327cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 328cabdff1aSopenharmony_ci \ 329cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 330cabdff1aSopenharmony_ci 331cabdff1aSopenharmony_ci 332cabdff1aSopenharmony_ci#define PUT_VP8_EPEL8_H6_MMI(src, dst) \ 333cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x00) \ 334cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 335cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] 
\n\t" \ 336cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 337cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 338cabdff1aSopenharmony_ci \ 339cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, -0x01) \ 340cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 341cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 342cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 343cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 344cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 345cabdff1aSopenharmony_ci "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 346cabdff1aSopenharmony_ci \ 347cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, -0x02) \ 348cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 349cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 350cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 351cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \ 352cabdff1aSopenharmony_ci "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 353cabdff1aSopenharmony_ci "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 354cabdff1aSopenharmony_ci \ 355cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x01) \ 356cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 357cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 358cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 359cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 360cabdff1aSopenharmony_ci \ 361cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x02) \ 362cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 363cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 364cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 365cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 
366cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 367cabdff1aSopenharmony_ci "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 368cabdff1aSopenharmony_ci \ 369cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x03) \ 370cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 371cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 372cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 373cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \ 374cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 375cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 376cabdff1aSopenharmony_ci \ 377cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 378cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 379cabdff1aSopenharmony_ci \ 380cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 381cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 382cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 383cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 384cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 385cabdff1aSopenharmony_ci \ 386cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], dst, 0x00) 387cabdff1aSopenharmony_ci 388cabdff1aSopenharmony_ci 389cabdff1aSopenharmony_ci#define PUT_VP8_EPEL8_H4_MMI(src, dst) \ 390cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x00) \ 391cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 392cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 393cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 394cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 395cabdff1aSopenharmony_ci \ 396cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, -0x01) \ 397cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 398cabdff1aSopenharmony_ci 
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 399cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 400cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 401cabdff1aSopenharmony_ci "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 402cabdff1aSopenharmony_ci "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 403cabdff1aSopenharmony_ci \ 404cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x01) \ 405cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 406cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 407cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 408cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 409cabdff1aSopenharmony_ci \ 410cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x02) \ 411cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 412cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 413cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 414cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 415cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 416cabdff1aSopenharmony_ci "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 417cabdff1aSopenharmony_ci \ 418cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 419cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 420cabdff1aSopenharmony_ci \ 421cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 422cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 423cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 424cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 425cabdff1aSopenharmony_ci \ 426cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 427cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], dst, 0x00) 428cabdff1aSopenharmony_ci 429cabdff1aSopenharmony_ci 430cabdff1aSopenharmony_ci#define 
PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \ 431cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x00) \ 432cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 433cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 434cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 435cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 436cabdff1aSopenharmony_ci \ 437cabdff1aSopenharmony_ci PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 438cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 439cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 440cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 441cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 442cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 443cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 444cabdff1aSopenharmony_ci "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 445cabdff1aSopenharmony_ci \ 446cabdff1aSopenharmony_ci PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \ 447cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 448cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 449cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 450cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 451cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \ 452cabdff1aSopenharmony_ci "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 453cabdff1aSopenharmony_ci "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 454cabdff1aSopenharmony_ci \ 455cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 456cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 457cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 458cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 459cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], 
%[filter3] \n\t" \ 460cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 461cabdff1aSopenharmony_ci \ 462cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 463cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 464cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 465cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 466cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 467cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 468cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 469cabdff1aSopenharmony_ci "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 470cabdff1aSopenharmony_ci \ 471cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 472cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 473cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 474cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 475cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 476cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \ 477cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 478cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 479cabdff1aSopenharmony_ci \ 480cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 481cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 482cabdff1aSopenharmony_ci \ 483cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 484cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 485cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 486cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 487cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 488cabdff1aSopenharmony_ci \ 489cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], dst, 0x00) 490cabdff1aSopenharmony_ci 
/*
 * 4-tap vertical sub-pixel filter for one row of 8 pixels, stored at dst.
 *
 * Per pixel, in 16-bit arithmetic:
 *   (filter2*s[0] - filter1*s[-1] + filter3*s[+1] - filter4*s[+2]
 *    + ff_pw_64) >> ftmp4
 * where s[k] is the pixel k rows (srcstride) away from src. The result is
 * packed to bytes with unsigned saturation. Source bytes are widened by
 * interleaving with %[ftmp0]; the macro never writes %[ftmp0], so the
 * enclosing asm is presumably expected to zero it (not visible here). The
 * shift count in %[ftmp4] and the filterN / ff_pw_64 operands also come from
 * the enclosing asm statement.
 *
 * src1 is a scratch pointer and is overwritten; src is not modified.
 * Clobbers ftmp1, ftmp2, ftmp3, ftmp5..ftmp8. PTR_ADDU, PTR_SUBU, MMI_ULDC1
 * and MMI_SDC1 are defined elsewhere.
 */
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
    MMI_ULDC1(%[ftmp1], src, 0x00) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
    "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
    \
    PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
    MMI_ULDC1(%[ftmp1], src1, 0x00) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
    "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
    "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
    "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
    \
    PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
    MMI_ULDC1(%[ftmp1], src1, 0x00) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
    "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
    \
    PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
    MMI_ULDC1(%[ftmp1], src1, 0x00) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
    "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
    "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
    "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
    \
    "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
    "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
    \
    "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
    "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
    "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
    "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
    "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
    \
    MMI_SDC1(%[ftmp1], dst, 0x00)


/*
 * Horizontal bilinear filter for 8 pixels. The visible part accumulates
 * a*src[x] + b*src[x+1] + ff_pw_4 in ftmp5 (low half) and ftmp6 (high half);
 * the definition continues on the following lines.
 */
#define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
    MMI_ULDC1(%[ftmp1], src, 0x00) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
    "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
    \
    MMI_ULDC1(%[ftmp1], src, 0x01) \
    "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
    "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
    "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
    "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
    "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
    "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
    \
    "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
    "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
"psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 554cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 555cabdff1aSopenharmony_ci \ 556cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 557cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], dst, 0x00) 558cabdff1aSopenharmony_ci 559cabdff1aSopenharmony_ci 560cabdff1aSopenharmony_ci#define PUT_VP8_BILINEAR4_H_MMI(src, dst) \ 561cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 562cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 563cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \ 564cabdff1aSopenharmony_ci \ 565cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x01) \ 566cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 567cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \ 568cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 569cabdff1aSopenharmony_ci \ 570cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \ 571cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 572cabdff1aSopenharmony_ci \ 573cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 574cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 575cabdff1aSopenharmony_ci 576cabdff1aSopenharmony_ci 577cabdff1aSopenharmony_ci#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \ 578cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src, 0x00) \ 579cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 580cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 581cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \ 582cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \ 583cabdff1aSopenharmony_ci \ 584cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \ 585cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], src1, 0x00) \ 586cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 587cabdff1aSopenharmony_ci 
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 588cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \ 589cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \ 590cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 591cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 592cabdff1aSopenharmony_ci \ 593cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \ 594cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \ 595cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 596cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 597cabdff1aSopenharmony_ci \ 598cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 599cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], dst, 0x00) 600cabdff1aSopenharmony_ci 601cabdff1aSopenharmony_ci 602cabdff1aSopenharmony_ci#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \ 603cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src, 0x00) \ 604cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 605cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \ 606cabdff1aSopenharmony_ci \ 607cabdff1aSopenharmony_ci PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \ 608cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], src1, 0x00) \ 609cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 610cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \ 611cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 612cabdff1aSopenharmony_ci \ 613cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \ 614cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 615cabdff1aSopenharmony_ci \ 616cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 617cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], dst, 0x00) 618cabdff1aSopenharmony_ci 619cabdff1aSopenharmony_ci 620cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static const uint64_t, 
fourtap_subpel_filters[7][6]) = { 621cabdff1aSopenharmony_ci {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b, 622cabdff1aSopenharmony_ci 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000}, 623cabdff1aSopenharmony_ci 624cabdff1aSopenharmony_ci {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c, 625cabdff1aSopenharmony_ci 0x0024002400240024, 0x0008000800080008, 0x0001000100010001}, 626cabdff1aSopenharmony_ci 627cabdff1aSopenharmony_ci {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d, 628cabdff1aSopenharmony_ci 0x0032003200320032, 0x0006000600060006, 0x0000000000000000}, 629cabdff1aSopenharmony_ci 630cabdff1aSopenharmony_ci {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d, 631cabdff1aSopenharmony_ci 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003}, 632cabdff1aSopenharmony_ci 633cabdff1aSopenharmony_ci {0x0000000000000000, 0x0006000600060006, 0x0032003200320032, 634cabdff1aSopenharmony_ci 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000}, 635cabdff1aSopenharmony_ci 636cabdff1aSopenharmony_ci {0x0001000100010001, 0x0008000800080008, 0x0024002400240024, 637cabdff1aSopenharmony_ci 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002}, 638cabdff1aSopenharmony_ci 639cabdff1aSopenharmony_ci {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c, 640cabdff1aSopenharmony_ci 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000} 641cabdff1aSopenharmony_ci}; 642cabdff1aSopenharmony_ci 643cabdff1aSopenharmony_ci#if 0 644cabdff1aSopenharmony_ci#define FILTER_6TAP(src, F, stride) \ 645cabdff1aSopenharmony_ci cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ 646cabdff1aSopenharmony_ci F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ 647cabdff1aSopenharmony_ci F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] 648cabdff1aSopenharmony_ci 649cabdff1aSopenharmony_ci#define FILTER_4TAP(src, F, stride) \ 650cabdff1aSopenharmony_ci cm[(F[2] * src[x + 0 * 
stride] - F[1] * src[x - 1 * stride] + \ 651cabdff1aSopenharmony_ci F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] 652cabdff1aSopenharmony_ci 653cabdff1aSopenharmony_cistatic const uint8_t subpel_filters[7][6] = { 654cabdff1aSopenharmony_ci { 0, 6, 123, 12, 1, 0 }, 655cabdff1aSopenharmony_ci { 2, 11, 108, 36, 8, 1 }, 656cabdff1aSopenharmony_ci { 0, 9, 93, 50, 6, 0 }, 657cabdff1aSopenharmony_ci { 3, 16, 77, 77, 16, 3 }, 658cabdff1aSopenharmony_ci { 0, 6, 50, 93, 9, 0 }, 659cabdff1aSopenharmony_ci { 1, 8, 36, 108, 11, 2 }, 660cabdff1aSopenharmony_ci { 0, 1, 12, 123, 6, 0 }, 661cabdff1aSopenharmony_ci}; 662cabdff1aSopenharmony_ci 663cabdff1aSopenharmony_ci#define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) 664cabdff1aSopenharmony_ci#define MUL_35468(a) (((a) * 35468) >> 16) 665cabdff1aSopenharmony_ci#endif 666cabdff1aSopenharmony_ci 667cabdff1aSopenharmony_ci#define clip_int8(n) (cm[(n) + 0x80] - 0x80) 668cabdff1aSopenharmony_cistatic av_always_inline void vp8_filter_common_is4tap(uint8_t *p, 669cabdff1aSopenharmony_ci ptrdiff_t stride) 670cabdff1aSopenharmony_ci{ 671cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 672cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 673cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 674cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 675cabdff1aSopenharmony_ci int a, f1, f2; 676cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 677cabdff1aSopenharmony_ci 678cabdff1aSopenharmony_ci a = 3 * (q0 - p0); 679cabdff1aSopenharmony_ci a += clip_int8(p1 - q1); 680cabdff1aSopenharmony_ci a = clip_int8(a); 681cabdff1aSopenharmony_ci 682cabdff1aSopenharmony_ci // We deviate from the spec here with c(a+3) >> 3 683cabdff1aSopenharmony_ci // since that's what libvpx does. 
684cabdff1aSopenharmony_ci f1 = FFMIN(a + 4, 127) >> 3; 685cabdff1aSopenharmony_ci f2 = FFMIN(a + 3, 127) >> 3; 686cabdff1aSopenharmony_ci 687cabdff1aSopenharmony_ci // Despite what the spec says, we do need to clamp here to 688cabdff1aSopenharmony_ci // be bitexact with libvpx. 689cabdff1aSopenharmony_ci p[-1 * stride] = cm[p0 + f2]; 690cabdff1aSopenharmony_ci p[ 0 * stride] = cm[q0 - f1]; 691cabdff1aSopenharmony_ci} 692cabdff1aSopenharmony_ci 693cabdff1aSopenharmony_cistatic av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, 694cabdff1aSopenharmony_ci ptrdiff_t stride) 695cabdff1aSopenharmony_ci{ 696cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 697cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 698cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 699cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 700cabdff1aSopenharmony_ci int a, f1, f2; 701cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 702cabdff1aSopenharmony_ci 703cabdff1aSopenharmony_ci a = 3 * (q0 - p0); 704cabdff1aSopenharmony_ci a = clip_int8(a); 705cabdff1aSopenharmony_ci 706cabdff1aSopenharmony_ci // We deviate from the spec here with c(a+3) >> 3 707cabdff1aSopenharmony_ci // since that's what libvpx does. 708cabdff1aSopenharmony_ci f1 = FFMIN(a + 4, 127) >> 3; 709cabdff1aSopenharmony_ci f2 = FFMIN(a + 3, 127) >> 3; 710cabdff1aSopenharmony_ci 711cabdff1aSopenharmony_ci // Despite what the spec says, we do need to clamp here to 712cabdff1aSopenharmony_ci // be bitexact with libvpx. 
713cabdff1aSopenharmony_ci p[-1 * stride] = cm[p0 + f2]; 714cabdff1aSopenharmony_ci p[ 0 * stride] = cm[q0 - f1]; 715cabdff1aSopenharmony_ci a = (f1 + 1) >> 1; 716cabdff1aSopenharmony_ci p[-2 * stride] = cm[p1 + a]; 717cabdff1aSopenharmony_ci p[ 1 * stride] = cm[q1 - a]; 718cabdff1aSopenharmony_ci} 719cabdff1aSopenharmony_ci 720cabdff1aSopenharmony_cistatic av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, 721cabdff1aSopenharmony_ci int flim) 722cabdff1aSopenharmony_ci{ 723cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 724cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 725cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 726cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 727cabdff1aSopenharmony_ci 728cabdff1aSopenharmony_ci return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; 729cabdff1aSopenharmony_ci} 730cabdff1aSopenharmony_ci 731cabdff1aSopenharmony_cistatic av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) 732cabdff1aSopenharmony_ci{ 733cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 734cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 735cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 736cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 737cabdff1aSopenharmony_ci 738cabdff1aSopenharmony_ci return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; 739cabdff1aSopenharmony_ci} 740cabdff1aSopenharmony_ci 741cabdff1aSopenharmony_cistatic av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) 742cabdff1aSopenharmony_ci{ 743cabdff1aSopenharmony_ci int a0, a1, a2, w; 744cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 745cabdff1aSopenharmony_ci 746cabdff1aSopenharmony_ci int av_unused p2 = p[-3 * stride]; 747cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 748cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 749cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 
750cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 751cabdff1aSopenharmony_ci int av_unused q2 = p[ 2 * stride]; 752cabdff1aSopenharmony_ci 753cabdff1aSopenharmony_ci w = clip_int8(p1 - q1); 754cabdff1aSopenharmony_ci w = clip_int8(w + 3 * (q0 - p0)); 755cabdff1aSopenharmony_ci 756cabdff1aSopenharmony_ci a0 = (27 * w + 63) >> 7; 757cabdff1aSopenharmony_ci a1 = (18 * w + 63) >> 7; 758cabdff1aSopenharmony_ci a2 = (9 * w + 63) >> 7; 759cabdff1aSopenharmony_ci 760cabdff1aSopenharmony_ci p[-3 * stride] = cm[p2 + a2]; 761cabdff1aSopenharmony_ci p[-2 * stride] = cm[p1 + a1]; 762cabdff1aSopenharmony_ci p[-1 * stride] = cm[p0 + a0]; 763cabdff1aSopenharmony_ci p[ 0 * stride] = cm[q0 - a0]; 764cabdff1aSopenharmony_ci p[ 1 * stride] = cm[q1 - a1]; 765cabdff1aSopenharmony_ci p[ 2 * stride] = cm[q2 - a2]; 766cabdff1aSopenharmony_ci} 767cabdff1aSopenharmony_ci 768cabdff1aSopenharmony_cistatic av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, 769cabdff1aSopenharmony_ci int E, int I) 770cabdff1aSopenharmony_ci{ 771cabdff1aSopenharmony_ci int av_unused p3 = p[-4 * stride]; 772cabdff1aSopenharmony_ci int av_unused p2 = p[-3 * stride]; 773cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; 774cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; 775cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; 776cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; 777cabdff1aSopenharmony_ci int av_unused q2 = p[ 2 * stride]; 778cabdff1aSopenharmony_ci int av_unused q3 = p[ 3 * stride]; 779cabdff1aSopenharmony_ci 780cabdff1aSopenharmony_ci return vp8_simple_limit(p, stride, E) && 781cabdff1aSopenharmony_ci FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && 782cabdff1aSopenharmony_ci FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && 783cabdff1aSopenharmony_ci FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; 784cabdff1aSopenharmony_ci} 785cabdff1aSopenharmony_ci 786cabdff1aSopenharmony_cistatic av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, 
787cabdff1aSopenharmony_ci ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh) 788cabdff1aSopenharmony_ci{ 789cabdff1aSopenharmony_ci double ftmp[18]; 790cabdff1aSopenharmony_ci uint32_t tmp[1]; 791cabdff1aSopenharmony_ci DECLARE_DOUBLE_1; 792cabdff1aSopenharmony_ci DECLARE_DOUBLE_2; 793cabdff1aSopenharmony_ci DECLARE_UINT32_T; 794cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 795cabdff1aSopenharmony_ci 796cabdff1aSopenharmony_ci __asm__ volatile( 797cabdff1aSopenharmony_ci /* Get data from dst */ 798cabdff1aSopenharmony_ci MMI_ULDC1(%[q0], %[dst], 0x0) 799cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t" 800cabdff1aSopenharmony_ci MMI_ULDC1(%[p0], %[tmp0], 0x0) 801cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 802cabdff1aSopenharmony_ci MMI_ULDC1(%[p1], %[tmp0], 0x0) 803cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 804cabdff1aSopenharmony_ci MMI_ULDC1(%[p2], %[tmp0], 0x0) 805cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 806cabdff1aSopenharmony_ci MMI_ULDC1(%[p3], %[tmp0], 0x0) 807cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 808cabdff1aSopenharmony_ci MMI_ULDC1(%[q1], %[tmp0], 0x0) 809cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 810cabdff1aSopenharmony_ci MMI_ULDC1(%[q2], %[tmp0], 0x0) 811cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 812cabdff1aSopenharmony_ci MMI_ULDC1(%[q3], %[tmp0], 0x0) 813cabdff1aSopenharmony_ci MMI_VP8_LOOP_FILTER 814cabdff1aSopenharmony_ci /* Move to dst */ 815cabdff1aSopenharmony_ci MMI_USDC1(%[q0], %[dst], 0x0) 816cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t" 817cabdff1aSopenharmony_ci MMI_USDC1(%[p0], %[tmp0], 0x0) 818cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 819cabdff1aSopenharmony_ci MMI_USDC1(%[p1], %[tmp0], 0x0) 820cabdff1aSopenharmony_ci PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 821cabdff1aSopenharmony_ci MMI_USDC1(%[p2], %[tmp0], 0x0) 
822cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 823cabdff1aSopenharmony_ci MMI_USDC1(%[q1], %[tmp0], 0x0) 824cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 825cabdff1aSopenharmony_ci MMI_USDC1(%[q2], %[tmp0], 0x0) 826cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 827cabdff1aSopenharmony_ci [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]), 828cabdff1aSopenharmony_ci [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]), 829cabdff1aSopenharmony_ci [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]), 830cabdff1aSopenharmony_ci [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]), 831cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]), 832cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]), 833cabdff1aSopenharmony_ci [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]), 834cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]), 835cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]), 836cabdff1aSopenharmony_ci [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), 837cabdff1aSopenharmony_ci RESTRICT_ASM_DOUBLE_1, RESTRICT_ASM_DOUBLE_2, 838cabdff1aSopenharmony_ci RESTRICT_ASM_UINT32_T 839cabdff1aSopenharmony_ci : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh), 840cabdff1aSopenharmony_ci [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride) 841cabdff1aSopenharmony_ci : "memory" 842cabdff1aSopenharmony_ci ); 843cabdff1aSopenharmony_ci} 844cabdff1aSopenharmony_ci 845cabdff1aSopenharmony_cistatic av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, 846cabdff1aSopenharmony_ci ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh) 847cabdff1aSopenharmony_ci{ 848cabdff1aSopenharmony_ci int i; 849cabdff1aSopenharmony_ci 850cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 851cabdff1aSopenharmony_ci if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) { 852cabdff1aSopenharmony_ci int hv = hev(dst + i * 1, stride, hev_thresh); 853cabdff1aSopenharmony_ci if (hv) 854cabdff1aSopenharmony_ci 
vp8_filter_common_is4tap(dst + i * 1, stride); 855cabdff1aSopenharmony_ci else 856cabdff1aSopenharmony_ci vp8_filter_common_isnot4tap(dst + i * 1, stride); 857cabdff1aSopenharmony_ci } 858cabdff1aSopenharmony_ci} 859cabdff1aSopenharmony_ci 860cabdff1aSopenharmony_cistatic av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, 861cabdff1aSopenharmony_ci ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh) 862cabdff1aSopenharmony_ci{ 863cabdff1aSopenharmony_ci double ftmp[18]; 864cabdff1aSopenharmony_ci uint32_t tmp[1]; 865cabdff1aSopenharmony_ci DECLARE_DOUBLE_1; 866cabdff1aSopenharmony_ci DECLARE_DOUBLE_2; 867cabdff1aSopenharmony_ci DECLARE_UINT32_T; 868cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 869cabdff1aSopenharmony_ci 870cabdff1aSopenharmony_ci __asm__ volatile( 871cabdff1aSopenharmony_ci /* Get data from dst */ 872cabdff1aSopenharmony_ci MMI_ULDC1(%[p3], %[dst], -0x04) 873cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 874cabdff1aSopenharmony_ci MMI_ULDC1(%[p2], %[tmp0], -0x04) 875cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 876cabdff1aSopenharmony_ci MMI_ULDC1(%[p1], %[tmp0], -0x04) 877cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 878cabdff1aSopenharmony_ci MMI_ULDC1(%[p0], %[tmp0], -0x04) 879cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 880cabdff1aSopenharmony_ci MMI_ULDC1(%[q0], %[tmp0], -0x04) 881cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 882cabdff1aSopenharmony_ci MMI_ULDC1(%[q1], %[tmp0], -0x04) 883cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 884cabdff1aSopenharmony_ci MMI_ULDC1(%[q2], %[tmp0], -0x04) 885cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 886cabdff1aSopenharmony_ci MMI_ULDC1(%[q3], %[tmp0], -0x04) 887cabdff1aSopenharmony_ci /* Matrix transpose */ 888cabdff1aSopenharmony_ci TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0], 889cabdff1aSopenharmony_ci %[q0], %[q1], %[q2], %[q3], 
890cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 891cabdff1aSopenharmony_ci MMI_VP8_LOOP_FILTER 892cabdff1aSopenharmony_ci /* Matrix transpose */ 893cabdff1aSopenharmony_ci TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0], 894cabdff1aSopenharmony_ci %[q0], %[q1], %[q2], %[q3], 895cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 896cabdff1aSopenharmony_ci /* Move to dst */ 897cabdff1aSopenharmony_ci MMI_USDC1(%[p3], %[dst], -0x04) 898cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 899cabdff1aSopenharmony_ci MMI_USDC1(%[p2], %[dst], -0x04) 900cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 901cabdff1aSopenharmony_ci MMI_USDC1(%[p1], %[dst], -0x04) 902cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 903cabdff1aSopenharmony_ci MMI_USDC1(%[p0], %[dst], -0x04) 904cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 905cabdff1aSopenharmony_ci MMI_USDC1(%[q0], %[dst], -0x04) 906cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 907cabdff1aSopenharmony_ci MMI_USDC1(%[q1], %[dst], -0x04) 908cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 909cabdff1aSopenharmony_ci MMI_USDC1(%[q2], %[dst], -0x04) 910cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 911cabdff1aSopenharmony_ci MMI_USDC1(%[q3], %[dst], -0x04) 912cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 913cabdff1aSopenharmony_ci [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]), 914cabdff1aSopenharmony_ci [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]), 915cabdff1aSopenharmony_ci [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]), 916cabdff1aSopenharmony_ci [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]), 917cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]), 918cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]), 919cabdff1aSopenharmony_ci [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]), 920cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]), 921cabdff1aSopenharmony_ci 
[ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]), 922cabdff1aSopenharmony_ci [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), 923cabdff1aSopenharmony_ci RESTRICT_ASM_DOUBLE_1, RESTRICT_ASM_DOUBLE_2, 924cabdff1aSopenharmony_ci RESTRICT_ASM_UINT32_T 925cabdff1aSopenharmony_ci : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh), 926cabdff1aSopenharmony_ci [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride) 927cabdff1aSopenharmony_ci : "memory" 928cabdff1aSopenharmony_ci ); 929cabdff1aSopenharmony_ci} 930cabdff1aSopenharmony_ci 931cabdff1aSopenharmony_cistatic av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, 932cabdff1aSopenharmony_ci ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh) 933cabdff1aSopenharmony_ci{ 934cabdff1aSopenharmony_ci int i; 935cabdff1aSopenharmony_ci 936cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 937cabdff1aSopenharmony_ci if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) { 938cabdff1aSopenharmony_ci int hv = hev(dst + i * stride, 1, hev_thresh); 939cabdff1aSopenharmony_ci if (hv) 940cabdff1aSopenharmony_ci vp8_filter_common_is4tap(dst + i * stride, 1); 941cabdff1aSopenharmony_ci else 942cabdff1aSopenharmony_ci vp8_filter_common_isnot4tap(dst + i * stride, 1); 943cabdff1aSopenharmony_ci } 944cabdff1aSopenharmony_ci} 945cabdff1aSopenharmony_ci 946cabdff1aSopenharmony_civoid ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16]) 947cabdff1aSopenharmony_ci{ 948cabdff1aSopenharmony_ci#if 1 949cabdff1aSopenharmony_ci double ftmp[8]; 950cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 951cabdff1aSopenharmony_ci 952cabdff1aSopenharmony_ci __asm__ volatile ( 953cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp0], %[dc], 0x00) 954cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[dc], 0x08) 955cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dc], 0x10) 956cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[dc], 0x18) 957cabdff1aSopenharmony_ci "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t" 958cabdff1aSopenharmony_ci "psubsh %[ftmp5], %[ftmp0], 
%[ftmp3] \n\t" 959cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 960cabdff1aSopenharmony_ci "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t" 961cabdff1aSopenharmony_ci "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t" 962cabdff1aSopenharmony_ci "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" 963cabdff1aSopenharmony_ci "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 964cabdff1aSopenharmony_ci "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" 965cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dc], 0x00) 966cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dc], 0x08) 967cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp2], %[dc], 0x10) 968cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp3], %[dc], 0x18) 969cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 970cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 971cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 972cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), 973cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 974cabdff1aSopenharmony_ci [ftmp7]"=&f"(ftmp[7]) 975cabdff1aSopenharmony_ci : [dc]"r"((uint8_t*)dc) 976cabdff1aSopenharmony_ci : "memory" 977cabdff1aSopenharmony_ci ); 978cabdff1aSopenharmony_ci 979cabdff1aSopenharmony_ci block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3; 980cabdff1aSopenharmony_ci block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3; 981cabdff1aSopenharmony_ci block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3; 982cabdff1aSopenharmony_ci block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3; 983cabdff1aSopenharmony_ci 984cabdff1aSopenharmony_ci block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3; 985cabdff1aSopenharmony_ci block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3; 986cabdff1aSopenharmony_ci block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3; 987cabdff1aSopenharmony_ci block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3; 988cabdff1aSopenharmony_ci 989cabdff1aSopenharmony_ci block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + 
dc[10]) >> 3; 990cabdff1aSopenharmony_ci block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3; 991cabdff1aSopenharmony_ci block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3; 992cabdff1aSopenharmony_ci block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3; 993cabdff1aSopenharmony_ci 994cabdff1aSopenharmony_ci block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3; 995cabdff1aSopenharmony_ci block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3; 996cabdff1aSopenharmony_ci block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3; 997cabdff1aSopenharmony_ci block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3; 998cabdff1aSopenharmony_ci 999cabdff1aSopenharmony_ci __asm__ volatile ( 1000cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1001cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dc], 0x00) 1002cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dc], 0x08) 1003cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dc], 0x10) 1004cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dc], 0x18) 1005cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 1006cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]) 1007cabdff1aSopenharmony_ci : [dc]"r"((uint8_t *)dc) 1008cabdff1aSopenharmony_ci : "memory" 1009cabdff1aSopenharmony_ci ); 1010cabdff1aSopenharmony_ci#else 1011cabdff1aSopenharmony_ci int t00, t01, t02, t03, t10, t11, t12, t13, t20, t21, t22, t23, t30, t31, t32, t33; 1012cabdff1aSopenharmony_ci 1013cabdff1aSopenharmony_ci t00 = dc[0] + dc[12]; 1014cabdff1aSopenharmony_ci t10 = dc[1] + dc[13]; 1015cabdff1aSopenharmony_ci t20 = dc[2] + dc[14]; 1016cabdff1aSopenharmony_ci t30 = dc[3] + dc[15]; 1017cabdff1aSopenharmony_ci 1018cabdff1aSopenharmony_ci t03 = dc[0] - dc[12]; 1019cabdff1aSopenharmony_ci t13 = dc[1] - dc[13]; 1020cabdff1aSopenharmony_ci t23 = dc[2] - dc[14]; 1021cabdff1aSopenharmony_ci t33 = dc[3] - dc[15]; 1022cabdff1aSopenharmony_ci 1023cabdff1aSopenharmony_ci t01 = dc[4] + dc[ 8]; 1024cabdff1aSopenharmony_ci t11 = dc[5] + dc[ 9]; 
1025cabdff1aSopenharmony_ci t21 = dc[6] + dc[10]; 1026cabdff1aSopenharmony_ci t31 = dc[7] + dc[11]; 1027cabdff1aSopenharmony_ci 1028cabdff1aSopenharmony_ci t02 = dc[4] - dc[ 8]; 1029cabdff1aSopenharmony_ci t12 = dc[5] - dc[ 9]; 1030cabdff1aSopenharmony_ci t22 = dc[6] - dc[10]; 1031cabdff1aSopenharmony_ci t32 = dc[7] - dc[11]; 1032cabdff1aSopenharmony_ci 1033cabdff1aSopenharmony_ci dc[ 0] = t00 + t01; 1034cabdff1aSopenharmony_ci dc[ 1] = t10 + t11; 1035cabdff1aSopenharmony_ci dc[ 2] = t20 + t21; 1036cabdff1aSopenharmony_ci dc[ 3] = t30 + t31; 1037cabdff1aSopenharmony_ci 1038cabdff1aSopenharmony_ci dc[ 4] = t03 + t02; 1039cabdff1aSopenharmony_ci dc[ 5] = t13 + t12; 1040cabdff1aSopenharmony_ci dc[ 6] = t23 + t22; 1041cabdff1aSopenharmony_ci dc[ 7] = t33 + t32; 1042cabdff1aSopenharmony_ci 1043cabdff1aSopenharmony_ci dc[ 8] = t00 - t01; 1044cabdff1aSopenharmony_ci dc[ 9] = t10 - t11; 1045cabdff1aSopenharmony_ci dc[10] = t20 - t21; 1046cabdff1aSopenharmony_ci dc[11] = t30 - t31; 1047cabdff1aSopenharmony_ci 1048cabdff1aSopenharmony_ci dc[12] = t03 - t02; 1049cabdff1aSopenharmony_ci dc[13] = t13 - t12; 1050cabdff1aSopenharmony_ci dc[14] = t23 - t22; 1051cabdff1aSopenharmony_ci dc[15] = t33 - t32; 1052cabdff1aSopenharmony_ci 1053cabdff1aSopenharmony_ci block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3; 1054cabdff1aSopenharmony_ci block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3; 1055cabdff1aSopenharmony_ci block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3; 1056cabdff1aSopenharmony_ci block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3; 1057cabdff1aSopenharmony_ci 1058cabdff1aSopenharmony_ci block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3; 1059cabdff1aSopenharmony_ci block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3; 1060cabdff1aSopenharmony_ci block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3; 1061cabdff1aSopenharmony_ci block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3; 1062cabdff1aSopenharmony_ci 
1063cabdff1aSopenharmony_ci block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3; 1064cabdff1aSopenharmony_ci block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3; 1065cabdff1aSopenharmony_ci block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3; 1066cabdff1aSopenharmony_ci block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3; 1067cabdff1aSopenharmony_ci 1068cabdff1aSopenharmony_ci block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3; 1069cabdff1aSopenharmony_ci block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3; 1070cabdff1aSopenharmony_ci block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3; 1071cabdff1aSopenharmony_ci block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3; 1072cabdff1aSopenharmony_ci 1073cabdff1aSopenharmony_ci AV_ZERO64(dc + 0); 1074cabdff1aSopenharmony_ci AV_ZERO64(dc + 4); 1075cabdff1aSopenharmony_ci AV_ZERO64(dc + 8); 1076cabdff1aSopenharmony_ci AV_ZERO64(dc + 12); 1077cabdff1aSopenharmony_ci#endif 1078cabdff1aSopenharmony_ci} 1079cabdff1aSopenharmony_ci 1080cabdff1aSopenharmony_civoid ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16]) 1081cabdff1aSopenharmony_ci{ 1082cabdff1aSopenharmony_ci int val = (dc[0] + 3) >> 3; 1083cabdff1aSopenharmony_ci 1084cabdff1aSopenharmony_ci dc[0] = 0; 1085cabdff1aSopenharmony_ci 1086cabdff1aSopenharmony_ci block[0][0][0] = val; 1087cabdff1aSopenharmony_ci block[0][1][0] = val; 1088cabdff1aSopenharmony_ci block[0][2][0] = val; 1089cabdff1aSopenharmony_ci block[0][3][0] = val; 1090cabdff1aSopenharmony_ci block[1][0][0] = val; 1091cabdff1aSopenharmony_ci block[1][1][0] = val; 1092cabdff1aSopenharmony_ci block[1][2][0] = val; 1093cabdff1aSopenharmony_ci block[1][3][0] = val; 1094cabdff1aSopenharmony_ci block[2][0][0] = val; 1095cabdff1aSopenharmony_ci block[2][1][0] = val; 1096cabdff1aSopenharmony_ci block[2][2][0] = val; 1097cabdff1aSopenharmony_ci block[2][3][0] = val; 1098cabdff1aSopenharmony_ci block[3][0][0] = val; 
/**
 * Inverse-transform one 4x4 block of coefficients, add the result to the
 * 4x4 pixel area at dst, and zero the coefficient block.
 *
 * @param dst    top-left pixel of the destination area; 4 bytes are read and
 *               written at each of dst, dst+stride, dst+2*stride and
 *               dst+3*stride
 * @param block  16 coefficients; they are loaded at byte offsets
 *               0x00/0x08/0x10/0x18 and the same offsets are then
 *               overwritten with zero
 * @param stride byte distance between consecutive rows of dst
 *
 * The "#if 1" branch is the MMI inline-assembly implementation. The "#else"
 * branch is a scalar C version that is not compiled while the condition is 1.
 * NOTE(review): the labels in the asm comments (MUL_35468, MUL_20091, tmp[],
 * t[]) mirror the scalar branch, so it looks like the reference the asm was
 * written from - confirm before relying on it.
 */
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
#if 1
    double ftmp[12];
    uint32_t tmp[1];
    union av_intfloat64 ff_ph_4e7b_u;
    union av_intfloat64 ff_ph_22a3_u;
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ALL64;
    /* 0x4e7b == 20091 and 0x22a3 == 8867 == 35468 / 4, each replicated into
     * four 16-bit lanes. */
    ff_ph_4e7b_u.i = 0x4e7b4e7b4e7b4e7bULL;
    ff_ph_22a3_u.i = 0x22a322a322a322a3ULL;

    __asm__ volatile (
        /* ftmp0 = 0; used below both to clear block[] and as the zero
         * operand of the unpack/pack instructions. */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp2], %[block], 0x08)
        MMI_LDC1(%[ftmp3], %[block], 0x10)
        MMI_LDC1(%[ftmp4], %[block], 0x18)

        /* ftmp11 = shift count 2: inputs are multiplied by 4 before the
         * 0x22a3 multiply (4 * 8867 == 35468). */
        "li %[tmp0], 0x02 \n\t"
        "mtc1 %[tmp0], %[ftmp11] \n\t"

        // block[0...3] + block[8...11]
        "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
        // block[0...3] - block[8...11]
        "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
        // MUL_35468(block[12...15])
        "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
        "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
        // MUL_35468(block[4...7])
        "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
        "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
        // MUL_20091(block[4...7])
        "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
        "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
        // MUL_20091(block[12...15])
        "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
        "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"

        // tmp[0 4 8 12]
        "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
        // tmp[1 5 9 13]
        "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
        // tmp[2 6 10 14]
        "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
        // tmp[3 7 11 15]
        "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
        "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"

        /* All coefficients are now held in registers: clear block[] by
         * storing the zeroed ftmp0 over it. */
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDC1(%[ftmp0], %[block], 0x08)
        MMI_SDC1(%[ftmp0], %[block], 0x10)
        MMI_SDC1(%[ftmp0], %[block], 0x18)

        /* ftmp5..ftmp8 serve as scratch here; they are recomputed below. */
        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])

        /* Second pass over the transposed data. ftmp11 still holds the
         * shift count 2 at this point. */
        // t[0 4 8 12]
        "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
        // t[1 5 9 13]
        "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
        // t[2 6 10 14]
        "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
        "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
        "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
        "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        // t[3 7 11 15]
        "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
        "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
        "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
        "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
        "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"

        /* ftmp11 = shift count 3. Each output is (sum + ff_pw_4) >> 3;
         * presumably ff_pw_4 is 4 per lane, matching the "+ 4) >> 3" of the
         * scalar branch - TODO confirm against constants.h. */
        "li %[tmp0], 0x03 \n\t"
        "mtc1 %[tmp0], %[ftmp11] \n\t"
        "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
        "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
        "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
        "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
        "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
        "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
        "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
        "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"

        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])

        /* Fetch the four destination rows (4 bytes each). */
        MMI_LWC1(%[ftmp5], %[dst0], 0x00)
        MMI_LWC1(%[ftmp6], %[dst1], 0x00)
        MMI_LWC1(%[ftmp7], %[dst2], 0x00)
        MMI_LWC1(%[ftmp8], %[dst3], 0x00)

        /* Interleave the destination bytes with the zero register so they
         * can be added to the 16-bit residuals. */
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
        "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"

        "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"

        /* Pack back to bytes. NOTE(review): presumably with unsigned
         * saturation, i.e. the asm counterpart of av_clip_uint8() in the
         * scalar branch - confirm against the MMI instruction reference. */
        "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"

        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ALL64
          [tmp0]"=&r"(tmp[0])
        : [dst0]"r"(dst), [dst1]"r"(dst+stride),
          [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
          [block]"r"(block), [ff_pw_4]"f"(ff_pw_4.f),
          [ff_ph_4e7b]"f"(ff_ph_4e7b_u.f), [ff_ph_22a3]"f"(ff_ph_22a3_u.f)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    int i, t0, t1, t2, t3;
    int16_t tmp[16];

    /* First pass: reads block[], clears it, writes the intermediate tmp[]. */
    for (i = 0; i < 4; i++) {
        t0 = block[0 + i] + block[8 + i];
        t1 = block[0 + i] - block[8 + i];
        t2 = MUL_35468(block[4 + i]) - MUL_20091(block[12 + i]);
        t3 = MUL_20091(block[4 + i]) + MUL_35468(block[12 + i]);
        block[ 0 + i] = 0;
        block[ 4 + i] = 0;
        block[ 8 + i] = 0;
        block[12 + i] = 0;

        tmp[i * 4 + 0] = t0 + t3;
        tmp[i * 4 + 1] = t1 + t2;
        tmp[i * 4 + 2] = t1 - t2;
        tmp[i * 4 + 3] = t0 - t3;
    }

    /* Second pass: one destination row per iteration, rounded with
     * (x + 4) >> 3 and clipped to 8 bits. */
    for (i = 0; i < 4; i++) {
        t0 = tmp[0 + i] + tmp[8 + i];
        t1 = tmp[0 + i] - tmp[8 + i];
        t2 = MUL_35468(tmp[4 + i]) - MUL_20091(tmp[12 + i]);
        t3 = MUL_20091(tmp[4 + i]) + MUL_35468(tmp[12 + i]);

        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
        dst += stride;
    }
#endif
}
/**
 * DC-only inverse transform: add one rounded DC value to every pixel of the
 * 4x4 area at dst, and clear the DC coefficient.
 *
 * The value added is (block[0] + 4) >> 3. Only block[0] is read or written.
 *
 * @param dst    top-left pixel of the destination area; 4 bytes are read and
 *               written at each of dst, dst+stride, dst+2*stride and
 *               dst+3*stride
 * @param block  coefficient block; block[0] is consumed and reset to 0
 * @param stride byte distance between consecutive rows of dst
 *
 * The "#else" branch is a scalar C version that is not compiled while the
 * "#if 1" condition stays 1.
 */
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
#if 1
    int dc = (block[0] + 4) >> 3;
    double ftmp[6];
    DECLARE_VAR_LOW32;

    block[0] = 0;

    __asm__ volatile (
        /* ftmp0 = 0: zero operand for the unpack/pack steps and the
         * selector operand of pshufh below. */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "mtc1 %[dc], %[ftmp5] \n\t"
        MMI_LWC1(%[ftmp1], %[dst0], 0x00)
        MMI_LWC1(%[ftmp2], %[dst1], 0x00)
        MMI_LWC1(%[ftmp3], %[dst2], 0x00)
        MMI_LWC1(%[ftmp4], %[dst3], 0x00)
        /* NOTE(review): with an all-zero selector this presumably copies
         * 16-bit lane 0 (dc) into all four lanes - confirm against the MMI
         * instruction reference. */
        "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        /* Interleave destination bytes with zero to get 16-bit lanes. */
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
        "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        /* Pack back to bytes; presumably saturating to 0..255 like
         * av_clip_uint8() in the scalar branch - TODO confirm. */
        "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),
          RESTRICT_ASM_LOW32
          [ftmp5]"=&f"(ftmp[5])
        : [dst0]"r"(dst), [dst1]"r"(dst+stride),
          [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
          [dc]"r"(dc)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    int i, dc = (block[0] + 4) >> 3;

    block[0] = 0;

    for (i = 0; i < 4; i++) {
        dst[0] = av_clip_uint8(dst[0] + dc);
        dst[1] = av_clip_uint8(dst[1] + dc);
        dst[2] = av_clip_uint8(dst[2] + dc);
        dst[3] = av_clip_uint8(dst[3] + dc);
        dst += stride;
    }
#endif
}
/**
 * Apply ff_vp8_idct_dc_add_mmi() to four 4x4 blocks laid out side by side.
 *
 * Block n (n = 0..3) is added at dst + 4 * n, in increasing order of n.
 *
 * @param dst    top-left pixel of the leftmost 4x4 area
 * @param block  four coefficient blocks, one per 4x4 area
 * @param stride byte distance between consecutive rows of dst
 */
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16],
        ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++) {
        ff_vp8_idct_dc_add_mmi(dst + 4 * n, block[n], stride);
    }
}
/**
 * Apply ff_vp8_idct_dc_add_mmi() to four 4x4 blocks arranged as a 2x2 grid.
 *
 * Block order is top-left, top-right, bottom-left, bottom-right; the bottom
 * pair starts 4 rows (4 * stride bytes) below dst.
 *
 * @param dst    top-left pixel of the 8x8 area
 * @param block  four coefficient blocks in the order given above
 * @param stride byte distance between consecutive rows of dst
 */
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16],
        ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++) {
        const int grid_row = n >> 1;   /* 0, 0, 1, 1 */
        const int grid_col = n & 1;    /* 0, 1, 0, 1 */

        ff_vp8_idct_dc_add_mmi(dst + stride * (4 * grid_row) + 4 * grid_col,
                               block[n], stride);
    }
}

// loop filter applied to edges between macroblocks
/**
 * Run vp8_v_loop_filter8_mmi() on both 8-pixel-wide halves of a
 * 16-pixel-wide edge: first at dst, then at dst + 8.
 */
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
        int flim_I, int hev_thresh)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        vp8_v_loop_filter8_mmi(dst + col, stride, flim_E, flim_I, hev_thresh);
    }
}

/**
 * Run vp8_h_loop_filter8_mmi() on both 8-row halves of a 16-row edge:
 * first at dst, then at dst + 8 * stride.
 */
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
        int flim_I, int hev_thresh)
{
    int row;

    for (row = 0; row < 16; row += 8) {
        vp8_h_loop_filter8_mmi(dst + row * stride, stride, flim_E, flim_I,
                               hev_thresh);
    }
}

/**
 * Run vp8_v_loop_filter8_mmi() on the two planes passed in, dstU first and
 * dstV second, with identical parameters.
 */
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++) {
        vp8_v_loop_filter8_mmi(plane[p], stride, flim_E, flim_I, hev_thresh);
    }
}
/**
 * Run vp8_h_loop_filter8_mmi() on the two planes passed in, dstU first and
 * dstV second, with identical parameters.
 */
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++) {
        vp8_h_loop_filter8_mmi(plane[p], stride, flim_E, flim_I, hev_thresh);
    }
}

// loop filter applied to inner macroblock edges
/**
 * Inner-edge filter over 16 consecutive positions dst + 0 .. dst + 15.
 *
 * A position is filtered only when vp8_normal_limit() accepts it; hev()
 * then selects between the 4-tap and the non-4-tap common filter. The
 * helpers are called with stride as the step between samples.
 */
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *const px = dst + col;

        if (!vp8_normal_limit(px, stride, flim_E, flim_I))
            continue;

        if (hev(px, stride, hev_thresh))
            vp8_filter_common_is4tap(px, stride);
        else
            vp8_filter_common_isnot4tap(px, stride);
    }
}

/**
 * Inner-edge filter over 16 consecutive rows dst + 0 * stride ..
 * dst + 15 * stride.
 *
 * Same decision logic as ff_vp8_v_loop_filter16_inner_mmi(), but the helpers
 * are called with a step of 1 between samples.
 */
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *const px = dst + row * stride;

        if (!vp8_normal_limit(px, 1, flim_E, flim_I))
            continue;

        if (hev(px, 1, hev_thresh))
            vp8_filter_common_is4tap(px, 1);
        else
            vp8_filter_common_isnot4tap(px, 1);
    }
}
/**
 * Run vp8_v_loop_filter8_inner_mmi() on the two planes passed in, dstU first
 * and dstV second, with identical parameters.
 */
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++) {
        vp8_v_loop_filter8_inner_mmi(plane[p], stride, flim_E, flim_I,
                                     hev_thresh);
    }
}

/**
 * Run vp8_h_loop_filter8_inner_mmi() on the two planes passed in, dstU first
 * and dstV second, with identical parameters.
 */
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++) {
        vp8_h_loop_filter8_inner_mmi(plane[p], stride, flim_E, flim_I,
                                     hev_thresh);
    }
}

/**
 * Simple loop filter over 16 consecutive positions dst + 0 .. dst + 15.
 *
 * Each position accepted by vp8_simple_limit() is passed to
 * vp8_filter_common_is4tap(); the helpers step by stride between samples.
 */
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *const px = dst + col;

        if (vp8_simple_limit(px, stride, flim)) {
            vp8_filter_common_is4tap(px, stride);
        }
    }
}

/**
 * Simple loop filter over 16 consecutive rows dst + 0 * stride ..
 * dst + 15 * stride.
 *
 * Each row accepted by vp8_simple_limit() is passed to
 * vp8_filter_common_is4tap(); the helpers step by 1 between samples.
 */
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *const px = dst + row * stride;

        if (vp8_simple_limit(px, 1, flim)) {
            vp8_filter_common_is4tap(px, 1);
        }
    }
}
/**
 * Copy a block of 16-byte-wide rows from src to dst.
 *
 * @param dst       destination, advanced by dststride per row
 * @param dststride byte distance between destination rows
 * @param src       source, advanced by srcstride per row
 * @param srcstride byte distance between source rows
 * @param h         number of rows to copy
 * @param x         not referenced in this function
 * @param y         not referenced in this function
 *
 * The asm loop handles two rows per iteration, subtracts 2 from h and
 * repeats while h != 0; it has no check before the first iteration.
 * NOTE(review): this only terminates as intended for positive, even h -
 * presumably guaranteed by the callers; confirm.
 *
 * The "#else" branch (one memcpy() of 16 bytes per row) is not compiled
 * while the "#if 1" condition stays 1.
 */
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
        ptrdiff_t srcstride, int h, int x, int y)
{
#if 1
    double ftmp[2];
    uint64_t tmp[2];
    mips_reg addr[2];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        "1: \n\t"
        /* addr0 = second source row of this pair. */
        PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
        /* Row 0: bytes 0..7 into ftmp0, bytes 8..15 into tmp0 (ldl/ldr). */
        MMI_ULDC1(%[ftmp0], %[src], 0x00)
        "ldl %[tmp0], 0x0f(%[src]) \n\t"
        "ldr %[tmp0], 0x08(%[src]) \n\t"
        /* Row 1: bytes 0..7 into ftmp1, bytes 8..15 into tmp1. */
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
        "ldr %[tmp1], 0x08(%[addr0]) \n\t"
        /* addr1 = second destination row of this pair. */
        PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        "sdl %[tmp0], 0x0f(%[dst]) \n\t"
        "sdr %[tmp0], 0x08(%[dst]) \n\t"
        /* Two rows consumed per iteration. */
        "addiu %[h], %[h], -0x02 \n\t"
        MMI_SDC1(%[ftmp1], %[addr1], 0x00)
        /* Advance src/dst to the row after the pair. */
        PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
        "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
        "sdr %[tmp1], 0x08(%[addr1]) \n\t"
        PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [dst]"+&r"(dst), [src]"+&r"(src),
          [h]"+&r"(h)
        : [dststride]"r"((mips_reg)dststride),
          [srcstride]"r"((mips_reg)srcstride)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    int i;

    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
        memcpy(dst, src, 16);
#endif
}
/**
 * Copy a block of 8-byte-wide rows from src to dst.
 *
 * @param dst       destination, advanced by dststride per row
 * @param dststride byte distance between destination rows
 * @param src       source, advanced by srcstride per row
 * @param srcstride byte distance between source rows
 * @param h         number of rows to copy
 * @param x         not referenced in this function
 * @param y         not referenced in this function
 *
 * The asm loop handles two rows per iteration (the first through a
 * floating-point register, the second through a general-purpose register),
 * subtracts 2 from h and repeats while h != 0, with no check before the
 * first iteration.
 * NOTE(review): this only terminates as intended for positive, even h -
 * presumably guaranteed by the callers; confirm.
 *
 * The "#else" branch (one memcpy() of 8 bytes per row) is not compiled while
 * the "#if 1" condition stays 1.
 */
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
        ptrdiff_t srcstride, int h, int x, int y)
{
#if 1
    double ftmp[1];
    uint64_t tmp[1];
    mips_reg addr[2];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        "1: \n\t"
        /* addr0 = second source row of this pair. */
        PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
        /* Row 0 into ftmp0, row 1 into tmp0 (ldl/ldr pair). */
        MMI_ULDC1(%[ftmp0], %[src], 0x00)
        "ldl %[tmp0], 0x07(%[addr0]) \n\t"
        "ldr %[tmp0], 0x00(%[addr0]) \n\t"
        /* addr1 = second destination row of this pair. */
        PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        /* Two rows consumed per iteration. */
        "addiu %[h], %[h], -0x02 \n\t"
        "sdl %[tmp0], 0x07(%[addr1]) \n\t"
        "sdr %[tmp0], 0x00(%[addr1]) \n\t"
        /* Advance src/dst to the row after the pair. */
        PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
        PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [dst]"+&r"(dst), [src]"+&r"(src),
          [h]"+&r"(h)
        : [dststride]"r"((mips_reg)dststride),
          [srcstride]"r"((mips_reg)srcstride)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    int i;

    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
        memcpy(dst, src, 8);
#endif
}

/**
 * Copy a block of 4-byte-wide rows from src to dst.
 *
 * @param dst       destination, advanced by dststride per row
 * @param dststride byte distance between destination rows
 * @param src       source, advanced by srcstride per row
 * @param srcstride byte distance between source rows
 * @param h         number of rows to copy
 * @param x         not referenced in this function
 * @param y         not referenced in this function
 *
 * Same two-rows-per-iteration structure as the 8- and 16-wide copies: h is
 * decremented by 2 and the loop repeats while h != 0.
 * NOTE(review): this only terminates as intended for positive, even h -
 * presumably guaranteed by the callers; confirm.
 *
 * The "#else" branch (one memcpy() of 4 bytes per row) is not compiled while
 * the "#if 1" condition stays 1.
 */
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
        ptrdiff_t srcstride, int h, int x, int y)
{
#if 1
    double ftmp[1];
    uint64_t tmp[1];
    mips_reg addr[2];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        "1: \n\t"
        /* addr0 = second source row of this pair. */
        PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
        /* Row 0 into ftmp0, row 1 into tmp0 (lwl/lwr pair). */
        MMI_LWC1(%[ftmp0], %[src], 0x00)
        "lwl %[tmp0], 0x03(%[addr0]) \n\t"
        "lwr %[tmp0], 0x00(%[addr0]) \n\t"
        /* addr1 = second destination row of this pair. */
        PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
        MMI_SWC1(%[ftmp0], %[dst], 0x00)
        /* Two rows consumed per iteration. */
        "addiu %[h], %[h], -0x02 \n\t"
        "swl %[tmp0], 0x03(%[addr1]) \n\t"
        "swr %[tmp0], 0x00(%[addr1]) \n\t"
        /* Advance src/dst to the row after the pair. */
        PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
        PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_LOW32
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [dst]"+&r"(dst), [src]"+&r"(src),
          [h]"+&r"(h)
        : [dststride]"r"((mips_reg)dststride),
          [srcstride]"r"((mips_reg)srcstride)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    int i;

    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
        memcpy(dst, src, 4);
#endif
}

/**
 * Horizontal 4-tap sub-pixel filter for 16-pixel-wide rows.
 *
 * @param dst       destination, advanced by dststride per row
 * @param dststride byte distance between destination rows
 * @param src       source, advanced by srcstride per row
 * @param srcstride byte distance between source rows
 * @param h         number of rows; the asm loop decrements it by 1 and
 *                  repeats while h != 0, with no check before the first
 *                  iteration
 * @param mx        filter selector; row mx - 1 of fourtap_subpel_filters is
 *                  used
 * @param my        not referenced in this function
 *
 * Each row is processed as two 8-pixel halves by PUT_VP8_EPEL8_H4_MMI, once
 * at (src, dst) and once at (src + 8, dst + 8). The formula comment inside
 * the body spells out the intended per-pixel result.
 *
 * The "#else" branch is a scalar version that is not compiled while the
 * "#if 1" condition stays 1.
 */
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
        ptrdiff_t srcstride, int h, int mx, int my)
{
#if 1
    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
    double ftmp[9];
    uint32_t tmp[1];
    union av_intfloat64 filter1;
    union av_intfloat64 filter2;
    union av_intfloat64 filter3;
    union av_intfloat64 filter4;
    mips_reg src1, dst1;
    DECLARE_VAR_ALL64;
    /* Only taps 1..4 of the selected filter row are used; they are passed
     * to the asm as the filter1..filter4 operands. */
    filter1.i = filter[1];
    filter2.i = filter[2];
    filter3.i = filter[3];
    filter4.i = filter[4];

    /*
    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];

    dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 7] + filter[3] * src[ 9] - filter[4] * src[10] + 64) >> 7];
    dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 8] + filter[3] * src[10] - filter[4] * src[11] + 64) >> 7];
    dst[10] = cm[(filter[2] * src[10] - filter[1] * src[ 9] + filter[3] * src[11] - filter[4] * src[12] + 64) >> 7];
    dst[11] = cm[(filter[2] * src[11] - filter[1] * src[10] + filter[3] * src[12] - filter[4] * src[13] + 64) >> 7];
    dst[12] = cm[(filter[2] * src[12] - filter[1] * src[11] + filter[3] * src[13] - filter[4] * src[14] + 64) >> 7];
    dst[13] = cm[(filter[2] * src[13] - filter[1] * src[12] + filter[3] * src[14] - filter[4] * src[15] + 64) >> 7];
    dst[14] = cm[(filter[2] * src[14] - filter[1] * src[13] + filter[3] * src[15] - filter[4] * src[16] + 64) >> 7];
    dst[15] = cm[(filter[2] * src[15] - filter[1] * src[14] + filter[3] * src[16] - filter[4] * src[17] + 64) >> 7];
    */
    __asm__ volatile (
        /* ftmp0 = 0 and ftmp4 = 7 are set up once, outside the row loop.
         * NOTE(review): presumably the zero register and the ">> 7" shift
         * count used by PUT_VP8_EPEL8_H4_MMI - confirm in the macro. */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "li %[tmp0], 0x07 \n\t"
        "mtc1 %[tmp0], %[ftmp4] \n\t"

        "1: \n\t"
        // 0 - 7
        PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])
        PTR_ADDIU "%[src1], %[src], 0x08 \n\t"
        PTR_ADDIU "%[dst1], %[dst], 0x08 \n\t"
        // 8 - 15
        PUT_VP8_EPEL8_H4_MMI(%[src1], %[dst1])

        /* One row per iteration. */
        "addiu %[h], %[h], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
        PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          [dst1]"=&r"(dst1), [src1]"=&r"(src1),
          [h]"+&r"(h),
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [ff_pw_64]"f"(ff_pw_64.f),
          [srcstride]"r"((mips_reg)srcstride),
          [dststride]"r"((mips_reg)dststride),
          [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
          [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    const uint8_t *filter = subpel_filters[mx - 1];
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            dst[x] = FILTER_4TAP(src, filter, 1);
        dst += dststride;
        src += srcstride;
    }
#endif
}
/**
 * Horizontal 4-tap sub-pixel filter for 8-pixel-wide rows.
 *
 * @param dst       destination, advanced by dststride per row
 * @param dststride byte distance between destination rows
 * @param src       source, advanced by srcstride per row
 * @param srcstride byte distance between source rows
 * @param h         number of rows; the asm loop decrements it by 1 and
 *                  repeats while h != 0, with no check before the first
 *                  iteration
 * @param mx        filter selector; row mx - 1 of fourtap_subpel_filters is
 *                  used
 * @param my        not referenced in this function
 *
 * Each row is produced by one PUT_VP8_EPEL8_H4_MMI expansion. The formula
 * comment inside the body spells out the intended per-pixel result.
 *
 * The "#else" branch is a scalar version that is not compiled while the
 * "#if 1" condition stays 1.
 */
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
        ptrdiff_t srcstride, int h, int mx, int my)
{
#if 1
    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
    double ftmp[9];
    uint32_t tmp[1];
    union av_intfloat64 filter1;
    union av_intfloat64 filter2;
    union av_intfloat64 filter3;
    union av_intfloat64 filter4;
    DECLARE_VAR_ALL64;
    /* Only taps 1..4 of the selected filter row are used; they are passed
     * to the asm as the filter1..filter4 operands. */
    filter1.i = filter[1];
    filter2.i = filter[2];
    filter3.i = filter[3];
    filter4.i = filter[4];


    /*
    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];
    */
    __asm__ volatile (
        /* ftmp0 = 0 and ftmp4 = 7 are set up once, outside the row loop.
         * NOTE(review): presumably the zero register and the ">> 7" shift
         * count used by PUT_VP8_EPEL8_H4_MMI - confirm in the macro. */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "li %[tmp0], 0x07 \n\t"
        "mtc1 %[tmp0], %[ftmp4] \n\t"

        "1: \n\t"
        PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])

        /* One row per iteration. */
        "addiu %[h], %[h], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
        PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          [h]"+&r"(h),
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [ff_pw_64]"f"(ff_pw_64.f),
          [srcstride]"r"((mips_reg)srcstride),
          [dststride]"r"((mips_reg)dststride),
          [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
          [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
        : "memory"
    );
#else
    /* Scalar version (disabled by the "#if 1" above). */
    const uint8_t *filter = subpel_filters[mx - 1];
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = FILTER_4TAP(src, filter, 1);
        dst += dststride;
        src += srcstride;
    }
#endif
}
1699cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 1700cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 1701cabdff1aSopenharmony_ci [h]"+&r"(h), 1702cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 1703cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 1704cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 1705cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 1706cabdff1aSopenharmony_ci [filter1]"f"(filter1.f), [filter2]"f"(filter2.f), 1707cabdff1aSopenharmony_ci [filter3]"f"(filter3.f), [filter4]"f"(filter4.f) 1708cabdff1aSopenharmony_ci : "memory" 1709cabdff1aSopenharmony_ci ); 1710cabdff1aSopenharmony_ci#else 1711cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 1712cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 1713cabdff1aSopenharmony_ci int x, y; 1714cabdff1aSopenharmony_ci 1715cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 1716cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 1717cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(src, filter, 1); 1718cabdff1aSopenharmony_ci dst += dststride; 1719cabdff1aSopenharmony_ci src += srcstride; 1720cabdff1aSopenharmony_ci } 1721cabdff1aSopenharmony_ci#endif 1722cabdff1aSopenharmony_ci} 1723cabdff1aSopenharmony_ci 1724cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 1725cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 1726cabdff1aSopenharmony_ci{ 1727cabdff1aSopenharmony_ci#if 1 1728cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[mx - 1]; 1729cabdff1aSopenharmony_ci double ftmp[6]; 1730cabdff1aSopenharmony_ci uint32_t tmp[1]; 1731cabdff1aSopenharmony_ci union av_intfloat64 filter1; 1732cabdff1aSopenharmony_ci union av_intfloat64 filter2; 1733cabdff1aSopenharmony_ci union av_intfloat64 filter3; 1734cabdff1aSopenharmony_ci union av_intfloat64 filter4; 1735cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 1736cabdff1aSopenharmony_ci filter1.i = filter[1]; 
1737cabdff1aSopenharmony_ci filter2.i = filter[2]; 1738cabdff1aSopenharmony_ci filter3.i = filter[3]; 1739cabdff1aSopenharmony_ci filter4.i = filter[4]; 1740cabdff1aSopenharmony_ci 1741cabdff1aSopenharmony_ci /* 1742cabdff1aSopenharmony_ci dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7]; 1743cabdff1aSopenharmony_ci dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7]; 1744cabdff1aSopenharmony_ci dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7]; 1745cabdff1aSopenharmony_ci dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7]; 1746cabdff1aSopenharmony_ci */ 1747cabdff1aSopenharmony_ci __asm__ volatile ( 1748cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1749cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 1750cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1751cabdff1aSopenharmony_ci 1752cabdff1aSopenharmony_ci "1: \n\t" 1753cabdff1aSopenharmony_ci PUT_VP8_EPEL4_H4_MMI(%[src], %[dst]) 1754cabdff1aSopenharmony_ci 1755cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 1756cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1757cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1758cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 1759cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1760cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1761cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1762cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 1763cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 1764cabdff1aSopenharmony_ci [h]"+&r"(h), 1765cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 1766cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 1767cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 
1768cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 1769cabdff1aSopenharmony_ci [filter1]"f"(filter1.f), [filter2]"f"(filter2.f), 1770cabdff1aSopenharmony_ci [filter3]"f"(filter3.f), [filter4]"f"(filter4.f) 1771cabdff1aSopenharmony_ci : "memory" 1772cabdff1aSopenharmony_ci ); 1773cabdff1aSopenharmony_ci#else 1774cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 1775cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 1776cabdff1aSopenharmony_ci int x, y; 1777cabdff1aSopenharmony_ci 1778cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 1779cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 1780cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(src, filter, 1); 1781cabdff1aSopenharmony_ci dst += dststride; 1782cabdff1aSopenharmony_ci src += srcstride; 1783cabdff1aSopenharmony_ci } 1784cabdff1aSopenharmony_ci#endif 1785cabdff1aSopenharmony_ci} 1786cabdff1aSopenharmony_ci 1787cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 1788cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 1789cabdff1aSopenharmony_ci{ 1790cabdff1aSopenharmony_ci#if 1 1791cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[mx - 1]; 1792cabdff1aSopenharmony_ci double ftmp[9]; 1793cabdff1aSopenharmony_ci uint32_t tmp[1]; 1794cabdff1aSopenharmony_ci mips_reg src1, dst1; 1795cabdff1aSopenharmony_ci union av_intfloat64 filter0; 1796cabdff1aSopenharmony_ci union av_intfloat64 filter1; 1797cabdff1aSopenharmony_ci union av_intfloat64 filter2; 1798cabdff1aSopenharmony_ci union av_intfloat64 filter3; 1799cabdff1aSopenharmony_ci union av_intfloat64 filter4; 1800cabdff1aSopenharmony_ci union av_intfloat64 filter5; 1801cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 1802cabdff1aSopenharmony_ci filter0.i = filter[0]; 1803cabdff1aSopenharmony_ci filter1.i = filter[1]; 1804cabdff1aSopenharmony_ci filter2.i = filter[2]; 1805cabdff1aSopenharmony_ci filter3.i = filter[3]; 
1806cabdff1aSopenharmony_ci filter4.i = filter[4]; 1807cabdff1aSopenharmony_ci filter5.i = filter[5]; 1808cabdff1aSopenharmony_ci 1809cabdff1aSopenharmony_ci /* 1810cabdff1aSopenharmony_ci dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7]; 1811cabdff1aSopenharmony_ci dst[ 1] = cm[(filter[2]*src[ 1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[ 2] - filter[4]*src[ 3] + filter[5]*src[ 4] + 64) >> 7]; 1812cabdff1aSopenharmony_ci dst[ 2] = cm[(filter[2]*src[ 2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[ 3] - filter[4]*src[ 4] + filter[5]*src[ 5] + 64) >> 7]; 1813cabdff1aSopenharmony_ci dst[ 3] = cm[(filter[2]*src[ 3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[ 4] - filter[4]*src[ 5] + filter[5]*src[ 6] + 64) >> 7]; 1814cabdff1aSopenharmony_ci dst[ 4] = cm[(filter[2]*src[ 4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[ 5] - filter[4]*src[ 6] + filter[5]*src[ 7] + 64) >> 7]; 1815cabdff1aSopenharmony_ci dst[ 5] = cm[(filter[2]*src[ 5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[ 6] - filter[4]*src[ 7] + filter[5]*src[ 8] + 64) >> 7]; 1816cabdff1aSopenharmony_ci dst[ 6] = cm[(filter[2]*src[ 6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[ 7] - filter[4]*src[ 8] + filter[5]*src[ 9] + 64) >> 7]; 1817cabdff1aSopenharmony_ci dst[ 7] = cm[(filter[2]*src[ 7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[ 8] - filter[4]*src[ 9] + filter[5]*src[10] + 64) >> 7]; 1818cabdff1aSopenharmony_ci 1819cabdff1aSopenharmony_ci dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 7] + filter[0]*src[ 6] + filter[3]*src[ 9] - filter[4]*src[10] + filter[5]*src[11] + 64) >> 7]; 1820cabdff1aSopenharmony_ci dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 8] + filter[0]*src[ 7] + filter[3]*src[10] - filter[4]*src[11] + filter[5]*src[12] + 64) >> 7]; 1821cabdff1aSopenharmony_ci dst[10] = cm[(filter[2]*src[10] - filter[1]*src[ 9] 
+ filter[0]*src[ 8] + filter[3]*src[11] - filter[4]*src[12] + filter[5]*src[13] + 64) >> 7]; 1822cabdff1aSopenharmony_ci dst[11] = cm[(filter[2]*src[11] - filter[1]*src[10] + filter[0]*src[ 9] + filter[3]*src[12] - filter[4]*src[13] + filter[5]*src[14] + 64) >> 7]; 1823cabdff1aSopenharmony_ci dst[12] = cm[(filter[2]*src[12] - filter[1]*src[11] + filter[0]*src[10] + filter[3]*src[13] - filter[4]*src[14] + filter[5]*src[15] + 64) >> 7]; 1824cabdff1aSopenharmony_ci dst[13] = cm[(filter[2]*src[13] - filter[1]*src[12] + filter[0]*src[11] + filter[3]*src[14] - filter[4]*src[15] + filter[5]*src[16] + 64) >> 7]; 1825cabdff1aSopenharmony_ci dst[14] = cm[(filter[2]*src[14] - filter[1]*src[13] + filter[0]*src[12] + filter[3]*src[15] - filter[4]*src[16] + filter[5]*src[17] + 64) >> 7]; 1826cabdff1aSopenharmony_ci dst[15] = cm[(filter[2]*src[15] - filter[1]*src[14] + filter[0]*src[13] + filter[3]*src[16] - filter[4]*src[17] + filter[5]*src[18] + 64) >> 7]; 1827cabdff1aSopenharmony_ci */ 1828cabdff1aSopenharmony_ci __asm__ volatile ( 1829cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1830cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 1831cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1832cabdff1aSopenharmony_ci 1833cabdff1aSopenharmony_ci "1: \n\t" 1834cabdff1aSopenharmony_ci // 0 - 7 1835cabdff1aSopenharmony_ci PUT_VP8_EPEL8_H6_MMI(%[src], %[dst]) 1836cabdff1aSopenharmony_ci PTR_ADDIU "%[src1], %[src], 0x08 \n\t" 1837cabdff1aSopenharmony_ci PTR_ADDIU "%[dst1], %[dst], 0x08 \n\t" 1838cabdff1aSopenharmony_ci // 8 - 15 1839cabdff1aSopenharmony_ci PUT_VP8_EPEL8_H6_MMI(%[src1], %[dst1]) 1840cabdff1aSopenharmony_ci 1841cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 1842cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1843cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1844cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 1845cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 
1846cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1847cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1848cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1849cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 1850cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 1851cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 1852cabdff1aSopenharmony_ci [dst1]"=&r"(dst1), [src1]"=&r"(src1), 1853cabdff1aSopenharmony_ci [h]"+&r"(h), 1854cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 1855cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 1856cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 1857cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 1858cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 1859cabdff1aSopenharmony_ci [filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 1860cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 1861cabdff1aSopenharmony_ci : "memory" 1862cabdff1aSopenharmony_ci ); 1863cabdff1aSopenharmony_ci#else 1864cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 1865cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 1866cabdff1aSopenharmony_ci int x, y; 1867cabdff1aSopenharmony_ci 1868cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 1869cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 1870cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, 1); 1871cabdff1aSopenharmony_ci dst += dststride; 1872cabdff1aSopenharmony_ci src += srcstride; 1873cabdff1aSopenharmony_ci } 1874cabdff1aSopenharmony_ci#endif 1875cabdff1aSopenharmony_ci} 1876cabdff1aSopenharmony_ci 1877cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 1878cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 1879cabdff1aSopenharmony_ci{ 1880cabdff1aSopenharmony_ci#if 1 1881cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[mx - 1]; 
1882cabdff1aSopenharmony_ci double ftmp[9]; 1883cabdff1aSopenharmony_ci uint32_t tmp[1]; 1884cabdff1aSopenharmony_ci union av_intfloat64 filter0; 1885cabdff1aSopenharmony_ci union av_intfloat64 filter1; 1886cabdff1aSopenharmony_ci union av_intfloat64 filter2; 1887cabdff1aSopenharmony_ci union av_intfloat64 filter3; 1888cabdff1aSopenharmony_ci union av_intfloat64 filter4; 1889cabdff1aSopenharmony_ci union av_intfloat64 filter5; 1890cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 1891cabdff1aSopenharmony_ci filter0.i = filter[0]; 1892cabdff1aSopenharmony_ci filter1.i = filter[1]; 1893cabdff1aSopenharmony_ci filter2.i = filter[2]; 1894cabdff1aSopenharmony_ci filter3.i = filter[3]; 1895cabdff1aSopenharmony_ci filter4.i = filter[4]; 1896cabdff1aSopenharmony_ci filter5.i = filter[5]; 1897cabdff1aSopenharmony_ci 1898cabdff1aSopenharmony_ci /* 1899cabdff1aSopenharmony_ci dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7]; 1900cabdff1aSopenharmony_ci dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7]; 1901cabdff1aSopenharmony_ci dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7]; 1902cabdff1aSopenharmony_ci dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7]; 1903cabdff1aSopenharmony_ci dst[4] = cm[(filter[2]*src[4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[5] - filter[4]*src[6] + filter[5]*src[ 7] + 64) >> 7]; 1904cabdff1aSopenharmony_ci dst[5] = cm[(filter[2]*src[5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[6] - filter[4]*src[7] + filter[5]*src[ 8] + 64) >> 7]; 1905cabdff1aSopenharmony_ci dst[6] = cm[(filter[2]*src[6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[7] - filter[4]*src[8] + 
filter[5]*src[ 9] + 64) >> 7]; 1906cabdff1aSopenharmony_ci dst[7] = cm[(filter[2]*src[7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[8] - filter[4]*src[9] + filter[5]*src[10] + 64) >> 7]; 1907cabdff1aSopenharmony_ci */ 1908cabdff1aSopenharmony_ci __asm__ volatile ( 1909cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1910cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 1911cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1912cabdff1aSopenharmony_ci 1913cabdff1aSopenharmony_ci "1: \n\t" 1914cabdff1aSopenharmony_ci PUT_VP8_EPEL8_H6_MMI(%[src], %[dst]) 1915cabdff1aSopenharmony_ci 1916cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 1917cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1918cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1919cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 1920cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1921cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1922cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1923cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1924cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 1925cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 1926cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 1927cabdff1aSopenharmony_ci [h]"+&r"(h), 1928cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 1929cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 1930cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 1931cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 1932cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 1933cabdff1aSopenharmony_ci [filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 1934cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 1935cabdff1aSopenharmony_ci : "memory" 1936cabdff1aSopenharmony_ci ); 1937cabdff1aSopenharmony_ci#else 1938cabdff1aSopenharmony_ci const uint8_t *filter = 
subpel_filters[mx - 1]; 1939cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 1940cabdff1aSopenharmony_ci int x, y; 1941cabdff1aSopenharmony_ci 1942cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 1943cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 1944cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, 1); 1945cabdff1aSopenharmony_ci dst += dststride; 1946cabdff1aSopenharmony_ci src += srcstride; 1947cabdff1aSopenharmony_ci } 1948cabdff1aSopenharmony_ci#endif 1949cabdff1aSopenharmony_ci} 1950cabdff1aSopenharmony_ci 1951cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 1952cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 1953cabdff1aSopenharmony_ci{ 1954cabdff1aSopenharmony_ci#if 1 1955cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[mx - 1]; 1956cabdff1aSopenharmony_ci double ftmp[6]; 1957cabdff1aSopenharmony_ci uint32_t tmp[1]; 1958cabdff1aSopenharmony_ci union av_intfloat64 filter0; 1959cabdff1aSopenharmony_ci union av_intfloat64 filter1; 1960cabdff1aSopenharmony_ci union av_intfloat64 filter2; 1961cabdff1aSopenharmony_ci union av_intfloat64 filter3; 1962cabdff1aSopenharmony_ci union av_intfloat64 filter4; 1963cabdff1aSopenharmony_ci union av_intfloat64 filter5; 1964cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 1965cabdff1aSopenharmony_ci filter0.i = filter[0]; 1966cabdff1aSopenharmony_ci filter1.i = filter[1]; 1967cabdff1aSopenharmony_ci filter2.i = filter[2]; 1968cabdff1aSopenharmony_ci filter3.i = filter[3]; 1969cabdff1aSopenharmony_ci filter4.i = filter[4]; 1970cabdff1aSopenharmony_ci filter5.i = filter[5]; 1971cabdff1aSopenharmony_ci 1972cabdff1aSopenharmony_ci /* 1973cabdff1aSopenharmony_ci dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7]; 1974cabdff1aSopenharmony_ci dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + 
filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7]; 1975cabdff1aSopenharmony_ci dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7]; 1976cabdff1aSopenharmony_ci dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7]; 1977cabdff1aSopenharmony_ci */ 1978cabdff1aSopenharmony_ci __asm__ volatile ( 1979cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1980cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 1981cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1982cabdff1aSopenharmony_ci 1983cabdff1aSopenharmony_ci "1: \n\t" 1984cabdff1aSopenharmony_ci PUT_VP8_EPEL4_H6_MMI(%[src], %[dst]) 1985cabdff1aSopenharmony_ci 1986cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 1987cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1988cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1989cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 1990cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1991cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1992cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1993cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 1994cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 1995cabdff1aSopenharmony_ci [h]"+&r"(h), 1996cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 1997cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 1998cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 1999cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2000cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 2001cabdff1aSopenharmony_ci [filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 2002cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 2003cabdff1aSopenharmony_ci : "memory" 2004cabdff1aSopenharmony_ci ); 
2005cabdff1aSopenharmony_ci#else 2006cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2007cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2008cabdff1aSopenharmony_ci int x, y; 2009cabdff1aSopenharmony_ci 2010cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2011cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2012cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, 1); 2013cabdff1aSopenharmony_ci dst += dststride; 2014cabdff1aSopenharmony_ci src += srcstride; 2015cabdff1aSopenharmony_ci } 2016cabdff1aSopenharmony_ci#endif 2017cabdff1aSopenharmony_ci} 2018cabdff1aSopenharmony_ci 2019cabdff1aSopenharmony_civoid ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2020cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2021cabdff1aSopenharmony_ci{ 2022cabdff1aSopenharmony_ci#if 1 2023cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2024cabdff1aSopenharmony_ci double ftmp[9]; 2025cabdff1aSopenharmony_ci uint32_t tmp[1]; 2026cabdff1aSopenharmony_ci mips_reg src0, src1, dst0; 2027cabdff1aSopenharmony_ci union av_intfloat64 filter1; 2028cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2029cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2030cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2031cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2032cabdff1aSopenharmony_ci filter1.i = filter[1]; 2033cabdff1aSopenharmony_ci filter2.i = filter[2]; 2034cabdff1aSopenharmony_ci filter3.i = filter[3]; 2035cabdff1aSopenharmony_ci filter4.i = filter[4]; 2036cabdff1aSopenharmony_ci 2037cabdff1aSopenharmony_ci /* 2038cabdff1aSopenharmony_ci dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; 2039cabdff1aSopenharmony_ci dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7]; 
2040cabdff1aSopenharmony_ci dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7]; 2041cabdff1aSopenharmony_ci dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7]; 2042cabdff1aSopenharmony_ci dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7]; 2043cabdff1aSopenharmony_ci dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7]; 2044cabdff1aSopenharmony_ci dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7]; 2045cabdff1aSopenharmony_ci dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7]; 2046cabdff1aSopenharmony_ci 2047cabdff1aSopenharmony_ci dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 8-srcstride] + filter[3] * src[ 8+srcstride] - filter[4] * src[ 8+2*srcstride] + 64) >> 7]; 2048cabdff1aSopenharmony_ci dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 9-srcstride] + filter[3] * src[ 9+srcstride] - filter[4] * src[ 9+2*srcstride] + 64) >> 7]; 2049cabdff1aSopenharmony_ci dst[10] = cm[(filter[2] * src[10] - filter[1] * src[10-srcstride] + filter[3] * src[10+srcstride] - filter[4] * src[10+2*srcstride] + 64) >> 7]; 2050cabdff1aSopenharmony_ci dst[11] = cm[(filter[2] * src[11] - filter[1] * src[11-srcstride] + filter[3] * src[11+srcstride] - filter[4] * src[11+2*srcstride] + 64) >> 7]; 2051cabdff1aSopenharmony_ci dst[12] = cm[(filter[2] * src[12] - filter[1] * src[12-srcstride] + filter[3] * src[12+srcstride] - filter[4] * src[12+2*srcstride] + 64) >> 7]; 2052cabdff1aSopenharmony_ci dst[13] = cm[(filter[2] * src[13] - filter[1] * src[13-srcstride] + 
filter[3] * src[13+srcstride] - filter[4] * src[13+2*srcstride] + 64) >> 7]; 2053cabdff1aSopenharmony_ci dst[14] = cm[(filter[2] * src[14] - filter[1] * src[14-srcstride] + filter[3] * src[14+srcstride] - filter[4] * src[14+2*srcstride] + 64) >> 7]; 2054cabdff1aSopenharmony_ci dst[15] = cm[(filter[2] * src[15] - filter[1] * src[15-srcstride] + filter[3] * src[15+srcstride] - filter[4] * src[15+2*srcstride] + 64) >> 7]; 2055cabdff1aSopenharmony_ci */ 2056cabdff1aSopenharmony_ci __asm__ volatile ( 2057cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2058cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2059cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2060cabdff1aSopenharmony_ci 2061cabdff1aSopenharmony_ci "1: \n\t" 2062cabdff1aSopenharmony_ci // 0 - 7 2063cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride]) 2064cabdff1aSopenharmony_ci PTR_ADDIU "%[src0], %[src], 0x08 \n\t" 2065cabdff1aSopenharmony_ci PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t" 2066cabdff1aSopenharmony_ci // 8 - 15 2067cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V4_MMI(%[src0], %[src1], %[dst], %[srcstride]) 2068cabdff1aSopenharmony_ci 2069cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2070cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2071cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2072cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2073cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2074cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2075cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2076cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2077cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 2078cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2079cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2080cabdff1aSopenharmony_ci [src0]"=&r"(src0), [dst0]"=&r"(dst0), 2081cabdff1aSopenharmony_ci [src1]"=&r"(src1), 2082cabdff1aSopenharmony_ci 
[h]"+&r"(h), 2083cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2084cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2085cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2086cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2087cabdff1aSopenharmony_ci [filter1]"f"(filter1.f), [filter2]"f"(filter2.f), 2088cabdff1aSopenharmony_ci [filter3]"f"(filter3.f), [filter4]"f"(filter4.f) 2089cabdff1aSopenharmony_ci : "memory" 2090cabdff1aSopenharmony_ci ); 2091cabdff1aSopenharmony_ci#else 2092cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2093cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2094cabdff1aSopenharmony_ci int x, y; 2095cabdff1aSopenharmony_ci 2096cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2097cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2098cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(src, filter, srcstride); 2099cabdff1aSopenharmony_ci dst += dststride; 2100cabdff1aSopenharmony_ci src += srcstride; 2101cabdff1aSopenharmony_ci } 2102cabdff1aSopenharmony_ci#endif 2103cabdff1aSopenharmony_ci} 2104cabdff1aSopenharmony_ci 2105cabdff1aSopenharmony_civoid ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2106cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2107cabdff1aSopenharmony_ci{ 2108cabdff1aSopenharmony_ci#if 1 2109cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2110cabdff1aSopenharmony_ci double ftmp[9]; 2111cabdff1aSopenharmony_ci uint32_t tmp[1]; 2112cabdff1aSopenharmony_ci mips_reg src1; 2113cabdff1aSopenharmony_ci union av_intfloat64 filter1; 2114cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2115cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2116cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2117cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2118cabdff1aSopenharmony_ci filter1.i = filter[1]; 2119cabdff1aSopenharmony_ci filter2.i = filter[2]; 2120cabdff1aSopenharmony_ci 
filter3.i = filter[3]; 2121cabdff1aSopenharmony_ci filter4.i = filter[4]; 2122cabdff1aSopenharmony_ci 2123cabdff1aSopenharmony_ci /* 2124cabdff1aSopenharmony_ci dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; 2125cabdff1aSopenharmony_ci dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7]; 2126cabdff1aSopenharmony_ci dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7]; 2127cabdff1aSopenharmony_ci dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7]; 2128cabdff1aSopenharmony_ci dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7]; 2129cabdff1aSopenharmony_ci dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7]; 2130cabdff1aSopenharmony_ci dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7]; 2131cabdff1aSopenharmony_ci dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7]; 2132cabdff1aSopenharmony_ci */ 2133cabdff1aSopenharmony_ci __asm__ volatile ( 2134cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2135cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2136cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2137cabdff1aSopenharmony_ci 2138cabdff1aSopenharmony_ci "1: \n\t" 2139cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride]) 2140cabdff1aSopenharmony_ci 2141cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 
2142cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2143cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2144cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2145cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2146cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2147cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2148cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2149cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 2150cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2151cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2152cabdff1aSopenharmony_ci [src1]"=&r"(src1), 2153cabdff1aSopenharmony_ci [h]"+&r"(h), 2154cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2155cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2156cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2157cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2158cabdff1aSopenharmony_ci [filter1]"f"(filter1.f), [filter2]"f"(filter2.f), 2159cabdff1aSopenharmony_ci [filter3]"f"(filter3.f), [filter4]"f"(filter4.f) 2160cabdff1aSopenharmony_ci : "memory" 2161cabdff1aSopenharmony_ci ); 2162cabdff1aSopenharmony_ci#else 2163cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2164cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2165cabdff1aSopenharmony_ci int x, y; 2166cabdff1aSopenharmony_ci 2167cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2168cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2169cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(src, filter, srcstride); 2170cabdff1aSopenharmony_ci dst += dststride; 2171cabdff1aSopenharmony_ci src += srcstride; 2172cabdff1aSopenharmony_ci } 2173cabdff1aSopenharmony_ci#endif 2174cabdff1aSopenharmony_ci} 2175cabdff1aSopenharmony_ci 2176cabdff1aSopenharmony_civoid ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2177cabdff1aSopenharmony_ci ptrdiff_t srcstride, 
int h, int mx, int my) 2178cabdff1aSopenharmony_ci{ 2179cabdff1aSopenharmony_ci#if 1 2180cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2181cabdff1aSopenharmony_ci double ftmp[6]; 2182cabdff1aSopenharmony_ci uint32_t tmp[1]; 2183cabdff1aSopenharmony_ci mips_reg src1; 2184cabdff1aSopenharmony_ci union av_intfloat64 filter1; 2185cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2186cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2187cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2188cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 2189cabdff1aSopenharmony_ci filter1.i = filter[1]; 2190cabdff1aSopenharmony_ci filter2.i = filter[2]; 2191cabdff1aSopenharmony_ci filter3.i = filter[3]; 2192cabdff1aSopenharmony_ci filter4.i = filter[4]; 2193cabdff1aSopenharmony_ci 2194cabdff1aSopenharmony_ci /* 2195cabdff1aSopenharmony_ci dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; 2196cabdff1aSopenharmony_ci dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7]; 2197cabdff1aSopenharmony_ci dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7]; 2198cabdff1aSopenharmony_ci dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7]; 2199cabdff1aSopenharmony_ci */ 2200cabdff1aSopenharmony_ci __asm__ volatile ( 2201cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2202cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2203cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2204cabdff1aSopenharmony_ci 2205cabdff1aSopenharmony_ci "1: \n\t" 2206cabdff1aSopenharmony_ci PUT_VP8_EPEL4_V4_MMI(%[src], %[src1], %[dst], %[srcstride]) 2207cabdff1aSopenharmony_ci 2208cabdff1aSopenharmony_ci "addiu %[h], %[h], 
-0x01 \n\t" 2209cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2210cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2211cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2212cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2213cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2214cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2215cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2216cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 2217cabdff1aSopenharmony_ci [src1]"=&r"(src1), 2218cabdff1aSopenharmony_ci [h]"+&r"(h), 2219cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2220cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2221cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2222cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2223cabdff1aSopenharmony_ci [filter1]"f"(filter1.f), [filter2]"f"(filter2.f), 2224cabdff1aSopenharmony_ci [filter3]"f"(filter3.f), [filter4]"f"(filter4.f) 2225cabdff1aSopenharmony_ci : "memory" 2226cabdff1aSopenharmony_ci ); 2227cabdff1aSopenharmony_ci#else 2228cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2229cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2230cabdff1aSopenharmony_ci int x, y; 2231cabdff1aSopenharmony_ci 2232cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2233cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2234cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(src, filter, srcstride); 2235cabdff1aSopenharmony_ci dst += dststride; 2236cabdff1aSopenharmony_ci src += srcstride; 2237cabdff1aSopenharmony_ci } 2238cabdff1aSopenharmony_ci#endif 2239cabdff1aSopenharmony_ci} 2240cabdff1aSopenharmony_ci 2241cabdff1aSopenharmony_civoid ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2242cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2243cabdff1aSopenharmony_ci{ 2244cabdff1aSopenharmony_ci#if 1 2245cabdff1aSopenharmony_ci 
const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2246cabdff1aSopenharmony_ci double ftmp[9]; 2247cabdff1aSopenharmony_ci uint32_t tmp[1]; 2248cabdff1aSopenharmony_ci mips_reg src0, src1, dst0; 2249cabdff1aSopenharmony_ci union av_intfloat64 filter0; 2250cabdff1aSopenharmony_ci union av_intfloat64 filter1; 2251cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2252cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2253cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2254cabdff1aSopenharmony_ci union av_intfloat64 filter5; 2255cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2256cabdff1aSopenharmony_ci filter0.i = filter[0]; 2257cabdff1aSopenharmony_ci filter1.i = filter[1]; 2258cabdff1aSopenharmony_ci filter2.i = filter[2]; 2259cabdff1aSopenharmony_ci filter3.i = filter[3]; 2260cabdff1aSopenharmony_ci filter4.i = filter[4]; 2261cabdff1aSopenharmony_ci filter5.i = filter[5]; 2262cabdff1aSopenharmony_ci 2263cabdff1aSopenharmony_ci /* 2264cabdff1aSopenharmony_ci dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; 2265cabdff1aSopenharmony_ci dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7]; 2266cabdff1aSopenharmony_ci dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7]; 2267cabdff1aSopenharmony_ci dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7]; 2268cabdff1aSopenharmony_ci dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - 
filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7]; 2269cabdff1aSopenharmony_ci dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7]; 2270cabdff1aSopenharmony_ci dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7]; 2271cabdff1aSopenharmony_ci dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7]; 2272cabdff1aSopenharmony_ci 2273cabdff1aSopenharmony_ci dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 8-srcstride] + filter[0]*src[ 8-2*srcstride] + filter[3]*src[ 8+srcstride] - filter[4]*src[ 8+2*srcstride] + filter[5]*src[ 8+3*srcstride] + 64) >> 7]; 2274cabdff1aSopenharmony_ci dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 9-srcstride] + filter[0]*src[ 9-2*srcstride] + filter[3]*src[ 9+srcstride] - filter[4]*src[ 9+2*srcstride] + filter[5]*src[ 9+3*srcstride] + 64) >> 7]; 2275cabdff1aSopenharmony_ci dst[10] = cm[(filter[2]*src[10] - filter[1]*src[10-srcstride] + filter[0]*src[10-2*srcstride] + filter[3]*src[10+srcstride] - filter[4]*src[10+2*srcstride] + filter[5]*src[10+3*srcstride] + 64) >> 7]; 2276cabdff1aSopenharmony_ci dst[11] = cm[(filter[2]*src[11] - filter[1]*src[11-srcstride] + filter[0]*src[11-2*srcstride] + filter[3]*src[11+srcstride] - filter[4]*src[11+2*srcstride] + filter[5]*src[11+3*srcstride] + 64) >> 7]; 2277cabdff1aSopenharmony_ci dst[12] = cm[(filter[2]*src[12] - filter[1]*src[12-srcstride] + filter[0]*src[12-2*srcstride] + filter[3]*src[12+srcstride] - filter[4]*src[12+2*srcstride] + filter[5]*src[12+3*srcstride] + 64) >> 7]; 2278cabdff1aSopenharmony_ci dst[13] = cm[(filter[2]*src[13] - 
filter[1]*src[13-srcstride] + filter[0]*src[13-2*srcstride] + filter[3]*src[13+srcstride] - filter[4]*src[13+2*srcstride] + filter[5]*src[13+3*srcstride] + 64) >> 7]; 2279cabdff1aSopenharmony_ci dst[14] = cm[(filter[2]*src[14] - filter[1]*src[14-srcstride] + filter[0]*src[14-2*srcstride] + filter[3]*src[14+srcstride] - filter[4]*src[14+2*srcstride] + filter[5]*src[14+3*srcstride] + 64) >> 7]; 2280cabdff1aSopenharmony_ci dst[15] = cm[(filter[2]*src[15] - filter[1]*src[15-srcstride] + filter[0]*src[15-2*srcstride] + filter[3]*src[15+srcstride] - filter[4]*src[15+2*srcstride] + filter[5]*src[15+3*srcstride] + 64) >> 7]; 2281cabdff1aSopenharmony_ci */ 2282cabdff1aSopenharmony_ci __asm__ volatile ( 2283cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2284cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2285cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2286cabdff1aSopenharmony_ci 2287cabdff1aSopenharmony_ci "1: \n\t" 2288cabdff1aSopenharmony_ci // 0 - 7 2289cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride]) 2290cabdff1aSopenharmony_ci PTR_ADDIU "%[src0], %[src], 0x08 \n\t" 2291cabdff1aSopenharmony_ci PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t" 2292cabdff1aSopenharmony_ci // 8 - 15 2293cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V6_MMI(%[src0], %[src1], %[dst0], %[srcstride]) 2294cabdff1aSopenharmony_ci 2295cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2296cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2297cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2298cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2299cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2300cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2301cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2302cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2303cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 2304cabdff1aSopenharmony_ci 
[tmp0]"=&r"(tmp[0]), 2305cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2306cabdff1aSopenharmony_ci [src0]"=&r"(src0), [dst0]"=&r"(dst0), 2307cabdff1aSopenharmony_ci [src1]"=&r"(src1), 2308cabdff1aSopenharmony_ci [h]"+&r"(h), 2309cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2310cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2311cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2312cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2313cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 2314cabdff1aSopenharmony_ci [filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 2315cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 2316cabdff1aSopenharmony_ci : "memory" 2317cabdff1aSopenharmony_ci ); 2318cabdff1aSopenharmony_ci#else 2319cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2320cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2321cabdff1aSopenharmony_ci int x, y; 2322cabdff1aSopenharmony_ci 2323cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2324cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2325cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, srcstride); 2326cabdff1aSopenharmony_ci dst += dststride; 2327cabdff1aSopenharmony_ci src += srcstride; 2328cabdff1aSopenharmony_ci } 2329cabdff1aSopenharmony_ci#endif 2330cabdff1aSopenharmony_ci} 2331cabdff1aSopenharmony_ci 2332cabdff1aSopenharmony_civoid ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2333cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2334cabdff1aSopenharmony_ci{ 2335cabdff1aSopenharmony_ci#if 1 2336cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2337cabdff1aSopenharmony_ci double ftmp[9]; 2338cabdff1aSopenharmony_ci uint32_t tmp[1]; 2339cabdff1aSopenharmony_ci mips_reg src1; 2340cabdff1aSopenharmony_ci union av_intfloat64 filter0; 2341cabdff1aSopenharmony_ci union av_intfloat64 filter1; 
2342cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2343cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2344cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2345cabdff1aSopenharmony_ci union av_intfloat64 filter5; 2346cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2347cabdff1aSopenharmony_ci filter0.i = filter[0]; 2348cabdff1aSopenharmony_ci filter1.i = filter[1]; 2349cabdff1aSopenharmony_ci filter2.i = filter[2]; 2350cabdff1aSopenharmony_ci filter3.i = filter[3]; 2351cabdff1aSopenharmony_ci filter4.i = filter[4]; 2352cabdff1aSopenharmony_ci filter5.i = filter[5]; 2353cabdff1aSopenharmony_ci 2354cabdff1aSopenharmony_ci /* 2355cabdff1aSopenharmony_ci dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; 2356cabdff1aSopenharmony_ci dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7]; 2357cabdff1aSopenharmony_ci dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7]; 2358cabdff1aSopenharmony_ci dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7]; 2359cabdff1aSopenharmony_ci dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7]; 2360cabdff1aSopenharmony_ci dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7]; 2361cabdff1aSopenharmony_ci 
dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7]; 2362cabdff1aSopenharmony_ci dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7]; 2363cabdff1aSopenharmony_ci */ 2364cabdff1aSopenharmony_ci __asm__ volatile ( 2365cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2366cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2367cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2368cabdff1aSopenharmony_ci 2369cabdff1aSopenharmony_ci "1: \n\t" 2370cabdff1aSopenharmony_ci PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride]) 2371cabdff1aSopenharmony_ci 2372cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2373cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2374cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2375cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2376cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2377cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2378cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2379cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2380cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 2381cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2382cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2383cabdff1aSopenharmony_ci [src1]"=&r"(src1), 2384cabdff1aSopenharmony_ci [h]"+&r"(h), 2385cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2386cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2387cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2388cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2389cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 2390cabdff1aSopenharmony_ci 
[filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 2391cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 2392cabdff1aSopenharmony_ci : "memory" 2393cabdff1aSopenharmony_ci ); 2394cabdff1aSopenharmony_ci#else 2395cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2396cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2397cabdff1aSopenharmony_ci int x, y; 2398cabdff1aSopenharmony_ci 2399cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2400cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2401cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, srcstride); 2402cabdff1aSopenharmony_ci dst += dststride; 2403cabdff1aSopenharmony_ci src += srcstride; 2404cabdff1aSopenharmony_ci } 2405cabdff1aSopenharmony_ci#endif 2406cabdff1aSopenharmony_ci} 2407cabdff1aSopenharmony_ci 2408cabdff1aSopenharmony_civoid ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2409cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2410cabdff1aSopenharmony_ci{ 2411cabdff1aSopenharmony_ci#if 1 2412cabdff1aSopenharmony_ci const uint64_t *filter = fourtap_subpel_filters[my - 1]; 2413cabdff1aSopenharmony_ci double ftmp[6]; 2414cabdff1aSopenharmony_ci uint32_t tmp[1]; 2415cabdff1aSopenharmony_ci mips_reg src1; 2416cabdff1aSopenharmony_ci union av_intfloat64 filter0; 2417cabdff1aSopenharmony_ci union av_intfloat64 filter1; 2418cabdff1aSopenharmony_ci union av_intfloat64 filter2; 2419cabdff1aSopenharmony_ci union av_intfloat64 filter3; 2420cabdff1aSopenharmony_ci union av_intfloat64 filter4; 2421cabdff1aSopenharmony_ci union av_intfloat64 filter5; 2422cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 2423cabdff1aSopenharmony_ci filter0.i = filter[0]; 2424cabdff1aSopenharmony_ci filter1.i = filter[1]; 2425cabdff1aSopenharmony_ci filter2.i = filter[2]; 2426cabdff1aSopenharmony_ci filter3.i = filter[3]; 2427cabdff1aSopenharmony_ci filter4.i = filter[4]; 2428cabdff1aSopenharmony_ci filter5.i = filter[5]; 
2429cabdff1aSopenharmony_ci 2430cabdff1aSopenharmony_ci /* 2431cabdff1aSopenharmony_ci dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; 2432cabdff1aSopenharmony_ci dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7]; 2433cabdff1aSopenharmony_ci dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7]; 2434cabdff1aSopenharmony_ci dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7]; 2435cabdff1aSopenharmony_ci */ 2436cabdff1aSopenharmony_ci __asm__ volatile ( 2437cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2438cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 2439cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2440cabdff1aSopenharmony_ci 2441cabdff1aSopenharmony_ci "1: \n\t" 2442cabdff1aSopenharmony_ci PUT_VP8_EPEL4_V6_MMI(%[src], %[src1], %[dst], %[srcstride]) 2443cabdff1aSopenharmony_ci 2444cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2445cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2446cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2447cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2448cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2449cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2450cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2451cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2452cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 2453cabdff1aSopenharmony_ci 
[src1]"=&r"(src1), 2454cabdff1aSopenharmony_ci [h]"+&r"(h), 2455cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src) 2456cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_64.f), 2457cabdff1aSopenharmony_ci [srcstride]"r"((mips_reg)srcstride), 2458cabdff1aSopenharmony_ci [dststride]"r"((mips_reg)dststride), 2459cabdff1aSopenharmony_ci [filter0]"f"(filter0.f), [filter1]"f"(filter1.f), 2460cabdff1aSopenharmony_ci [filter2]"f"(filter2.f), [filter3]"f"(filter3.f), 2461cabdff1aSopenharmony_ci [filter4]"f"(filter4.f), [filter5]"f"(filter5.f) 2462cabdff1aSopenharmony_ci : "memory" 2463cabdff1aSopenharmony_ci ); 2464cabdff1aSopenharmony_ci#else 2465cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; 2466cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2467cabdff1aSopenharmony_ci int x, y; 2468cabdff1aSopenharmony_ci 2469cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2470cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2471cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(src, filter, srcstride); 2472cabdff1aSopenharmony_ci dst += dststride; 2473cabdff1aSopenharmony_ci src += srcstride; 2474cabdff1aSopenharmony_ci } 2475cabdff1aSopenharmony_ci#endif 2476cabdff1aSopenharmony_ci} 2477cabdff1aSopenharmony_ci 2478cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2479cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2480cabdff1aSopenharmony_ci{ 2481cabdff1aSopenharmony_ci#if 1 2482cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[560]); 2483cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2484cabdff1aSopenharmony_ci 2485cabdff1aSopenharmony_ci src -= srcstride; 2486cabdff1aSopenharmony_ci ff_put_vp8_epel16_h4_mmi(tmp, 16, src, srcstride, h + 3, mx, my); 2487cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2488cabdff1aSopenharmony_ci ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my); 2489cabdff1aSopenharmony_ci#else 2490cabdff1aSopenharmony_ci const 
uint8_t *filter = subpel_filters[mx - 1]; 2491cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2492cabdff1aSopenharmony_ci int x, y; 2493cabdff1aSopenharmony_ci uint8_t tmp_array[560]; 2494cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2495cabdff1aSopenharmony_ci 2496cabdff1aSopenharmony_ci src -= srcstride; 2497cabdff1aSopenharmony_ci 2498cabdff1aSopenharmony_ci for (y = 0; y < h + 3; y++) { 2499cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2500cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2501cabdff1aSopenharmony_ci tmp += 16; 2502cabdff1aSopenharmony_ci src += srcstride; 2503cabdff1aSopenharmony_ci } 2504cabdff1aSopenharmony_ci 2505cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2506cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2507cabdff1aSopenharmony_ci 2508cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2509cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2510cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 16); 2511cabdff1aSopenharmony_ci dst += dststride; 2512cabdff1aSopenharmony_ci tmp += 16; 2513cabdff1aSopenharmony_ci } 2514cabdff1aSopenharmony_ci#endif 2515cabdff1aSopenharmony_ci} 2516cabdff1aSopenharmony_ci 2517cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2518cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2519cabdff1aSopenharmony_ci{ 2520cabdff1aSopenharmony_ci#if 1 2521cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[152]); 2522cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2523cabdff1aSopenharmony_ci 2524cabdff1aSopenharmony_ci src -= srcstride; 2525cabdff1aSopenharmony_ci ff_put_vp8_epel8_h4_mmi(tmp, 8, src, srcstride, h + 3, mx, my); 2526cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2527cabdff1aSopenharmony_ci ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my); 2528cabdff1aSopenharmony_ci#else 2529cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 
2530cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2531cabdff1aSopenharmony_ci int x, y; 2532cabdff1aSopenharmony_ci uint8_t tmp_array[152]; 2533cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2534cabdff1aSopenharmony_ci 2535cabdff1aSopenharmony_ci src -= srcstride; 2536cabdff1aSopenharmony_ci 2537cabdff1aSopenharmony_ci for (y = 0; y < h + 3; y++) { 2538cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2539cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2540cabdff1aSopenharmony_ci tmp += 8; 2541cabdff1aSopenharmony_ci src += srcstride; 2542cabdff1aSopenharmony_ci } 2543cabdff1aSopenharmony_ci 2544cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2545cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2546cabdff1aSopenharmony_ci 2547cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2548cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2549cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 8); 2550cabdff1aSopenharmony_ci dst += dststride; 2551cabdff1aSopenharmony_ci tmp += 8; 2552cabdff1aSopenharmony_ci } 2553cabdff1aSopenharmony_ci#endif 2554cabdff1aSopenharmony_ci} 2555cabdff1aSopenharmony_ci 2556cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2557cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2558cabdff1aSopenharmony_ci{ 2559cabdff1aSopenharmony_ci#if 1 2560cabdff1aSopenharmony_ci DECLARE_ALIGNED(4, uint8_t, tmp_array[44]); 2561cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2562cabdff1aSopenharmony_ci 2563cabdff1aSopenharmony_ci src -= srcstride; 2564cabdff1aSopenharmony_ci ff_put_vp8_epel4_h4_mmi(tmp, 4, src, srcstride, h + 3, mx, my); 2565cabdff1aSopenharmony_ci tmp = tmp_array + 4; 2566cabdff1aSopenharmony_ci ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my); 2567cabdff1aSopenharmony_ci#else 2568cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2569cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + 
MAX_NEG_CROP; 2570cabdff1aSopenharmony_ci int x, y; 2571cabdff1aSopenharmony_ci uint8_t tmp_array[44]; 2572cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2573cabdff1aSopenharmony_ci 2574cabdff1aSopenharmony_ci src -= srcstride; 2575cabdff1aSopenharmony_ci 2576cabdff1aSopenharmony_ci for (y = 0; y < h + 3; y++) { 2577cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2578cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2579cabdff1aSopenharmony_ci tmp += 4; 2580cabdff1aSopenharmony_ci src += srcstride; 2581cabdff1aSopenharmony_ci } 2582cabdff1aSopenharmony_ci tmp = tmp_array + 4; 2583cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2584cabdff1aSopenharmony_ci 2585cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2586cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2587cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 4); 2588cabdff1aSopenharmony_ci dst += dststride; 2589cabdff1aSopenharmony_ci tmp += 4; 2590cabdff1aSopenharmony_ci } 2591cabdff1aSopenharmony_ci#endif 2592cabdff1aSopenharmony_ci} 2593cabdff1aSopenharmony_ci 2594cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2595cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2596cabdff1aSopenharmony_ci{ 2597cabdff1aSopenharmony_ci#if 1 2598cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[592]); 2599cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2600cabdff1aSopenharmony_ci 2601cabdff1aSopenharmony_ci src -= 2 * srcstride; 2602cabdff1aSopenharmony_ci ff_put_vp8_epel16_h4_mmi(tmp, 16, src, srcstride, h + 5, mx, my); 2603cabdff1aSopenharmony_ci tmp = tmp_array + 32; 2604cabdff1aSopenharmony_ci ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my); 2605cabdff1aSopenharmony_ci#else 2606cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2607cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2608cabdff1aSopenharmony_ci int x, y; 2609cabdff1aSopenharmony_ci 
uint8_t tmp_array[592]; 2610cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2611cabdff1aSopenharmony_ci 2612cabdff1aSopenharmony_ci src -= 2 * srcstride; 2613cabdff1aSopenharmony_ci 2614cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2615cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2616cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2617cabdff1aSopenharmony_ci tmp += 16; 2618cabdff1aSopenharmony_ci src += srcstride; 2619cabdff1aSopenharmony_ci } 2620cabdff1aSopenharmony_ci 2621cabdff1aSopenharmony_ci tmp = tmp_array + 32; 2622cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2623cabdff1aSopenharmony_ci 2624cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2625cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2626cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 16); 2627cabdff1aSopenharmony_ci dst += dststride; 2628cabdff1aSopenharmony_ci tmp += 16; 2629cabdff1aSopenharmony_ci } 2630cabdff1aSopenharmony_ci#endif 2631cabdff1aSopenharmony_ci} 2632cabdff1aSopenharmony_ci 2633cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2634cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2635cabdff1aSopenharmony_ci{ 2636cabdff1aSopenharmony_ci#if 1 2637cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[168]); 2638cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2639cabdff1aSopenharmony_ci 2640cabdff1aSopenharmony_ci src -= 2 * srcstride; 2641cabdff1aSopenharmony_ci ff_put_vp8_epel8_h4_mmi(tmp, 8, src, srcstride, h + 5, mx, my); 2642cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2643cabdff1aSopenharmony_ci ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my); 2644cabdff1aSopenharmony_ci#else 2645cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2646cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2647cabdff1aSopenharmony_ci int x, y; 2648cabdff1aSopenharmony_ci uint8_t tmp_array[168]; 
2649cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2650cabdff1aSopenharmony_ci 2651cabdff1aSopenharmony_ci src -= 2 * srcstride; 2652cabdff1aSopenharmony_ci 2653cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2654cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2655cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2656cabdff1aSopenharmony_ci tmp += 8; 2657cabdff1aSopenharmony_ci src += srcstride; 2658cabdff1aSopenharmony_ci } 2659cabdff1aSopenharmony_ci 2660cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2661cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2662cabdff1aSopenharmony_ci 2663cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2664cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2665cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 8); 2666cabdff1aSopenharmony_ci dst += dststride; 2667cabdff1aSopenharmony_ci tmp += 8; 2668cabdff1aSopenharmony_ci } 2669cabdff1aSopenharmony_ci#endif 2670cabdff1aSopenharmony_ci} 2671cabdff1aSopenharmony_ci 2672cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2673cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2674cabdff1aSopenharmony_ci{ 2675cabdff1aSopenharmony_ci#if 1 2676cabdff1aSopenharmony_ci DECLARE_ALIGNED(4, uint8_t, tmp_array[52]); 2677cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2678cabdff1aSopenharmony_ci 2679cabdff1aSopenharmony_ci src -= 2 * srcstride; 2680cabdff1aSopenharmony_ci ff_put_vp8_epel4_h4_mmi(tmp, 4, src, srcstride, h + 5, mx, my); 2681cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2682cabdff1aSopenharmony_ci ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my); 2683cabdff1aSopenharmony_ci#else 2684cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2685cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2686cabdff1aSopenharmony_ci int x, y; 2687cabdff1aSopenharmony_ci uint8_t tmp_array[52]; 2688cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 
2689cabdff1aSopenharmony_ci 2690cabdff1aSopenharmony_ci src -= 2 * srcstride; 2691cabdff1aSopenharmony_ci 2692cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2693cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2694cabdff1aSopenharmony_ci tmp[x] = FILTER_4TAP(src, filter, 1); 2695cabdff1aSopenharmony_ci tmp += 4; 2696cabdff1aSopenharmony_ci src += srcstride; 2697cabdff1aSopenharmony_ci } 2698cabdff1aSopenharmony_ci 2699cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2700cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2701cabdff1aSopenharmony_ci 2702cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2703cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2704cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 4); 2705cabdff1aSopenharmony_ci dst += dststride; 2706cabdff1aSopenharmony_ci tmp += 4; 2707cabdff1aSopenharmony_ci } 2708cabdff1aSopenharmony_ci#endif 2709cabdff1aSopenharmony_ci} 2710cabdff1aSopenharmony_ci 2711cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2712cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2713cabdff1aSopenharmony_ci{ 2714cabdff1aSopenharmony_ci#if 1 2715cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[560]); 2716cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2717cabdff1aSopenharmony_ci 2718cabdff1aSopenharmony_ci src -= srcstride; 2719cabdff1aSopenharmony_ci ff_put_vp8_epel16_h6_mmi(tmp, 16, src, srcstride, h + 3, mx, my); 2720cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2721cabdff1aSopenharmony_ci ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my); 2722cabdff1aSopenharmony_ci#else 2723cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2724cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2725cabdff1aSopenharmony_ci int x, y; 2726cabdff1aSopenharmony_ci uint8_t tmp_array[560]; 2727cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2728cabdff1aSopenharmony_ci 
2729cabdff1aSopenharmony_ci src -= srcstride; 2730cabdff1aSopenharmony_ci 2731cabdff1aSopenharmony_ci for (y = 0; y < h + 3; y++) { 2732cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2733cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, filter, 1); 2734cabdff1aSopenharmony_ci tmp += 16; 2735cabdff1aSopenharmony_ci src += srcstride; 2736cabdff1aSopenharmony_ci } 2737cabdff1aSopenharmony_ci 2738cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2739cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2740cabdff1aSopenharmony_ci 2741cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2742cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2743cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 16); 2744cabdff1aSopenharmony_ci dst += dststride; 2745cabdff1aSopenharmony_ci tmp += 16; 2746cabdff1aSopenharmony_ci } 2747cabdff1aSopenharmony_ci#endif 2748cabdff1aSopenharmony_ci} 2749cabdff1aSopenharmony_ci 2750cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2751cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2752cabdff1aSopenharmony_ci{ 2753cabdff1aSopenharmony_ci#if 1 2754cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[152]); 2755cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2756cabdff1aSopenharmony_ci 2757cabdff1aSopenharmony_ci src -= srcstride; 2758cabdff1aSopenharmony_ci ff_put_vp8_epel8_h6_mmi(tmp, 8, src, srcstride, h + 3, mx, my); 2759cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2760cabdff1aSopenharmony_ci ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my); 2761cabdff1aSopenharmony_ci#else 2762cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2763cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2764cabdff1aSopenharmony_ci int x, y; 2765cabdff1aSopenharmony_ci uint8_t tmp_array[152]; 2766cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2767cabdff1aSopenharmony_ci 2768cabdff1aSopenharmony_ci src -= srcstride; 
2769cabdff1aSopenharmony_ci 2770cabdff1aSopenharmony_ci for (y = 0; y < h + 3; y++) { 2771cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2772cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, filter, 1); 2773cabdff1aSopenharmony_ci tmp += 8; 2774cabdff1aSopenharmony_ci src += srcstride; 2775cabdff1aSopenharmony_ci } 2776cabdff1aSopenharmony_ci 2777cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2778cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2779cabdff1aSopenharmony_ci 2780cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2781cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2782cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 8); 2783cabdff1aSopenharmony_ci dst += dststride; 2784cabdff1aSopenharmony_ci tmp += 8; 2785cabdff1aSopenharmony_ci } 2786cabdff1aSopenharmony_ci#endif 2787cabdff1aSopenharmony_ci} 2788cabdff1aSopenharmony_ci 2789cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2790cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2791cabdff1aSopenharmony_ci{ 2792cabdff1aSopenharmony_ci#if 1 2793cabdff1aSopenharmony_ci DECLARE_ALIGNED(4, uint8_t, tmp_array[44]); 2794cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2795cabdff1aSopenharmony_ci 2796cabdff1aSopenharmony_ci src -= srcstride; 2797cabdff1aSopenharmony_ci ff_put_vp8_epel4_h6_mmi(tmp, 4, src, srcstride, h + 3, mx, my); 2798cabdff1aSopenharmony_ci tmp = tmp_array + 4; 2799cabdff1aSopenharmony_ci ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my); 2800cabdff1aSopenharmony_ci#else 2801cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2802cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2803cabdff1aSopenharmony_ci int x, y; 2804cabdff1aSopenharmony_ci uint8_t tmp_array[44]; 2805cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2806cabdff1aSopenharmony_ci 2807cabdff1aSopenharmony_ci src -= srcstride; 2808cabdff1aSopenharmony_ci 2809cabdff1aSopenharmony_ci for (y = 
0; y < h + 3; y++) { 2810cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2811cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, filter, 1); 2812cabdff1aSopenharmony_ci tmp += 4; 2813cabdff1aSopenharmony_ci src += srcstride; 2814cabdff1aSopenharmony_ci } 2815cabdff1aSopenharmony_ci 2816cabdff1aSopenharmony_ci tmp = tmp_array + 4; 2817cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2818cabdff1aSopenharmony_ci 2819cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2820cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2821cabdff1aSopenharmony_ci dst[x] = FILTER_4TAP(tmp, filter, 4); 2822cabdff1aSopenharmony_ci dst += dststride; 2823cabdff1aSopenharmony_ci tmp += 4; 2824cabdff1aSopenharmony_ci } 2825cabdff1aSopenharmony_ci#endif 2826cabdff1aSopenharmony_ci} 2827cabdff1aSopenharmony_ci 2828cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2829cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2830cabdff1aSopenharmony_ci{ 2831cabdff1aSopenharmony_ci#if 1 2832cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[592]); 2833cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2834cabdff1aSopenharmony_ci 2835cabdff1aSopenharmony_ci src -= 2 * srcstride; 2836cabdff1aSopenharmony_ci ff_put_vp8_epel16_h6_mmi(tmp, 16, src, srcstride, h + 5, mx, my); 2837cabdff1aSopenharmony_ci tmp = tmp_array + 32; 2838cabdff1aSopenharmony_ci ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my); 2839cabdff1aSopenharmony_ci#else 2840cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2841cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2842cabdff1aSopenharmony_ci int x, y; 2843cabdff1aSopenharmony_ci uint8_t tmp_array[592]; 2844cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2845cabdff1aSopenharmony_ci 2846cabdff1aSopenharmony_ci src -= 2 * srcstride; 2847cabdff1aSopenharmony_ci 2848cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2849cabdff1aSopenharmony_ci 
for (x = 0; x < 16; x++) 2850cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, filter, 1); 2851cabdff1aSopenharmony_ci tmp += 16; 2852cabdff1aSopenharmony_ci src += srcstride; 2853cabdff1aSopenharmony_ci } 2854cabdff1aSopenharmony_ci 2855cabdff1aSopenharmony_ci tmp = tmp_array + 32; 2856cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2857cabdff1aSopenharmony_ci 2858cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2859cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 2860cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 16); 2861cabdff1aSopenharmony_ci dst += dststride; 2862cabdff1aSopenharmony_ci tmp += 16; 2863cabdff1aSopenharmony_ci } 2864cabdff1aSopenharmony_ci#endif 2865cabdff1aSopenharmony_ci} 2866cabdff1aSopenharmony_ci 2867cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2868cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2869cabdff1aSopenharmony_ci{ 2870cabdff1aSopenharmony_ci#if 1 2871cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[168]); 2872cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2873cabdff1aSopenharmony_ci 2874cabdff1aSopenharmony_ci src -= 2 * srcstride; 2875cabdff1aSopenharmony_ci ff_put_vp8_epel8_h6_mmi(tmp, 8, src, srcstride, h + 5, mx, my); 2876cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2877cabdff1aSopenharmony_ci ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my); 2878cabdff1aSopenharmony_ci#else 2879cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2880cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2881cabdff1aSopenharmony_ci int x, y; 2882cabdff1aSopenharmony_ci uint8_t tmp_array[168]; 2883cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2884cabdff1aSopenharmony_ci 2885cabdff1aSopenharmony_ci src -= 2 * srcstride; 2886cabdff1aSopenharmony_ci 2887cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2888cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 
2889cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, filter, 1); 2890cabdff1aSopenharmony_ci tmp += 8; 2891cabdff1aSopenharmony_ci src += srcstride; 2892cabdff1aSopenharmony_ci } 2893cabdff1aSopenharmony_ci 2894cabdff1aSopenharmony_ci tmp = tmp_array + 16; 2895cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2896cabdff1aSopenharmony_ci 2897cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2898cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 2899cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 8); 2900cabdff1aSopenharmony_ci dst += dststride; 2901cabdff1aSopenharmony_ci tmp += 8; 2902cabdff1aSopenharmony_ci } 2903cabdff1aSopenharmony_ci#endif 2904cabdff1aSopenharmony_ci} 2905cabdff1aSopenharmony_ci 2906cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, 2907cabdff1aSopenharmony_ci ptrdiff_t srcstride, int h, int mx, int my) 2908cabdff1aSopenharmony_ci{ 2909cabdff1aSopenharmony_ci#if 1 2910cabdff1aSopenharmony_ci DECLARE_ALIGNED(4, uint8_t, tmp_array[52]); 2911cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2912cabdff1aSopenharmony_ci 2913cabdff1aSopenharmony_ci src -= 2 * srcstride; 2914cabdff1aSopenharmony_ci ff_put_vp8_epel4_h6_mmi(tmp, 4, src, srcstride, h + 5, mx, my); 2915cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2916cabdff1aSopenharmony_ci ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my); 2917cabdff1aSopenharmony_ci#else 2918cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; 2919cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 2920cabdff1aSopenharmony_ci int x, y; 2921cabdff1aSopenharmony_ci uint8_t tmp_array[52]; 2922cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 2923cabdff1aSopenharmony_ci 2924cabdff1aSopenharmony_ci src -= 2 * srcstride; 2925cabdff1aSopenharmony_ci 2926cabdff1aSopenharmony_ci for (y = 0; y < h + 5; y++) { 2927cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2928cabdff1aSopenharmony_ci tmp[x] = FILTER_6TAP(src, 
filter, 1); 2929cabdff1aSopenharmony_ci tmp += 4; 2930cabdff1aSopenharmony_ci src += srcstride; 2931cabdff1aSopenharmony_ci } 2932cabdff1aSopenharmony_ci 2933cabdff1aSopenharmony_ci tmp = tmp_array + 8; 2934cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; 2935cabdff1aSopenharmony_ci 2936cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 2937cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 2938cabdff1aSopenharmony_ci dst[x] = FILTER_6TAP(tmp, filter, 4); 2939cabdff1aSopenharmony_ci dst += dststride; 2940cabdff1aSopenharmony_ci tmp += 4; 2941cabdff1aSopenharmony_ci } 2942cabdff1aSopenharmony_ci#endif 2943cabdff1aSopenharmony_ci} 2944cabdff1aSopenharmony_ci 2945cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 2946cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 2947cabdff1aSopenharmony_ci{ 2948cabdff1aSopenharmony_ci#if 1 2949cabdff1aSopenharmony_ci union mmi_intfloat64 a, b; 2950cabdff1aSopenharmony_ci double ftmp[7]; 2951cabdff1aSopenharmony_ci uint32_t tmp[1]; 2952cabdff1aSopenharmony_ci mips_reg dst0, src0; 2953cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2954cabdff1aSopenharmony_ci a.i = 8 - mx; 2955cabdff1aSopenharmony_ci b.i = mx; 2956cabdff1aSopenharmony_ci 2957cabdff1aSopenharmony_ci /* 2958cabdff1aSopenharmony_ci dst[0] = (a * src[0] + b * src[1] + 4) >> 3; 2959cabdff1aSopenharmony_ci dst[1] = (a * src[1] + b * src[2] + 4) >> 3; 2960cabdff1aSopenharmony_ci dst[2] = (a * src[2] + b * src[3] + 4) >> 3; 2961cabdff1aSopenharmony_ci dst[3] = (a * src[3] + b * src[4] + 4) >> 3; 2962cabdff1aSopenharmony_ci dst[4] = (a * src[4] + b * src[5] + 4) >> 3; 2963cabdff1aSopenharmony_ci dst[5] = (a * src[5] + b * src[6] + 4) >> 3; 2964cabdff1aSopenharmony_ci dst[6] = (a * src[6] + b * src[7] + 4) >> 3; 2965cabdff1aSopenharmony_ci dst[7] = (a * src[7] + b * src[8] + 4) >> 3; 2966cabdff1aSopenharmony_ci 2967cabdff1aSopenharmony_ci dst[ 8] = (a * src[ 8] + b * src[ 9] + 4) >> 3; 
2968cabdff1aSopenharmony_ci dst[ 9] = (a * src[ 9] + b * src[10] + 4) >> 3; 2969cabdff1aSopenharmony_ci dst[10] = (a * src[10] + b * src[11] + 4) >> 3; 2970cabdff1aSopenharmony_ci dst[11] = (a * src[11] + b * src[12] + 4) >> 3; 2971cabdff1aSopenharmony_ci dst[12] = (a * src[12] + b * src[13] + 4) >> 3; 2972cabdff1aSopenharmony_ci dst[13] = (a * src[13] + b * src[14] + 4) >> 3; 2973cabdff1aSopenharmony_ci dst[14] = (a * src[14] + b * src[15] + 4) >> 3; 2974cabdff1aSopenharmony_ci dst[15] = (a * src[15] + b * src[16] + 4) >> 3; 2975cabdff1aSopenharmony_ci */ 2976cabdff1aSopenharmony_ci __asm__ volatile ( 2977cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2978cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 2979cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 2980cabdff1aSopenharmony_ci "pshufh %[a], %[a], %[ftmp0] \n\t" 2981cabdff1aSopenharmony_ci "pshufh %[b], %[b], %[ftmp0] \n\t" 2982cabdff1aSopenharmony_ci 2983cabdff1aSopenharmony_ci "1: \n\t" 2984cabdff1aSopenharmony_ci // 0 - 7 2985cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst]) 2986cabdff1aSopenharmony_ci PTR_ADDIU "%[src0], %[src], 0x08 \n\t" 2987cabdff1aSopenharmony_ci PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t" 2988cabdff1aSopenharmony_ci // 8 - 15 2989cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_H_MMI(%[src0], %[dst0]) 2990cabdff1aSopenharmony_ci 2991cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2992cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 2993cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 2994cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2995cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2996cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2997cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2998cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), 2999cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3000cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 
3001cabdff1aSopenharmony_ci [dst0]"=&r"(dst0), [src0]"=&r"(src0), 3002cabdff1aSopenharmony_ci [h]"+&r"(h), 3003cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 3004cabdff1aSopenharmony_ci [a]"+&f"(a.f), [b]"+&f"(b.f) 3005cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3006cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3007cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3008cabdff1aSopenharmony_ci : "memory" 3009cabdff1aSopenharmony_ci ); 3010cabdff1aSopenharmony_ci#else 3011cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; 3012cabdff1aSopenharmony_ci int x, y; 3013cabdff1aSopenharmony_ci 3014cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3015cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 3016cabdff1aSopenharmony_ci dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3017cabdff1aSopenharmony_ci dst += dstride; 3018cabdff1aSopenharmony_ci src += sstride; 3019cabdff1aSopenharmony_ci } 3020cabdff1aSopenharmony_ci#endif 3021cabdff1aSopenharmony_ci} 3022cabdff1aSopenharmony_ci 3023cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3024cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3025cabdff1aSopenharmony_ci{ 3026cabdff1aSopenharmony_ci#if 1 3027cabdff1aSopenharmony_ci union mmi_intfloat64 c, d; 3028cabdff1aSopenharmony_ci double ftmp[7]; 3029cabdff1aSopenharmony_ci uint32_t tmp[1]; 3030cabdff1aSopenharmony_ci mips_reg src0, src1, dst0; 3031cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 3032cabdff1aSopenharmony_ci c.i = 8 - my; 3033cabdff1aSopenharmony_ci d.i = my; 3034cabdff1aSopenharmony_ci 3035cabdff1aSopenharmony_ci /* 3036cabdff1aSopenharmony_ci dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; 3037cabdff1aSopenharmony_ci dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3; 3038cabdff1aSopenharmony_ci dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3; 3039cabdff1aSopenharmony_ci dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3; 3040cabdff1aSopenharmony_ci 
dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3; 3041cabdff1aSopenharmony_ci dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3; 3042cabdff1aSopenharmony_ci dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3; 3043cabdff1aSopenharmony_ci dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3; 3044cabdff1aSopenharmony_ci */ 3045cabdff1aSopenharmony_ci __asm__ volatile ( 3046cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3047cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 3048cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 3049cabdff1aSopenharmony_ci "pshufh %[c], %[c], %[ftmp0] \n\t" 3050cabdff1aSopenharmony_ci "pshufh %[d], %[d], %[ftmp0] \n\t" 3051cabdff1aSopenharmony_ci 3052cabdff1aSopenharmony_ci "1: \n\t" 3053cabdff1aSopenharmony_ci // 0 - 7 3054cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride]) 3055cabdff1aSopenharmony_ci PTR_ADDIU "%[src0], %[src], 0x08 \n\t" 3056cabdff1aSopenharmony_ci PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t" 3057cabdff1aSopenharmony_ci // 8 - 15 3058cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_V_MMI(%[src0], %[src1], %[dst0], %[sstride]) 3059cabdff1aSopenharmony_ci 3060cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 3061cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3062cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3063cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 3064cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 3065cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 3066cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 3067cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), 3068cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3069cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 3070cabdff1aSopenharmony_ci [src0]"=&r"(src0), [dst0]"=&r"(dst0), 3071cabdff1aSopenharmony_ci [src1]"=&r"(src1), 3072cabdff1aSopenharmony_ci [h]"+&r"(h), 3073cabdff1aSopenharmony_ci [dst]"+&r"(dst), 
[src]"+&r"(src), 3074cabdff1aSopenharmony_ci [c]"+&f"(c.f), [d]"+&f"(d.f) 3075cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3076cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3077cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3078cabdff1aSopenharmony_ci : "memory" 3079cabdff1aSopenharmony_ci ); 3080cabdff1aSopenharmony_ci#else 3081cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3082cabdff1aSopenharmony_ci int x, y; 3083cabdff1aSopenharmony_ci 3084cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3085cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 3086cabdff1aSopenharmony_ci dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; 3087cabdff1aSopenharmony_ci dst += dstride; 3088cabdff1aSopenharmony_ci src += sstride; 3089cabdff1aSopenharmony_ci } 3090cabdff1aSopenharmony_ci#endif 3091cabdff1aSopenharmony_ci} 3092cabdff1aSopenharmony_ci 3093cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3094cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3095cabdff1aSopenharmony_ci{ 3096cabdff1aSopenharmony_ci#if 1 3097cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[528]); 3098cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3099cabdff1aSopenharmony_ci 3100cabdff1aSopenharmony_ci ff_put_vp8_bilinear16_h_mmi(tmp, 16, src, sstride, h + 1, mx, my); 3101cabdff1aSopenharmony_ci ff_put_vp8_bilinear16_v_mmi(dst, dstride, tmp, 16, h, mx, my); 3102cabdff1aSopenharmony_ci#else 3103cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; 3104cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3105cabdff1aSopenharmony_ci int x, y; 3106cabdff1aSopenharmony_ci uint8_t tmp_array[528]; 3107cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3108cabdff1aSopenharmony_ci 3109cabdff1aSopenharmony_ci for (y = 0; y < h + 1; y++) { 3110cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 3111cabdff1aSopenharmony_ci tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3112cabdff1aSopenharmony_ci tmp += 16; 
3113cabdff1aSopenharmony_ci src += sstride; 3114cabdff1aSopenharmony_ci } 3115cabdff1aSopenharmony_ci 3116cabdff1aSopenharmony_ci tmp = tmp_array; 3117cabdff1aSopenharmony_ci 3118cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3119cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 3120cabdff1aSopenharmony_ci dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3; 3121cabdff1aSopenharmony_ci dst += dstride; 3122cabdff1aSopenharmony_ci tmp += 16; 3123cabdff1aSopenharmony_ci } 3124cabdff1aSopenharmony_ci#endif 3125cabdff1aSopenharmony_ci} 3126cabdff1aSopenharmony_ci 3127cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3128cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3129cabdff1aSopenharmony_ci{ 3130cabdff1aSopenharmony_ci#if 1 3131cabdff1aSopenharmony_ci union mmi_intfloat64 a, b; 3132cabdff1aSopenharmony_ci double ftmp[7]; 3133cabdff1aSopenharmony_ci uint32_t tmp[1]; 3134cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 3135cabdff1aSopenharmony_ci a.i = 8 - mx; 3136cabdff1aSopenharmony_ci b.i = mx; 3137cabdff1aSopenharmony_ci 3138cabdff1aSopenharmony_ci /* 3139cabdff1aSopenharmony_ci dst[0] = (a * src[0] + b * src[1] + 4) >> 3; 3140cabdff1aSopenharmony_ci dst[1] = (a * src[1] + b * src[2] + 4) >> 3; 3141cabdff1aSopenharmony_ci dst[2] = (a * src[2] + b * src[3] + 4) >> 3; 3142cabdff1aSopenharmony_ci dst[3] = (a * src[3] + b * src[4] + 4) >> 3; 3143cabdff1aSopenharmony_ci dst[4] = (a * src[4] + b * src[5] + 4) >> 3; 3144cabdff1aSopenharmony_ci dst[5] = (a * src[5] + b * src[6] + 4) >> 3; 3145cabdff1aSopenharmony_ci dst[6] = (a * src[6] + b * src[7] + 4) >> 3; 3146cabdff1aSopenharmony_ci dst[7] = (a * src[7] + b * src[8] + 4) >> 3; 3147cabdff1aSopenharmony_ci */ 3148cabdff1aSopenharmony_ci __asm__ volatile ( 3149cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3150cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 3151cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 
3152cabdff1aSopenharmony_ci "pshufh %[a], %[a], %[ftmp0] \n\t" 3153cabdff1aSopenharmony_ci "pshufh %[b], %[b], %[ftmp0] \n\t" 3154cabdff1aSopenharmony_ci 3155cabdff1aSopenharmony_ci "1: \n\t" 3156cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst]) 3157cabdff1aSopenharmony_ci 3158cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 3159cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3160cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3161cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 3162cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 3163cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 3164cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 3165cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), 3166cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3167cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 3168cabdff1aSopenharmony_ci [h]"+&r"(h), 3169cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 3170cabdff1aSopenharmony_ci [a]"+&f"(a.f), [b]"+&f"(b.f) 3171cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3172cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3173cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3174cabdff1aSopenharmony_ci : "memory" 3175cabdff1aSopenharmony_ci ); 3176cabdff1aSopenharmony_ci#else 3177cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; 3178cabdff1aSopenharmony_ci int x, y; 3179cabdff1aSopenharmony_ci 3180cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3181cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 3182cabdff1aSopenharmony_ci dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3183cabdff1aSopenharmony_ci dst += dstride; 3184cabdff1aSopenharmony_ci src += sstride; 3185cabdff1aSopenharmony_ci } 3186cabdff1aSopenharmony_ci#endif 3187cabdff1aSopenharmony_ci} 3188cabdff1aSopenharmony_ci 3189cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3190cabdff1aSopenharmony_ci 
ptrdiff_t sstride, int h, int mx, int my) 3191cabdff1aSopenharmony_ci{ 3192cabdff1aSopenharmony_ci#if 1 3193cabdff1aSopenharmony_ci union mmi_intfloat64 c, d; 3194cabdff1aSopenharmony_ci double ftmp[7]; 3195cabdff1aSopenharmony_ci uint32_t tmp[1]; 3196cabdff1aSopenharmony_ci mips_reg src1; 3197cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 3198cabdff1aSopenharmony_ci c.i = 8 - my; 3199cabdff1aSopenharmony_ci d.i = my; 3200cabdff1aSopenharmony_ci 3201cabdff1aSopenharmony_ci /* 3202cabdff1aSopenharmony_ci dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; 3203cabdff1aSopenharmony_ci dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3; 3204cabdff1aSopenharmony_ci dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3; 3205cabdff1aSopenharmony_ci dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3; 3206cabdff1aSopenharmony_ci dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3; 3207cabdff1aSopenharmony_ci dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3; 3208cabdff1aSopenharmony_ci dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3; 3209cabdff1aSopenharmony_ci dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3; 3210cabdff1aSopenharmony_ci */ 3211cabdff1aSopenharmony_ci __asm__ volatile ( 3212cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3213cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 3214cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 3215cabdff1aSopenharmony_ci "pshufh %[c], %[c], %[ftmp0] \n\t" 3216cabdff1aSopenharmony_ci "pshufh %[d], %[d], %[ftmp0] \n\t" 3217cabdff1aSopenharmony_ci 3218cabdff1aSopenharmony_ci "1: \n\t" 3219cabdff1aSopenharmony_ci PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride]) 3220cabdff1aSopenharmony_ci 3221cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 3222cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3223cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3224cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 3225cabdff1aSopenharmony_ci : 
[ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 3226cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 3227cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 3228cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), 3229cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3230cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 3231cabdff1aSopenharmony_ci [src1]"=&r"(src1), 3232cabdff1aSopenharmony_ci [h]"+&r"(h), 3233cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 3234cabdff1aSopenharmony_ci [c]"+&f"(c.f), [d]"+&f"(d.f) 3235cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3236cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3237cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3238cabdff1aSopenharmony_ci : "memory" 3239cabdff1aSopenharmony_ci ); 3240cabdff1aSopenharmony_ci#else 3241cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3242cabdff1aSopenharmony_ci int x, y; 3243cabdff1aSopenharmony_ci 3244cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3245cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 3246cabdff1aSopenharmony_ci dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; 3247cabdff1aSopenharmony_ci dst += dstride; 3248cabdff1aSopenharmony_ci src += sstride; 3249cabdff1aSopenharmony_ci } 3250cabdff1aSopenharmony_ci#endif 3251cabdff1aSopenharmony_ci} 3252cabdff1aSopenharmony_ci 3253cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3254cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3255cabdff1aSopenharmony_ci{ 3256cabdff1aSopenharmony_ci#if 1 3257cabdff1aSopenharmony_ci DECLARE_ALIGNED(8, uint8_t, tmp_array[136]); 3258cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3259cabdff1aSopenharmony_ci 3260cabdff1aSopenharmony_ci ff_put_vp8_bilinear8_h_mmi(tmp, 8, src, sstride, h + 1, mx, my); 3261cabdff1aSopenharmony_ci ff_put_vp8_bilinear8_v_mmi(dst, dstride, tmp, 8, h, mx, my); 3262cabdff1aSopenharmony_ci#else 3263cabdff1aSopenharmony_ci int a = 8 - mx, b = 
mx; 3264cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3265cabdff1aSopenharmony_ci int x, y; 3266cabdff1aSopenharmony_ci uint8_t tmp_array[136]; 3267cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3268cabdff1aSopenharmony_ci 3269cabdff1aSopenharmony_ci for (y = 0; y < h + 1; y++) { 3270cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 3271cabdff1aSopenharmony_ci tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3272cabdff1aSopenharmony_ci tmp += 8; 3273cabdff1aSopenharmony_ci src += sstride; 3274cabdff1aSopenharmony_ci } 3275cabdff1aSopenharmony_ci 3276cabdff1aSopenharmony_ci tmp = tmp_array; 3277cabdff1aSopenharmony_ci 3278cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3279cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 3280cabdff1aSopenharmony_ci dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3; 3281cabdff1aSopenharmony_ci dst += dstride; 3282cabdff1aSopenharmony_ci tmp += 8; 3283cabdff1aSopenharmony_ci } 3284cabdff1aSopenharmony_ci#endif 3285cabdff1aSopenharmony_ci} 3286cabdff1aSopenharmony_ci 3287cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3288cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3289cabdff1aSopenharmony_ci{ 3290cabdff1aSopenharmony_ci#if 1 3291cabdff1aSopenharmony_ci union mmi_intfloat64 a, b; 3292cabdff1aSopenharmony_ci double ftmp[5]; 3293cabdff1aSopenharmony_ci uint32_t tmp[1]; 3294cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 3295cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 3296cabdff1aSopenharmony_ci a.i = 8 - mx; 3297cabdff1aSopenharmony_ci b.i = mx; 3298cabdff1aSopenharmony_ci 3299cabdff1aSopenharmony_ci /* 3300cabdff1aSopenharmony_ci dst[0] = (a * src[0] + b * src[1] + 4) >> 3; 3301cabdff1aSopenharmony_ci dst[1] = (a * src[1] + b * src[2] + 4) >> 3; 3302cabdff1aSopenharmony_ci dst[2] = (a * src[2] + b * src[3] + 4) >> 3; 3303cabdff1aSopenharmony_ci dst[3] = (a * src[3] + b * src[4] + 4) >> 3; 3304cabdff1aSopenharmony_ci */ 3305cabdff1aSopenharmony_ci __asm__ volatile ( 
3306cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3307cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 3308cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 3309cabdff1aSopenharmony_ci "pshufh %[a], %[a], %[ftmp0] \n\t" 3310cabdff1aSopenharmony_ci "pshufh %[b], %[b], %[ftmp0] \n\t" 3311cabdff1aSopenharmony_ci 3312cabdff1aSopenharmony_ci "1: \n\t" 3313cabdff1aSopenharmony_ci PUT_VP8_BILINEAR4_H_MMI(%[src], %[dst]) 3314cabdff1aSopenharmony_ci 3315cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 3316cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3317cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3318cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 3319cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 3320cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 3321cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), 3322cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3323cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 3324cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 3325cabdff1aSopenharmony_ci [h]"+&r"(h), 3326cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 3327cabdff1aSopenharmony_ci [a]"+&f"(a.f), [b]"+&f"(b.f) 3328cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3329cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3330cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3331cabdff1aSopenharmony_ci : "memory" 3332cabdff1aSopenharmony_ci ); 3333cabdff1aSopenharmony_ci#else 3334cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; 3335cabdff1aSopenharmony_ci int x, y; 3336cabdff1aSopenharmony_ci 3337cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3338cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 3339cabdff1aSopenharmony_ci dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3340cabdff1aSopenharmony_ci dst += dstride; 3341cabdff1aSopenharmony_ci src += sstride; 3342cabdff1aSopenharmony_ci } 3343cabdff1aSopenharmony_ci#endif 3344cabdff1aSopenharmony_ci} 
3345cabdff1aSopenharmony_ci 3346cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3347cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3348cabdff1aSopenharmony_ci{ 3349cabdff1aSopenharmony_ci#if 1 3350cabdff1aSopenharmony_ci union mmi_intfloat64 c, d; 3351cabdff1aSopenharmony_ci double ftmp[7]; 3352cabdff1aSopenharmony_ci uint32_t tmp[1]; 3353cabdff1aSopenharmony_ci mips_reg src1; 3354cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 3355cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 3356cabdff1aSopenharmony_ci c.i = 8 - my; 3357cabdff1aSopenharmony_ci d.i = my; 3358cabdff1aSopenharmony_ci 3359cabdff1aSopenharmony_ci /* 3360cabdff1aSopenharmony_ci dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; 3361cabdff1aSopenharmony_ci dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3; 3362cabdff1aSopenharmony_ci dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3; 3363cabdff1aSopenharmony_ci dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3; 3364cabdff1aSopenharmony_ci */ 3365cabdff1aSopenharmony_ci __asm__ volatile ( 3366cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3367cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 3368cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 3369cabdff1aSopenharmony_ci "pshufh %[c], %[c], %[ftmp0] \n\t" 3370cabdff1aSopenharmony_ci "pshufh %[d], %[d], %[ftmp0] \n\t" 3371cabdff1aSopenharmony_ci 3372cabdff1aSopenharmony_ci "1: \n\t" 3373cabdff1aSopenharmony_ci PUT_VP8_BILINEAR4_V_MMI(%[src], %[src1], %[dst], %[sstride]) 3374cabdff1aSopenharmony_ci 3375cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 3376cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3377cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3378cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 3379cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 3380cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 
3381cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), 3382cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 3383cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 3384cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 3385cabdff1aSopenharmony_ci [src1]"=&r"(src1), 3386cabdff1aSopenharmony_ci [h]"+&r"(h), 3387cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 3388cabdff1aSopenharmony_ci [c]"+&f"(c.f), [d]"+&f"(d.f) 3389cabdff1aSopenharmony_ci : [sstride]"r"((mips_reg)sstride), 3390cabdff1aSopenharmony_ci [dstride]"r"((mips_reg)dstride), 3391cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_4.f) 3392cabdff1aSopenharmony_ci : "memory" 3393cabdff1aSopenharmony_ci ); 3394cabdff1aSopenharmony_ci#else 3395cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3396cabdff1aSopenharmony_ci int x, y; 3397cabdff1aSopenharmony_ci 3398cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3399cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 3400cabdff1aSopenharmony_ci dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; 3401cabdff1aSopenharmony_ci dst += dstride; 3402cabdff1aSopenharmony_ci src += sstride; 3403cabdff1aSopenharmony_ci } 3404cabdff1aSopenharmony_ci#endif 3405cabdff1aSopenharmony_ci} 3406cabdff1aSopenharmony_ci 3407cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, 3408cabdff1aSopenharmony_ci ptrdiff_t sstride, int h, int mx, int my) 3409cabdff1aSopenharmony_ci{ 3410cabdff1aSopenharmony_ci#if 1 3411cabdff1aSopenharmony_ci DECLARE_ALIGNED(4, uint8_t, tmp_array[36]); 3412cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3413cabdff1aSopenharmony_ci 3414cabdff1aSopenharmony_ci ff_put_vp8_bilinear4_h_mmi(tmp, 4, src, sstride, h + 1, mx, my); 3415cabdff1aSopenharmony_ci ff_put_vp8_bilinear4_v_mmi(dst, dstride, tmp, 4, h, mx, my); 3416cabdff1aSopenharmony_ci#else 3417cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; 3418cabdff1aSopenharmony_ci int c = 8 - my, d = my; 3419cabdff1aSopenharmony_ci int x, y; 3420cabdff1aSopenharmony_ci uint8_t tmp_array[36]; 
3421cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; 3422cabdff1aSopenharmony_ci 3423cabdff1aSopenharmony_ci for (y = 0; y < h + 1; y++) { 3424cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 3425cabdff1aSopenharmony_ci tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; 3426cabdff1aSopenharmony_ci tmp += 4; 3427cabdff1aSopenharmony_ci src += sstride; 3428cabdff1aSopenharmony_ci } 3429cabdff1aSopenharmony_ci 3430cabdff1aSopenharmony_ci tmp = tmp_array; 3431cabdff1aSopenharmony_ci 3432cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 3433cabdff1aSopenharmony_ci for (x = 0; x < 4; x++) 3434cabdff1aSopenharmony_ci dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3; 3435cabdff1aSopenharmony_ci dst += dstride; 3436cabdff1aSopenharmony_ci tmp += 4; 3437cabdff1aSopenharmony_ci } 3438cabdff1aSopenharmony_ci#endif 3439cabdff1aSopenharmony_ci} 3440