1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2019 Shiyou Yin (yinshiyou-hf@loongson.cn) 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavcodec/hevcdec.h" 22cabdff1aSopenharmony_ci#include "libavcodec/bit_depth_template.c" 23cabdff1aSopenharmony_ci#include "libavcodec/mips/hevcdsp_mips.h" 24cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_H(w, x_step, src_step, dst_step) \ 27cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src, \ 28cabdff1aSopenharmony_ci ptrdiff_t _srcstride, \ 29cabdff1aSopenharmony_ci int height, intptr_t mx, \ 30cabdff1aSopenharmony_ci intptr_t my, int width) \ 31cabdff1aSopenharmony_ci{ \ 32cabdff1aSopenharmony_ci int x, y; \ 
33cabdff1aSopenharmony_ci pixel *src = (pixel*)_src - 3; \ 34cabdff1aSopenharmony_ci ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ 35cabdff1aSopenharmony_ci double ftmp[15]; \ 36cabdff1aSopenharmony_ci uint64_t rtmp[1]; \ 37cabdff1aSopenharmony_ci const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \ 38cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 39cabdff1aSopenharmony_ci \ 40cabdff1aSopenharmony_ci x = x_step; \ 41cabdff1aSopenharmony_ci y = height; \ 42cabdff1aSopenharmony_ci __asm__ volatile( \ 43cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 44cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 45cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 46cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 47cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 48cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 49cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 50cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 51cabdff1aSopenharmony_ci \ 52cabdff1aSopenharmony_ci "1: \n\t" \ 53cabdff1aSopenharmony_ci "2: \n\t" \ 54cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) \ 55cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) \ 56cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[src], 0x02) \ 57cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[src], 0x03) \ 58cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 59cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 60cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 61cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 62cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp7], %[ftmp8] \n\t" \ 63cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" \ 64cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 65cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 
66cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 67cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp7], %[ftmp8] \n\t" \ 68cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 69cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ 70cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 71cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 72cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp7], %[ftmp8] \n\t" \ 73cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 74cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ 75cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 76cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 77cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp7], %[ftmp8] \n\t" \ 78cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 79cabdff1aSopenharmony_ci %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10]) \ 80cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 81cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 82cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 83cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[dst], 0x00) \ 84cabdff1aSopenharmony_ci \ 85cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 86cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x04 \n\t" \ 87cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x08 \n\t" \ 88cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 89cabdff1aSopenharmony_ci \ 90cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 91cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 92cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], " #src_step " \n\t" \ 93cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], " #dst_step " \n\t" \ 94cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" \ 95cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x80 \n\t" \ 
96cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 97cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 \ 98cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 99cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 100cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 101cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 102cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 103cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]), \ 104cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), [y]"+&r"(y), \ 105cabdff1aSopenharmony_ci [x]"+&r"(x) \ 106cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(srcstride) \ 107cabdff1aSopenharmony_ci : "memory" \ 108cabdff1aSopenharmony_ci ); \ 109cabdff1aSopenharmony_ci} 110cabdff1aSopenharmony_ci 111cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(4, 1, -4, -8); 112cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(8, 2, -8, -16); 113cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(12, 3, -12, -24); 114cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(16, 4, -16, -32); 115cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(24, 6, -24, -48); 116cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(32, 8, -32, -64); 117cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(48, 12, -48, -96); 118cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(64, 16, -64, -128); 119cabdff1aSopenharmony_ci 120cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_HV(w, x_step, src_step, dst_step) \ 121cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src, \ 122cabdff1aSopenharmony_ci ptrdiff_t _srcstride, \ 123cabdff1aSopenharmony_ci int height, intptr_t mx, \ 124cabdff1aSopenharmony_ci intptr_t my, int width) \ 125cabdff1aSopenharmony_ci{ \ 126cabdff1aSopenharmony_ci int x, y; \ 127cabdff1aSopenharmony_ci const int8_t *filter; \ 128cabdff1aSopenharmony_ci pixel *src = (pixel*)_src; \ 129cabdff1aSopenharmony_ci ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ 130cabdff1aSopenharmony_ci 
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ 131cabdff1aSopenharmony_ci int16_t *tmp = tmp_array; \ 132cabdff1aSopenharmony_ci double ftmp[15]; \ 133cabdff1aSopenharmony_ci uint64_t rtmp[1]; \ 134cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 135cabdff1aSopenharmony_ci \ 136cabdff1aSopenharmony_ci src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ 137cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[mx - 1]; \ 138cabdff1aSopenharmony_ci x = x_step; \ 139cabdff1aSopenharmony_ci y = height + QPEL_EXTRA; \ 140cabdff1aSopenharmony_ci __asm__ volatile( \ 141cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 142cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 143cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 144cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 145cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 146cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 147cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 148cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 149cabdff1aSopenharmony_ci \ 150cabdff1aSopenharmony_ci "1: \n\t" \ 151cabdff1aSopenharmony_ci "2: \n\t" \ 152cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) \ 153cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) \ 154cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[src], 0x02) \ 155cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[src], 0x03) \ 156cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 157cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 158cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 159cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 160cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp7], %[ftmp8] \n\t" \ 161cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" \ 162cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 
163cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 164cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 165cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp7], %[ftmp8] \n\t" \ 166cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 167cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ 168cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 169cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 170cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp7], %[ftmp8] \n\t" \ 171cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 172cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ 173cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 174cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 175cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp7], %[ftmp8] \n\t" \ 176cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 177cabdff1aSopenharmony_ci %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10]) \ 178cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 179cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 180cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 181cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[tmp], 0x00) \ 182cabdff1aSopenharmony_ci \ 183cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 184cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x04 \n\t" \ 185cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 186cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 187cabdff1aSopenharmony_ci \ 188cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 189cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 190cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], " #src_step " \n\t" \ 191cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #dst_step " \n\t" \ 192cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], 
%[stride] \n\t" \ 193cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 194cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 195cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 \ 196cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 197cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 198cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 199cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 200cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 201cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]), \ 202cabdff1aSopenharmony_ci [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y), \ 203cabdff1aSopenharmony_ci [x]"+&r"(x) \ 204cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(srcstride) \ 205cabdff1aSopenharmony_ci : "memory" \ 206cabdff1aSopenharmony_ci ); \ 207cabdff1aSopenharmony_ci \ 208cabdff1aSopenharmony_ci tmp = tmp_array + QPEL_EXTRA_BEFORE * 4 -12; \ 209cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[my - 1]; \ 210cabdff1aSopenharmony_ci x = x_step; \ 211cabdff1aSopenharmony_ci y = height; \ 212cabdff1aSopenharmony_ci __asm__ volatile( \ 213cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 214cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 215cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 216cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 217cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 218cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 219cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 220cabdff1aSopenharmony_ci "li %[rtmp0], 0x06 \n\t" \ 221cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 222cabdff1aSopenharmony_ci \ 223cabdff1aSopenharmony_ci "1: \n\t" \ 224cabdff1aSopenharmony_ci "2: \n\t" \ 225cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[tmp], 0x00) \ 226cabdff1aSopenharmony_ci PTR_ADDIU 
"%[tmp], %[tmp], 0x80 \n\t" \ 227cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[tmp], 0x00) \ 228cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 229cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[tmp], 0x00) \ 230cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 231cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[tmp], 0x00) \ 232cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 233cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp7], %[tmp], 0x00) \ 234cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 235cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp8], %[tmp], 0x00) \ 236cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 237cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp9], %[tmp], 0x00) \ 238cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 239cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp10], %[tmp], 0x00) \ 240cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], -0x380 \n\t" \ 241cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 242cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 243cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10], \ 244cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 245cabdff1aSopenharmony_ci "pmaddhw %[ftmp11], %[ftmp3], %[ftmp1] \n\t" \ 246cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp7], %[ftmp2] \n\t" \ 247cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp4], %[ftmp1] \n\t" \ 248cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp8], %[ftmp2] \n\t" \ 249cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 250cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 251cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4]) \ 252cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 253cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 254cabdff1aSopenharmony_ci "pmaddhw %[ftmp11], %[ftmp5], %[ftmp1] \n\t" \ 
255cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp9], %[ftmp2] \n\t" \ 256cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp6], %[ftmp1] \n\t" \ 257cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp10], %[ftmp2] \n\t" \ 258cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 259cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 260cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6]) \ 261cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 262cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[ftmp0] \n\t" \ 263cabdff1aSopenharmony_ci "packsswh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 264cabdff1aSopenharmony_ci MMI_USDC1(%[ftmp3], %[dst], 0x00) \ 265cabdff1aSopenharmony_ci \ 266cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 267cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x08 \n\t" \ 268cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 269cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 270cabdff1aSopenharmony_ci \ 271cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 272cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 273cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], " #dst_step " \n\t" \ 274cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #dst_step " \n\t" \ 275cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x80 \n\t" \ 276cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 277cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 278cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 \ 279cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 280cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 281cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 282cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 283cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 284cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \ 
285cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \ 286cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [rtmp0]"=&r"(rtmp[0]), \ 287cabdff1aSopenharmony_ci [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), \ 288cabdff1aSopenharmony_ci [x]"+&r"(x) \ 289cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(srcstride) \ 290cabdff1aSopenharmony_ci : "memory" \ 291cabdff1aSopenharmony_ci ); \ 292cabdff1aSopenharmony_ci} 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(4, 1, -4, -8); 295cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(8, 2, -8, -16); 296cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(12, 3, -12, -24); 297cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(16, 4, -16, -32); 298cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(24, 6, -24, -48); 299cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(32, 8, -32, -64); 300cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(48, 12, -48, -96); 301cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(64, 16, -64, -128); 302cabdff1aSopenharmony_ci 303cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_BI_H(w, x_step, src_step, src2_step, dst_step) \ 304cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \ 305cabdff1aSopenharmony_ci ptrdiff_t _dststride, \ 306cabdff1aSopenharmony_ci uint8_t *_src, \ 307cabdff1aSopenharmony_ci ptrdiff_t _srcstride, \ 308cabdff1aSopenharmony_ci int16_t *src2, int height, \ 309cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, \ 310cabdff1aSopenharmony_ci int width) \ 311cabdff1aSopenharmony_ci{ \ 312cabdff1aSopenharmony_ci int x, y; \ 313cabdff1aSopenharmony_ci pixel *src = (pixel*)_src - 3; \ 314cabdff1aSopenharmony_ci ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ 315cabdff1aSopenharmony_ci pixel *dst = (pixel *)_dst; \ 316cabdff1aSopenharmony_ci ptrdiff_t dststride = _dststride / sizeof(pixel); \ 317cabdff1aSopenharmony_ci const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \ 318cabdff1aSopenharmony_ci double ftmp[20]; \ 319cabdff1aSopenharmony_ci uint64_t rtmp[1]; \ 
320cabdff1aSopenharmony_ci union av_intfloat64 shift; \ 321cabdff1aSopenharmony_ci union av_intfloat64 offset; \ 322cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 323cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 324cabdff1aSopenharmony_ci shift.i = 7; \ 325cabdff1aSopenharmony_ci offset.i = 64; \ 326cabdff1aSopenharmony_ci \ 327cabdff1aSopenharmony_ci x = width >> 2; \ 328cabdff1aSopenharmony_ci y = height; \ 329cabdff1aSopenharmony_ci __asm__ volatile( \ 330cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 331cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 332cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 333cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 334cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 335cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 336cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 337cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 338cabdff1aSopenharmony_ci "punpcklhw %[offset], %[offset], %[offset] \n\t" \ 339cabdff1aSopenharmony_ci "punpcklwd %[offset], %[offset], %[offset] \n\t" \ 340cabdff1aSopenharmony_ci \ 341cabdff1aSopenharmony_ci "1: \n\t" \ 342cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 343cabdff1aSopenharmony_ci "2: \n\t" \ 344cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) \ 345cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) \ 346cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[src], 0x02) \ 347cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[src], 0x03) \ 348cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 349cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 350cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 351cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 352cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp7], %[ftmp8] \n\t" \ 353cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], 
%[ftmp0] \n\t" \ 354cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 355cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 356cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 357cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp7], %[ftmp8] \n\t" \ 358cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 359cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ 360cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 361cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 362cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp7], %[ftmp8] \n\t" \ 363cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 364cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ 365cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 366cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 367cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp7], %[ftmp8] \n\t" \ 368cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 369cabdff1aSopenharmony_ci %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10]) \ 370cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 371cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 372cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 373cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[offset] \n\t" \ 374cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src2], 0x00) \ 375cabdff1aSopenharmony_ci "li %[rtmp0], 0x10 \n\t" \ 376cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp8] \n\t" \ 377cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp0], %[ftmp3] \n\t" \ 378cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp0], %[ftmp3] \n\t" \ 379cabdff1aSopenharmony_ci "punpckhhw %[ftmp3], %[ftmp0], %[ftmp4] \n\t" \ 380cabdff1aSopenharmony_ci "punpcklhw %[ftmp4], %[ftmp0], %[ftmp4] \n\t" \ 381cabdff1aSopenharmony_ci "psraw %[ftmp5], 
%[ftmp5], %[ftmp8] \n\t" \ 382cabdff1aSopenharmony_ci "psraw %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 383cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t" \ 384cabdff1aSopenharmony_ci "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t" \ 385cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 386cabdff1aSopenharmony_ci "paddw %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 387cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[shift] \n\t" \ 388cabdff1aSopenharmony_ci "psraw %[ftmp6], %[ftmp6], %[shift] \n\t" \ 389cabdff1aSopenharmony_ci "packsswh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 390cabdff1aSopenharmony_ci "pcmpgth %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 391cabdff1aSopenharmony_ci "pand %[ftmp3], %[ftmp5], %[ftmp7] \n\t" \ 392cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 393cabdff1aSopenharmony_ci MMI_USWC1(%[ftmp3], %[dst], 0x00) \ 394cabdff1aSopenharmony_ci \ 395cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 396cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x04 \n\t" \ 397cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" \ 398cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], 0x08 \n\t" \ 399cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 400cabdff1aSopenharmony_ci \ 401cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 402cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], " #src_step " \n\t" \ 403cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], " #dst_step " \n\t" \ 404cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], " #src2_step " \n\t" \ 405cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[src_stride] \n\t" \ 406cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" \ 407cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], 0x80 \n\t" \ 408cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 409cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32 \ 410cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 411cabdff1aSopenharmony_ci 
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 412cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 413cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 414cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 415cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \ 416cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2), \ 417cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x), \ 418cabdff1aSopenharmony_ci [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \ 419cabdff1aSopenharmony_ci : [src_stride]"r"(srcstride), [dst_stride]"r"(dststride), \ 420cabdff1aSopenharmony_ci [filter]"r"(filter), [shift]"f"(shift.f) \ 421cabdff1aSopenharmony_ci : "memory" \ 422cabdff1aSopenharmony_ci ); \ 423cabdff1aSopenharmony_ci} 424cabdff1aSopenharmony_ci 425cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(4, 1, -4, -8, -4); 426cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(8, 2, -8, -16, -8); 427cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(12, 3, -12, -24, -12); 428cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(16, 4, -16, -32, -16); 429cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(24, 6, -24, -48, -24); 430cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(32, 8, -32, -64, -32); 431cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(48, 12, -48, -96, -48); 432cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(64, 16, -64, -128, -64); 433cabdff1aSopenharmony_ci 434cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_BI_HV(w, x_step, src_step, src2_step, dst_step) \ 435cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \ 436cabdff1aSopenharmony_ci ptrdiff_t _dststride, \ 437cabdff1aSopenharmony_ci uint8_t *_src, \ 438cabdff1aSopenharmony_ci ptrdiff_t _srcstride, \ 439cabdff1aSopenharmony_ci int16_t *src2, int height, \ 440cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, \ 441cabdff1aSopenharmony_ci int width) \ 442cabdff1aSopenharmony_ci{ \ 443cabdff1aSopenharmony_ci int x, y; \ 
444cabdff1aSopenharmony_ci const int8_t *filter; \ 445cabdff1aSopenharmony_ci pixel *src = (pixel*)_src; \ 446cabdff1aSopenharmony_ci ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ 447cabdff1aSopenharmony_ci pixel *dst = (pixel *)_dst; \ 448cabdff1aSopenharmony_ci ptrdiff_t dststride = _dststride / sizeof(pixel); \ 449cabdff1aSopenharmony_ci int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ 450cabdff1aSopenharmony_ci int16_t *tmp = tmp_array; \ 451cabdff1aSopenharmony_ci double ftmp[20]; \ 452cabdff1aSopenharmony_ci uint64_t rtmp[1]; \ 453cabdff1aSopenharmony_ci union av_intfloat64 shift; \ 454cabdff1aSopenharmony_ci union av_intfloat64 offset; \ 455cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 456cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 457cabdff1aSopenharmony_ci shift.i = 7; \ 458cabdff1aSopenharmony_ci offset.i = 64; \ 459cabdff1aSopenharmony_ci \ 460cabdff1aSopenharmony_ci src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ 461cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[mx - 1]; \ 462cabdff1aSopenharmony_ci x = width >> 2; \ 463cabdff1aSopenharmony_ci y = height + QPEL_EXTRA; \ 464cabdff1aSopenharmony_ci __asm__ volatile( \ 465cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 466cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 467cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 468cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 469cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 470cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 471cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 472cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 473cabdff1aSopenharmony_ci \ 474cabdff1aSopenharmony_ci "1: \n\t" \ 475cabdff1aSopenharmony_ci "2: \n\t" \ 476cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) \ 477cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) \ 478cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], 
%[src], 0x02) \ 479cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[src], 0x03) \ 480cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 481cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 482cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 483cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 484cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp7], %[ftmp8] \n\t" \ 485cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" \ 486cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 487cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 488cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 489cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp7], %[ftmp8] \n\t" \ 490cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 491cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ 492cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 493cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 494cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp7], %[ftmp8] \n\t" \ 495cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 496cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ 497cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 498cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 499cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp7], %[ftmp8] \n\t" \ 500cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 501cabdff1aSopenharmony_ci %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10]) \ 502cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 503cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 504cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 505cabdff1aSopenharmony_ci MMI_USDC1(%[ftmp3], %[tmp], 0x00) \ 506cabdff1aSopenharmony_ci \ 
507cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 508cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x04 \n\t" \ 509cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 510cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 511cabdff1aSopenharmony_ci \ 512cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 513cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 514cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], " #src_step " \n\t" \ 515cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #src2_step " \n\t" \ 516cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" \ 517cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 518cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 519cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 \ 520cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 521cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 522cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 523cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 524cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 525cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]), \ 526cabdff1aSopenharmony_ci [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y), \ 527cabdff1aSopenharmony_ci [x]"+&r"(x) \ 528cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(srcstride) \ 529cabdff1aSopenharmony_ci : "memory" \ 530cabdff1aSopenharmony_ci ); \ 531cabdff1aSopenharmony_ci \ 532cabdff1aSopenharmony_ci tmp = tmp_array; \ 533cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[my - 1]; \ 534cabdff1aSopenharmony_ci x = width >> 2; \ 535cabdff1aSopenharmony_ci y = height; \ 536cabdff1aSopenharmony_ci __asm__ volatile( \ 537cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 538cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 539cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 540cabdff1aSopenharmony_ci "punpckhbh 
%[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 541cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 542cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 543cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 544cabdff1aSopenharmony_ci "li %[rtmp0], 0x06 \n\t" \ 545cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 546cabdff1aSopenharmony_ci "punpcklwd %[offset], %[offset], %[offset] \n\t" \ 547cabdff1aSopenharmony_ci \ 548cabdff1aSopenharmony_ci "1: \n\t" \ 549cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 550cabdff1aSopenharmony_ci "2: \n\t" \ 551cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[tmp], 0x00) \ 552cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 553cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[tmp], 0x00) \ 554cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 555cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[tmp], 0x00) \ 556cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 557cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[tmp], 0x00) \ 558cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 559cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp7], %[tmp], 0x00) \ 560cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 561cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp8], %[tmp], 0x00) \ 562cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 563cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp9], %[tmp], 0x00) \ 564cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 565cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp10], %[tmp], 0x00) \ 566cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], -0x380 \n\t" \ 567cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 568cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 569cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10], \ 570cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 571cabdff1aSopenharmony_ci 
"pmaddhw %[ftmp11], %[ftmp3], %[ftmp1] \n\t" \ 572cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp7], %[ftmp2] \n\t" \ 573cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp4], %[ftmp1] \n\t" \ 574cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp8], %[ftmp2] \n\t" \ 575cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 576cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 577cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4]) \ 578cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 579cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 580cabdff1aSopenharmony_ci "pmaddhw %[ftmp11], %[ftmp5], %[ftmp1] \n\t" \ 581cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp9], %[ftmp2] \n\t" \ 582cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp6], %[ftmp1] \n\t" \ 583cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp10], %[ftmp2] \n\t" \ 584cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 585cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 586cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6]) \ 587cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 588cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[ftmp0] \n\t" \ 589cabdff1aSopenharmony_ci "packsswh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 590cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src2], 0x00) \ 591cabdff1aSopenharmony_ci "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \ 592cabdff1aSopenharmony_ci "li %[rtmp0], 0x10 \n\t" \ 593cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp8] \n\t" \ 594cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp7], %[ftmp3] \n\t" \ 595cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp7], %[ftmp3] \n\t" \ 596cabdff1aSopenharmony_ci "punpckhhw %[ftmp3], %[ftmp7], %[ftmp4] \n\t" \ 597cabdff1aSopenharmony_ci "punpcklhw %[ftmp4], %[ftmp7], %[ftmp4] \n\t" \ 598cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[ftmp8] 
\n\t" \ 599cabdff1aSopenharmony_ci "psraw %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 600cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t" \ 601cabdff1aSopenharmony_ci "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t" \ 602cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 603cabdff1aSopenharmony_ci "paddw %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 604cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[offset] \n\t" \ 605cabdff1aSopenharmony_ci "paddw %[ftmp6], %[ftmp6], %[offset] \n\t" \ 606cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[shift] \n\t" \ 607cabdff1aSopenharmony_ci "psraw %[ftmp6], %[ftmp6], %[shift] \n\t" \ 608cabdff1aSopenharmony_ci "packsswh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 609cabdff1aSopenharmony_ci "pcmpgth %[ftmp7], %[ftmp5], %[ftmp7] \n\t" \ 610cabdff1aSopenharmony_ci "pand %[ftmp3], %[ftmp5], %[ftmp7] \n\t" \ 611cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 612cabdff1aSopenharmony_ci MMI_USWC1(%[ftmp3], %[dst], 0x00) \ 613cabdff1aSopenharmony_ci \ 614cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 615cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], 0x08 \n\t" \ 616cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 617cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" \ 618cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 619cabdff1aSopenharmony_ci \ 620cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 621cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], " #src2_step " \n\t" \ 622cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #src2_step " \n\t" \ 623cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], " #dst_step " \n\t" \ 624cabdff1aSopenharmony_ci PTR_ADDIU "%[src2], %[src2], 0x80 \n\t" \ 625cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 626cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 627cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 628cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32 \ 
629cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 630cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 631cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 632cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 633cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 634cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \ 635cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \ 636cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2), \ 637cabdff1aSopenharmony_ci [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \ 638cabdff1aSopenharmony_ci [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \ 639cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(dststride), \ 640cabdff1aSopenharmony_ci [shift]"f"(shift.f) \ 641cabdff1aSopenharmony_ci : "memory" \ 642cabdff1aSopenharmony_ci ); \ 643cabdff1aSopenharmony_ci}

/* Instantiate the bi-predictive QPEL hv function for each block width. */
PUT_HEVC_QPEL_BI_HV(4, 1, -4, -8, -4);
PUT_HEVC_QPEL_BI_HV(8, 2, -8, -16, -8);
PUT_HEVC_QPEL_BI_HV(12, 3, -12, -24, -12);
PUT_HEVC_QPEL_BI_HV(16, 4, -16, -32, -16);
PUT_HEVC_QPEL_BI_HV(24, 6, -24, -48, -24);
PUT_HEVC_QPEL_BI_HV(32, 8, -32, -64, -32);
PUT_HEVC_QPEL_BI_HV(48, 12, -48, -96, -48);
PUT_HEVC_QPEL_BI_HV(64, 16, -64, -128, -64);

/**
 * Generate ff_hevc_put_hevc_epel_bi_hv<w>_8_mmi(): 8-bit bi-predictive
 * 4-tap (EPEL) interpolation, a horizontal pass followed by a vertical
 * pass, written with Loongson MMI inline assembly.
 *
 * Pass 1 filters (height + EPEL_EXTRA) source rows horizontally with
 * ff_hevc_epel_filters[mx - 1] and stores the 16-bit sums in tmp_array,
 * one row every 0x80 bytes.  Pass 2 filters tmp_array vertically with
 * ff_hevc_epel_filters[my - 1] and writes four pixels per step:
 *     dst = clip8(((vsum >> 6) + src2 + 64) >> 7)
 *
 * Macro parameters (values as used by the instantiations below):
 * @param w         block width in pixels, pasted into the function name
 * @param x_step    number of 4-pixel groups per row (w / 4); reloaded
 *                  into the column counter at every row
 * @param src_step  byte rewind of src after each pass-1 row (-w)
 * @param src2_step byte rewind of tmp (both passes) and of src2 (pass 2)
 *                  after each row (-2 * w)
 * @param dst_step  byte rewind of dst after each pass-2 row (-w)
 *
 * Generated function parameters:
 * @param _dst, _dststride  destination pixels and stride
 * @param _src, _srcstride  source pixels and stride
 * @param src2              16-bit samples of the other prediction, read
 *                          with a 0x80-byte row pitch
 * @param height            number of output rows
 * @param mx, my            1-based indices into ff_hevc_epel_filters
 * @param width             block width; only seeds the first row's column
 *                          counter (width >> 2)
 */
#define PUT_HEVC_EPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)    \
void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,               \
                                            ptrdiff_t _dststride,        \
                                            uint8_t *_src,               \
                                            ptrdiff_t _srcstride,        \
                                            int16_t *src2, int height,   \
                                            intptr_t mx, intptr_t my,    \
                                            int width)                   \
{                                                                        \
    int x, y;                                                            \
    pixel *src = (pixel *)_src;                                          \
    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
    pixel *dst = (pixel *)_dst;                                          \
    ptrdiff_t dststride = _dststride / sizeof(pixel);                    \
    const int8_t *filter = ff_hevc_epel_filters[mx - 1];                 \
    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];         \
    int16_t *tmp = tmp_array;                                            \
    double ftmp[12];                                                     \
    uint64_t rtmp[1];                                                    \
    union av_intfloat64 shift;                                           \
    union av_intfloat64 offset;                                          \
    DECLARE_VAR_ALL64;                                                   \
    DECLARE_VAR_LOW32;                                                   \
    shift.i = 7;                                                         \
    offset.i = 64;                                                       \
                                                                         \
    /* Pass 1: horizontal filter into tmp_array. */                      \
    src -= (EPEL_EXTRA_BEFORE * srcstride + 1);                          \
    x = width >> 2;                                                      \
    y = height + EPEL_EXTRA;                                             \
    __asm__ volatile(                                                    \
        /* ftmp1 = the four int8 taps sign-extended to 16 bit. */        \
        MMI_LWC1(%[ftmp1], %[filter], 0x00)                              \
        "li         %[rtmp0],      0x08                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp0]                     \n\t"    \
        "punpcklbh  %[ftmp1],      %[ftmp0],      %[ftmp1]      \n\t"    \
        "psrah      %[ftmp1],      %[ftmp1],      %[ftmp0]      \n\t"    \
        "pxor       %[ftmp0],      %[ftmp0],      %[ftmp0]      \n\t"    \
                                                                         \
        "1:                                                     \n\t"    \
        "2:                                                     \n\t"    \
        /* Windows at src+0..src+3 go to ftmp2..ftmp5, the registers */  \
        /* the unpack/multiply/transpose sequence below consumes;    */  \
        /* only the low four bytes of each load are used.            */  \
        MMI_ULDC1(%[ftmp2], %[src], 0x00)                                \
        MMI_ULDC1(%[ftmp3], %[src], 0x01)                                \
        MMI_ULDC1(%[ftmp4], %[src], 0x02)                                \
        MMI_ULDC1(%[ftmp5], %[src], 0x03)                                \
        "punpcklbh  %[ftmp2],      %[ftmp2],      %[ftmp0]      \n\t"    \
        "pmullh     %[ftmp2],      %[ftmp2],      %[ftmp1]      \n\t"    \
        "punpcklbh  %[ftmp3],      %[ftmp3],      %[ftmp0]      \n\t"    \
        "pmullh     %[ftmp3],      %[ftmp3],      %[ftmp1]      \n\t"    \
        "punpcklbh  %[ftmp4],      %[ftmp4],      %[ftmp0]      \n\t"    \
        "pmullh     %[ftmp4],      %[ftmp4],      %[ftmp1]      \n\t"    \
        "punpcklbh  %[ftmp5],      %[ftmp5],      %[ftmp0]      \n\t"    \
        "pmullh     %[ftmp5],      %[ftmp5],      %[ftmp1]      \n\t"    \
        TRANSPOSE_4H(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5],             \
                     %[ftmp6], %[ftmp7], %[ftmp8], %[ftmp9])             \
        "paddh      %[ftmp2],      %[ftmp2],      %[ftmp3]      \n\t"    \
        "paddh      %[ftmp4],      %[ftmp4],      %[ftmp5]      \n\t"    \
        "paddh      %[ftmp2],      %[ftmp2],      %[ftmp4]      \n\t"    \
        /* Store the four 16-bit sums so that pass 2 can read them. */   \
        MMI_USDC1(%[ftmp2], %[tmp], 0x00)                                \
                                                                         \
        "daddi      %[x],          %[x],         -0x01          \n\t"    \
        PTR_ADDIU  "%[src],        %[src],        0x04          \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x08          \n\t"    \
        "bnez       %[x],          2b                           \n\t"    \
                                                                         \
        /* Next row: rewind the columns, then advance one row. */        \
        "daddi      %[y],          %[y],         -0x01          \n\t"    \
        "li         %[x],        " #x_step "                    \n\t"    \
        PTR_ADDIU  "%[src],        %[src],      " #src_step "   \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],      " #src2_step "  \n\t"    \
        PTR_ADDU   "%[src],        %[src],        %[stride]     \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x80          \n\t"    \
        "bnez       %[y],          1b                           \n\t"    \
        : RESTRICT_ASM_ALL64                                             \
          [ftmp0]"=&f"(ftmp[0]),      [ftmp1]"=&f"(ftmp[1]),             \
          [ftmp2]"=&f"(ftmp[2]),      [ftmp3]"=&f"(ftmp[3]),             \
          [ftmp4]"=&f"(ftmp[4]),      [ftmp5]"=&f"(ftmp[5]),             \
          [ftmp6]"=&f"(ftmp[6]),      [ftmp7]"=&f"(ftmp[7]),             \
          [ftmp8]"=&f"(ftmp[8]),      [ftmp9]"=&f"(ftmp[9]),             \
          [rtmp0]"=&r"(rtmp[0]),                                         \
          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                 \
          [x]"+&r"(x)                                                    \
        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
        : "memory"                                                       \
    );                                                                   \
                                                                         \
    /* Pass 2: vertical filter over tmp_array, combine with src2. */     \
    tmp      = tmp_array;                                                \
    filter   = ff_hevc_epel_filters[my - 1];                             \
    x        = width >> 2;                                               \
    y        = height;                                                   \
    __asm__ volatile(                                                    \
        MMI_LWC1(%[ftmp1], %[filter], 0x00)                              \
        "li         %[rtmp0],      0x08                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp0]                     \n\t"    \
        "punpcklbh  %[ftmp1],      %[ftmp0],      %[ftmp1]      \n\t"    \
        "psrah      %[ftmp1],      %[ftmp1],      %[ftmp0]      \n\t"    \
        /* ftmp0 = 6 (first shift), offset = 64 per word, ftmp2 = 0. */  \
        "li         %[rtmp0],      0x06                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp0]                     \n\t"    \
        "punpcklwd  %[offset],     %[offset],     %[offset]     \n\t"    \
        "pxor       %[ftmp2],      %[ftmp2],      %[ftmp2]      \n\t"    \
                                                                         \
        "1:                                                     \n\t"    \
        "li         %[x],        " #x_step "                    \n\t"    \
        "2:                                                     \n\t"    \
        /* Four consecutive tmp rows, then step back to the first. */    \
        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                                \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x80          \n\t"    \
        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                                \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x80          \n\t"    \
        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                                \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x80          \n\t"    \
        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                                \
        PTR_ADDIU  "%[tmp],        %[tmp],       -0x180         \n\t"    \
        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])            \
        "pmaddhw    %[ftmp7],      %[ftmp3],      %[ftmp1]      \n\t"    \
        "pmaddhw    %[ftmp8],      %[ftmp4],      %[ftmp1]      \n\t"    \
        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp3], %[ftmp4])             \
        "paddw      %[ftmp3],      %[ftmp3],      %[ftmp4]      \n\t"    \
        "psraw      %[ftmp3],      %[ftmp3],      %[ftmp0]      \n\t"    \
        "pmaddhw    %[ftmp7],      %[ftmp5],      %[ftmp1]      \n\t"    \
        "pmaddhw    %[ftmp8],      %[ftmp6],      %[ftmp1]      \n\t"    \
        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp5], %[ftmp6])             \
        "paddw      %[ftmp5],      %[ftmp5],      %[ftmp6]      \n\t"    \
        "psraw      %[ftmp5],      %[ftmp5],      %[ftmp0]      \n\t"    \
        "packsswh   %[ftmp3],      %[ftmp3],      %[ftmp5]      \n\t"    \
        /* Bi-prediction term: the four matching src2 samples. */        \
        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                               \
        /* Sign-extend both operands to 32 bit (unpack high, >> 16). */  \
        "li         %[rtmp0],      0x10                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp8]                     \n\t"    \
        "punpcklhw  %[ftmp5],      %[ftmp2],      %[ftmp3]      \n\t"    \
        "punpckhhw  %[ftmp6],      %[ftmp2],      %[ftmp3]      \n\t"    \
        "punpckhhw  %[ftmp3],      %[ftmp2],      %[ftmp4]      \n\t"    \
        "punpcklhw  %[ftmp4],      %[ftmp2],      %[ftmp4]      \n\t"    \
        "psraw      %[ftmp5],      %[ftmp5],      %[ftmp8]      \n\t"    \
        "psraw      %[ftmp6],      %[ftmp6],      %[ftmp8]      \n\t"    \
        "psraw      %[ftmp3],      %[ftmp3],      %[ftmp8]      \n\t"    \
        "psraw      %[ftmp4],      %[ftmp4],      %[ftmp8]      \n\t"    \
        "paddw      %[ftmp5],      %[ftmp5],      %[ftmp4]      \n\t"    \
        "paddw      %[ftmp6],      %[ftmp6],      %[ftmp3]      \n\t"    \
        "paddw      %[ftmp5],      %[ftmp5],      %[offset]     \n\t"    \
        "paddw      %[ftmp6],      %[ftmp6],      %[offset]     \n\t"    \
        "psraw      %[ftmp5],      %[ftmp5],      %[shift]      \n\t"    \
        "psraw      %[ftmp6],      %[ftmp6],      %[shift]      \n\t"    \
        "packsswh   %[ftmp5],      %[ftmp5],      %[ftmp6]      \n\t"    \
        /* Zero the non-positive lanes, then saturate to bytes. */       \
        "pcmpgth    %[ftmp7],      %[ftmp5],      %[ftmp2]      \n\t"    \
        "pand       %[ftmp3],      %[ftmp5],      %[ftmp7]      \n\t"    \
        "packushb   %[ftmp3],      %[ftmp3],      %[ftmp3]      \n\t"    \
        MMI_USWC1(%[ftmp3], %[dst], 0x0)                                 \
                                                                         \
        "daddi      %[x],          %[x],         -0x01          \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],       0x08          \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x08          \n\t"    \
        PTR_ADDIU  "%[dst],        %[dst],        0x04          \n\t"    \
        "bnez       %[x],          2b                           \n\t"    \
                                                                         \
        "daddi      %[y],          %[y],         -0x01          \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],     " #src2_step "  \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],      " #src2_step "  \n\t"    \
        PTR_ADDIU  "%[dst],        %[dst],      " #dst_step "   \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],       0x80          \n\t"    \
        PTR_ADDU   "%[dst],        %[dst],        %[stride]     \n\t"    \
        PTR_ADDIU  "%[tmp],        %[tmp],        0x80          \n\t"    \
        "bnez       %[y],          1b                           \n\t"    \
        : RESTRICT_ASM_LOW32 RESTRICT_ASM_ALL64                          \
          [ftmp0]"=&f"(ftmp[0]),      [ftmp1]"=&f"(ftmp[1]),             \
          [ftmp2]"=&f"(ftmp[2]),      [ftmp3]"=&f"(ftmp[3]),             \
          [ftmp4]"=&f"(ftmp[4]),      [ftmp5]"=&f"(ftmp[5]),             \
          [ftmp6]"=&f"(ftmp[6]),      [ftmp7]"=&f"(ftmp[7]),             \
          [ftmp8]"=&f"(ftmp[8]),      [ftmp9]"=&f"(ftmp[9]),             \
          [ftmp10]"=&f"(ftmp[10]),    [src2]"+&r"(src2),                 \
          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),    \
          [offset]"+&f"(offset.f),    [rtmp0]"=&r"(rtmp[0])              \
        : [filter]"r"(filter), [stride]"r"(dststride),                   \
          [shift]"f"(shift.f)                                            \
        : "memory"                                                       \
    );                                                                   \
}

/* Instantiate the bi-predictive EPEL hv function for each block width. */
PUT_HEVC_EPEL_BI_HV(4, 1, -4, -8, -4);
PUT_HEVC_EPEL_BI_HV(8, 2, -8, -16, -8);
PUT_HEVC_EPEL_BI_HV(12, 3, -12, -24, -12);
PUT_HEVC_EPEL_BI_HV(16, 4, -16, -32, -16);
PUT_HEVC_EPEL_BI_HV(24, 6, -24, -48, -24);
PUT_HEVC_EPEL_BI_HV(32, 8, -32, -64, -32);

/**
 * Generate ff_hevc_put_hevc_pel_bi_pixels<w>_8_mmi(): 8-bit bi-predictive
 * full-pel copy (no interpolation filter), eight pixels per step:
 *     dst = clip8((((src << 6) + 64) + src2) >> 7)
 *
 * Macro parameters (values as used by the instantiations below):
 * @param w         block width in pixels, pasted into the function name
 * @param x_step    number of 8-pixel groups per row (w / 8); reloaded
 *                  into the column counter after every row
 * @param src_step  byte rewind of src after each row (-w)
 * @param dst_step  byte rewind of dst after each row (-w)
 * @param src2_step byte rewind of src2 after each row (-2 * w)
 *
 * The generated function takes the same arguments as the other bi
 * functions in this file; mx and my are not referenced by the body, and
 * width only seeds the first row's column counter (width >> 3).
 */
#define PUT_HEVC_PEL_BI_PIXELS(w, x_step, src_step, dst_step, src2_step) \
void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,            \
                                               ptrdiff_t _dststride,     \
                                               uint8_t *_src,            \
                                               ptrdiff_t _srcstride,     \
                                               int16_t *src2, int height, \
                                               intptr_t mx, intptr_t my, \
                                               int width)                \
{                                                                        \
    int x, y;                                                            \
    pixel *src = (pixel *)_src;                                          \
    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
    pixel *dst = (pixel *)_dst;                                          \
    ptrdiff_t dststride = _dststride / sizeof(pixel);                    \
    double ftmp[12];                                                     \
    uint64_t rtmp[1];                                                    \
    union av_intfloat64 shift;                                           \
    DECLARE_VAR_ALL64;                                                   \
    shift.i = 7;                                                         \
                                                                         \
    y = height;                                                          \
    x = width >> 3;                                                      \
    __asm__ volatile(                                                    \
        /* ftmp0 = 0, ftmp1 = 6, ftmp10 = 16, offset = 64 per halfword */ \
        "pxor       %[ftmp0],      %[ftmp0],      %[ftmp0]      \n\t"    \
        "li         %[rtmp0],      0x06                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp1]                     \n\t"    \
        "li         %[rtmp0],      0x10                         \n\t"    \
        "dmtc1      %[rtmp0],      %[ftmp10]                    \n\t"    \
        "li         %[rtmp0],      0x40                         \n\t"    \
        "dmtc1      %[rtmp0],      %[offset]                    \n\t"    \
        "punpcklhw  %[offset],     %[offset],     %[offset]     \n\t"    \
        "punpcklwd  %[offset],     %[offset],     %[offset]     \n\t"    \
                                                                         \
        "1:                                                     \n\t"    \
        "2:                                                     \n\t"    \
        /* Eight source pixels and the sixteen bytes of src2. */         \
        MMI_ULDC1(%[ftmp5], %[src], 0x00)                                \
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)                               \
        MMI_ULDC1(%[ftmp3], %[src2], 0x08)                               \
        /* (src << 6) + 64 as halfwords, then zero-extended to words. */ \
        "punpcklbh  %[ftmp4],      %[ftmp5],      %[ftmp0]      \n\t"    \
        "punpckhbh  %[ftmp5],      %[ftmp5],      %[ftmp0]      \n\t"    \
        "psllh      %[ftmp4],      %[ftmp4],      %[ftmp1]      \n\t"    \
        "psllh      %[ftmp5],      %[ftmp5],      %[ftmp1]      \n\t"    \
        "paddh      %[ftmp4],      %[ftmp4],      %[offset]     \n\t"    \
        "paddh      %[ftmp5],      %[ftmp5],      %[offset]     \n\t"    \
        "punpcklhw  %[ftmp6],      %[ftmp4],      %[ftmp0]      \n\t"    \
        "punpckhhw  %[ftmp7],      %[ftmp4],      %[ftmp0]      \n\t"    \
        "punpcklhw  %[ftmp8],      %[ftmp5],      %[ftmp0]      \n\t"    \
        "punpckhhw  %[ftmp9],      %[ftmp5],      %[ftmp0]      \n\t"    \
        /* src2 sign-extended to words (unpack high, then >> 16). */     \
        "punpcklhw  %[ftmp4],      %[ftmp0],      %[ftmp3]      \n\t"    \
        "punpckhhw  %[ftmp5],      %[ftmp0],      %[ftmp3]      \n\t"    \
        "punpckhhw  %[ftmp3],      %[ftmp0],      %[ftmp2]      \n\t"    \
        "punpcklhw  %[ftmp2],      %[ftmp0],      %[ftmp2]      \n\t"    \
        "psraw      %[ftmp2],      %[ftmp2],      %[ftmp10]     \n\t"    \
        "psraw      %[ftmp3],      %[ftmp3],      %[ftmp10]     \n\t"    \
        "psraw      %[ftmp4],      %[ftmp4],      %[ftmp10]     \n\t"    \
        "psraw      %[ftmp5],      %[ftmp5],      %[ftmp10]     \n\t"    \
        "paddw      %[ftmp2],      %[ftmp2],      %[ftmp6]      \n\t"    \
        "paddw      %[ftmp3],      %[ftmp3],      %[ftmp7]      \n\t"    \
        "paddw      %[ftmp4],      %[ftmp4],      %[ftmp8]      \n\t"    \
        "paddw      %[ftmp5],      %[ftmp5],      %[ftmp9]      \n\t"    \
        "psraw      %[ftmp2],      %[ftmp2],      %[shift]      \n\t"    \
        "psraw      %[ftmp3],      %[ftmp3],      %[shift]      \n\t"    \
        "psraw      %[ftmp4],      %[ftmp4],      %[shift]      \n\t"    \
        "psraw      %[ftmp5],      %[ftmp5],      %[shift]      \n\t"    \
        "packsswh   %[ftmp2],      %[ftmp2],      %[ftmp3]      \n\t"    \
        "packsswh   %[ftmp4],      %[ftmp4],      %[ftmp5]      \n\t"    \
        /* Zero the non-positive lanes, then saturate to bytes. */       \
        "pcmpgth    %[ftmp3],      %[ftmp2],      %[ftmp0]      \n\t"    \
        "pcmpgth    %[ftmp5],      %[ftmp4],      %[ftmp0]      \n\t"    \
        "pand       %[ftmp2],      %[ftmp2],      %[ftmp3]      \n\t"    \
        "pand       %[ftmp4],      %[ftmp4],      %[ftmp5]      \n\t"    \
        "packushb   %[ftmp2],      %[ftmp2],      %[ftmp4]      \n\t"    \
        MMI_USDC1(%[ftmp2], %[dst], 0x0)                                 \
                                                                         \
        "daddi      %[x],          %[x],         -0x01          \n\t"    \
        PTR_ADDIU  "%[src],        %[src],        0x08          \n\t"    \
        PTR_ADDIU  "%[dst],        %[dst],        0x08          \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],       0x10          \n\t"    \
        "bnez       %[x],          2b                           \n\t"    \
                                                                         \
        /* Next row: rewind the columns, then advance one row. */        \
        PTR_ADDIU  "%[src],        %[src],      " #src_step "   \n\t"    \
        PTR_ADDIU  "%[dst],        %[dst],      " #dst_step "   \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],     " #src2_step "  \n\t"    \
        "li         %[x],        " #x_step "                    \n\t"    \
        "daddi      %[y],          %[y],         -0x01          \n\t"    \
        PTR_ADDU   "%[src],        %[src],        %[srcstride]  \n\t"    \
        PTR_ADDU   "%[dst],        %[dst],        %[dststride]  \n\t"    \
        PTR_ADDIU  "%[src2],       %[src2],       0x80          \n\t"    \
        "bnez       %[y],          1b                           \n\t"    \
        : RESTRICT_ASM_ALL64                                             \
          [ftmp0]"=&f"(ftmp[0]),      [ftmp1]"=&f"(ftmp[1]),             \
          [ftmp2]"=&f"(ftmp[2]),      [ftmp3]"=&f"(ftmp[3]),             \
          [ftmp4]"=&f"(ftmp[4]),      [ftmp5]"=&f"(ftmp[5]),             \
          [ftmp6]"=&f"(ftmp[6]),      [ftmp7]"=&f"(ftmp[7]),             \
          [ftmp8]"=&f"(ftmp[8]),      [ftmp9]"=&f"(ftmp[9]),             \
          [ftmp10]"=&f"(ftmp[10]),    [offset]"=&f"(ftmp[11]),           \
          [src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src),           \
          [x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0])                \
        : [dststride]"r"(dststride), [shift]"f"(shift.f),                \
          [srcstride]"r"(srcstride)                                      \
        : "memory"                                                       \
    );                                                                   \
}                                                                        \

/* Instantiate the bi-predictive full-pel function for each block width. */
PUT_HEVC_PEL_BI_PIXELS(8, 1, -8, -8, -16);
PUT_HEVC_PEL_BI_PIXELS(16, 2, -16, -16, -32);
PUT_HEVC_PEL_BI_PIXELS(24, 3, -24, -24, -48);
PUT_HEVC_PEL_BI_PIXELS(32, 4, -32, -32, -64);
PUT_HEVC_PEL_BI_PIXELS(48, 6, -48, -48, -96);
PUT_HEVC_PEL_BI_PIXELS(64, 8, -64, -64, -128);

945cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_UNI_HV(w, x_step, src_step,
dst_step, tmp_step) \ 946cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \ 947cabdff1aSopenharmony_ci ptrdiff_t _dststride, \ 948cabdff1aSopenharmony_ci uint8_t *_src, \ 949cabdff1aSopenharmony_ci ptrdiff_t _srcstride, \ 950cabdff1aSopenharmony_ci int height, \ 951cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, \ 952cabdff1aSopenharmony_ci int width) \ 953cabdff1aSopenharmony_ci{ \ 954cabdff1aSopenharmony_ci int x, y; \ 955cabdff1aSopenharmony_ci const int8_t *filter; \ 956cabdff1aSopenharmony_ci pixel *src = (pixel*)_src; \ 957cabdff1aSopenharmony_ci ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ 958cabdff1aSopenharmony_ci pixel *dst = (pixel *)_dst; \ 959cabdff1aSopenharmony_ci ptrdiff_t dststride = _dststride / sizeof(pixel); \ 960cabdff1aSopenharmony_ci int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ 961cabdff1aSopenharmony_ci int16_t *tmp = tmp_array; \ 962cabdff1aSopenharmony_ci double ftmp[20]; \ 963cabdff1aSopenharmony_ci uint64_t rtmp[1]; \ 964cabdff1aSopenharmony_ci union av_intfloat64 shift; \ 965cabdff1aSopenharmony_ci union av_intfloat64 offset; \ 966cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 967cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 968cabdff1aSopenharmony_ci shift.i = 6; \ 969cabdff1aSopenharmony_ci offset.i = 32; \ 970cabdff1aSopenharmony_ci \ 971cabdff1aSopenharmony_ci src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ 972cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[mx - 1]; \ 973cabdff1aSopenharmony_ci x = width >> 2; \ 974cabdff1aSopenharmony_ci y = height + QPEL_EXTRA; \ 975cabdff1aSopenharmony_ci __asm__ volatile( \ 976cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 977cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 978cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 979cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 980cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 981cabdff1aSopenharmony_ci "psrah 
%[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 982cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 983cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 984cabdff1aSopenharmony_ci \ 985cabdff1aSopenharmony_ci "1: \n\t" \ 986cabdff1aSopenharmony_ci "2: \n\t" \ 987cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) \ 988cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) \ 989cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[src], 0x02) \ 990cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[src], 0x03) \ 991cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 992cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 993cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 994cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 995cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp7], %[ftmp8] \n\t" \ 996cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" \ 997cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 998cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 999cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 1000cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp7], %[ftmp8] \n\t" \ 1001cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ 1002cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ 1003cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 1004cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 1005cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp7], %[ftmp8] \n\t" \ 1006cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 1007cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ 1008cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" \ 1009cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \ 1010cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp7], %[ftmp8] 
\n\t" \ 1011cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 1012cabdff1aSopenharmony_ci %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10]) \ 1013cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 1014cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 1015cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 1016cabdff1aSopenharmony_ci MMI_USDC1(%[ftmp3], %[tmp], 0x0) \ 1017cabdff1aSopenharmony_ci \ 1018cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 1019cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x04 \n\t" \ 1020cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 1021cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 1022cabdff1aSopenharmony_ci \ 1023cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 1024cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 1025cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], " #src_step " \n\t" \ 1026cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #tmp_step " \n\t" \ 1027cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" \ 1028cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1029cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 1030cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 \ 1031cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 1032cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 1033cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 1034cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 1035cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 1036cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]), \ 1037cabdff1aSopenharmony_ci [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y), \ 1038cabdff1aSopenharmony_ci [x]"+&r"(x) \ 1039cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(srcstride) \ 1040cabdff1aSopenharmony_ci : "memory" \ 1041cabdff1aSopenharmony_ci ); \ 
1042cabdff1aSopenharmony_ci \ 1043cabdff1aSopenharmony_ci tmp = tmp_array; \ 1044cabdff1aSopenharmony_ci filter = ff_hevc_qpel_filters[my - 1]; \ 1045cabdff1aSopenharmony_ci x = width >> 2; \ 1046cabdff1aSopenharmony_ci y = height; \ 1047cabdff1aSopenharmony_ci __asm__ volatile( \ 1048cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[filter], 0x00) \ 1049cabdff1aSopenharmony_ci "li %[rtmp0], 0x08 \n\t" \ 1050cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 1051cabdff1aSopenharmony_ci "punpckhbh %[ftmp2], %[ftmp0], %[ftmp1] \n\t" \ 1052cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 1053cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 1054cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 1055cabdff1aSopenharmony_ci "li %[rtmp0], 0x06 \n\t" \ 1056cabdff1aSopenharmony_ci "dmtc1 %[rtmp0], %[ftmp0] \n\t" \ 1057cabdff1aSopenharmony_ci "punpcklhw %[offset], %[offset], %[offset] \n\t" \ 1058cabdff1aSopenharmony_ci "punpcklwd %[offset], %[offset], %[offset] \n\t" \ 1059cabdff1aSopenharmony_ci \ 1060cabdff1aSopenharmony_ci "1: \n\t" \ 1061cabdff1aSopenharmony_ci "li %[x], " #x_step " \n\t" \ 1062cabdff1aSopenharmony_ci "2: \n\t" \ 1063cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[tmp], 0x00) \ 1064cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1065cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[tmp], 0x00) \ 1066cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1067cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp5], %[tmp], 0x00) \ 1068cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1069cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp6], %[tmp], 0x00) \ 1070cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1071cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp7], %[tmp], 0x00) \ 1072cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1073cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp8], %[tmp], 0x00) \ 1074cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 
1075cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp9], %[tmp], 0x00) \ 1076cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1077cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp10], %[tmp], 0x00) \ 1078cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], -0x380 \n\t" \ 1079cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], \ 1080cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 1081cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10], \ 1082cabdff1aSopenharmony_ci %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14]) \ 1083cabdff1aSopenharmony_ci "pmaddhw %[ftmp11], %[ftmp3], %[ftmp1] \n\t" \ 1084cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp7], %[ftmp2] \n\t" \ 1085cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp4], %[ftmp1] \n\t" \ 1086cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp8], %[ftmp2] \n\t" \ 1087cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 1088cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 1089cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4]) \ 1090cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 1091cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 1092cabdff1aSopenharmony_ci "pmaddhw %[ftmp11], %[ftmp5], %[ftmp1] \n\t" \ 1093cabdff1aSopenharmony_ci "pmaddhw %[ftmp12], %[ftmp9], %[ftmp2] \n\t" \ 1094cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp6], %[ftmp1] \n\t" \ 1095cabdff1aSopenharmony_ci "pmaddhw %[ftmp14], %[ftmp10], %[ftmp2] \n\t" \ 1096cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" \ 1097cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ftmp14] \n\t" \ 1098cabdff1aSopenharmony_ci TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6]) \ 1099cabdff1aSopenharmony_ci "paddw %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 1100cabdff1aSopenharmony_ci "psraw %[ftmp5], %[ftmp5], %[ftmp0] \n\t" \ 1101cabdff1aSopenharmony_ci "packsswh %[ftmp3], %[ftmp3], 
%[ftmp5] \n\t" \ 1102cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[offset] \n\t" \ 1103cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[shift] \n\t" \ 1104cabdff1aSopenharmony_ci "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \ 1105cabdff1aSopenharmony_ci "pcmpgth %[ftmp7], %[ftmp3], %[ftmp7] \n\t" \ 1106cabdff1aSopenharmony_ci "pand %[ftmp3], %[ftmp3], %[ftmp7] \n\t" \ 1107cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 1108cabdff1aSopenharmony_ci MMI_USWC1(%[ftmp3], %[dst], 0x00) \ 1109cabdff1aSopenharmony_ci \ 1110cabdff1aSopenharmony_ci "daddi %[x], %[x], -0x01 \n\t" \ 1111cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x08 \n\t" \ 1112cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" \ 1113cabdff1aSopenharmony_ci "bnez %[x], 2b \n\t" \ 1114cabdff1aSopenharmony_ci \ 1115cabdff1aSopenharmony_ci "daddi %[y], %[y], -0x01 \n\t" \ 1116cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], " #tmp_step " \n\t" \ 1117cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], " #dst_step " \n\t" \ 1118cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1119cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp], %[tmp], 0x80 \n\t" \ 1120cabdff1aSopenharmony_ci "bnez %[y], 1b \n\t" \ 1121cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32 \ 1122cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), \ 1123cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), \ 1124cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), \ 1125cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), \ 1126cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ 1127cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \ 1128cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \ 1129cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), \ 1130cabdff1aSopenharmony_ci [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \ 
1131cabdff1aSopenharmony_ci [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \ 1132cabdff1aSopenharmony_ci : [filter]"r"(filter), [stride]"r"(dststride), \ 1133cabdff1aSopenharmony_ci [shift]"f"(shift.f) \ 1134cabdff1aSopenharmony_ci : "memory" \ 1135cabdff1aSopenharmony_ci ); \ 1136cabdff1aSopenharmony_ci} 1137cabdff1aSopenharmony_ci 1138cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(4, 1, -4, -4, -8); 1139cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(8, 2, -8, -8, -16); 1140cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(12, 3, -12, -12, -24); 1141cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(16, 4, -16, -16, -32); 1142cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(24, 6, -24, -24, -48); 1143cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(32, 8, -32, -32, -64); 1144cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(48, 12, -48, -48, -96); 1145cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(64, 16, -64, -64, -128); 1146