1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2019 Shiyou Yin (yinshiyou-hf@loongson.cn)
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavcodec/hevcdec.h"
22cabdff1aSopenharmony_ci#include "libavcodec/bit_depth_template.c"
23cabdff1aSopenharmony_ci#include "libavcodec/mips/hevcdsp_mips.h"
24cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h"
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_H(w, x_step, src_step, dst_step)                   \
27cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
28cabdff1aSopenharmony_ci                                        ptrdiff_t _srcstride,            \
29cabdff1aSopenharmony_ci                                        int height, intptr_t mx,         \
30cabdff1aSopenharmony_ci                                        intptr_t my, int width)          \
31cabdff1aSopenharmony_ci{                                                                        \
32cabdff1aSopenharmony_ci    int x, y;                                                            \
33cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src - 3;                                       \
34cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
35cabdff1aSopenharmony_ci    double ftmp[15];                                                     \
36cabdff1aSopenharmony_ci    uint64_t rtmp[1];                                                    \
37cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_qpel_filters[mx - 1];                 \
38cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                   \
39cabdff1aSopenharmony_ci                                                                         \
40cabdff1aSopenharmony_ci    x = x_step;                                                          \
41cabdff1aSopenharmony_ci    y = height;                                                          \
42cabdff1aSopenharmony_ci    __asm__ volatile(                                                    \
43cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
44cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"    \
45cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
46cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
47cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
48cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
49cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
50cabdff1aSopenharmony_ci        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"    \
51cabdff1aSopenharmony_ci                                                                         \
52cabdff1aSopenharmony_ci        "1:                                                     \n\t"    \
53cabdff1aSopenharmony_ci        "2:                                                     \n\t"    \
54cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)                                \
55cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)                                \
56cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[src], 0x02)                                \
57cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[src], 0x03)                                \
58cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
59cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
60cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
61cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
62cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"    \
63cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"    \
64cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"    \
65cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
66cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
67cabdff1aSopenharmony_ci        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"    \
68cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"    \
69cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"    \
70cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
71cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
72cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"    \
73cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"    \
74cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"    \
75cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
76cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
77cabdff1aSopenharmony_ci        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"    \
78cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
79cabdff1aSopenharmony_ci                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])            \
80cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
81cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
82cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
83cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[dst], 0x00)                                \
84cabdff1aSopenharmony_ci                                                                         \
85cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"    \
86cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
87cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"    \
88cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"    \
89cabdff1aSopenharmony_ci                                                                         \
90cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"    \
91cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"    \
92cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"    \
93cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"    \
94cabdff1aSopenharmony_ci        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
95cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
96cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"    \
97cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64                                             \
98cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
99cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
100cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
101cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
102cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
103cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),                \
104cabdff1aSopenharmony_ci          [src]"+&r"(src), [dst]"+&r"(dst), [y]"+&r"(y),                 \
105cabdff1aSopenharmony_ci          [x]"+&r"(x)                                                    \
106cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
107cabdff1aSopenharmony_ci        : "memory"                                                       \
108cabdff1aSopenharmony_ci    );                                                                   \
109cabdff1aSopenharmony_ci}
110cabdff1aSopenharmony_ci
111cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(4, 1, -4, -8);
112cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(8, 2, -8, -16);
113cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(12, 3, -12, -24);
114cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(16, 4, -16, -32);
115cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(24, 6, -24, -48);
116cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(32, 8, -32, -64);
117cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(48, 12, -48, -96);
118cabdff1aSopenharmony_ciPUT_HEVC_QPEL_H(64, 16, -64, -128);
119cabdff1aSopenharmony_ci
120cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_HV(w, x_step, src_step, dst_step)                  \
121cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
122cabdff1aSopenharmony_ci                                     ptrdiff_t _srcstride,               \
123cabdff1aSopenharmony_ci                                     int height, intptr_t mx,            \
124cabdff1aSopenharmony_ci                                     intptr_t my, int width)             \
125cabdff1aSopenharmony_ci{                                                                        \
126cabdff1aSopenharmony_ci    int x, y;                                                            \
127cabdff1aSopenharmony_ci    const int8_t *filter;                                                \
128cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;                                           \
129cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
130cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];         \
131cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;                                            \
132cabdff1aSopenharmony_ci    double ftmp[15];                                                     \
133cabdff1aSopenharmony_ci    uint64_t rtmp[1];                                                    \
134cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                   \
135cabdff1aSopenharmony_ci                                                                         \
136cabdff1aSopenharmony_ci    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                        \
137cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];                               \
138cabdff1aSopenharmony_ci    x = x_step;                                                          \
139cabdff1aSopenharmony_ci    y = height + QPEL_EXTRA;                                             \
140cabdff1aSopenharmony_ci    __asm__ volatile(                                                    \
141cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
142cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"    \
143cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
144cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
145cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
146cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
147cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
148cabdff1aSopenharmony_ci        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"    \
149cabdff1aSopenharmony_ci                                                                         \
150cabdff1aSopenharmony_ci        "1:                                                     \n\t"    \
151cabdff1aSopenharmony_ci        "2:                                                     \n\t"    \
152cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)                                \
153cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)                                \
154cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[src], 0x02)                                \
155cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[src], 0x03)                                \
156cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
157cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
158cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
159cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
160cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"    \
161cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"    \
162cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"    \
163cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
164cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
165cabdff1aSopenharmony_ci        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"    \
166cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"    \
167cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"    \
168cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
169cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
170cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"    \
171cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"    \
172cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"    \
173cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
174cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
175cabdff1aSopenharmony_ci        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"    \
176cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
177cabdff1aSopenharmony_ci                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])            \
178cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
179cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
180cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
181cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                                \
182cabdff1aSopenharmony_ci                                                                         \
183cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"    \
184cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
185cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"    \
186cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"    \
187cabdff1aSopenharmony_ci                                                                         \
188cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"    \
189cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"    \
190cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"    \
191cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],     " #dst_step "  \n\t"    \
192cabdff1aSopenharmony_ci        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
193cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
194cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"    \
195cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64                                             \
196cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
197cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
198cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
199cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
200cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
201cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),                \
202cabdff1aSopenharmony_ci          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                 \
203cabdff1aSopenharmony_ci          [x]"+&r"(x)                                                    \
204cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
205cabdff1aSopenharmony_ci        : "memory"                                                       \
206cabdff1aSopenharmony_ci    );                                                                   \
207cabdff1aSopenharmony_ci                                                                         \
208cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * 4 -12;                      \
209cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];                               \
210cabdff1aSopenharmony_ci    x = x_step;                                                          \
211cabdff1aSopenharmony_ci    y = height;                                                          \
212cabdff1aSopenharmony_ci    __asm__ volatile(                                                    \
213cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
214cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"    \
215cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
216cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
217cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
218cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
219cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
220cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x06                       \n\t"    \
221cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
222cabdff1aSopenharmony_ci                                                                         \
223cabdff1aSopenharmony_ci        "1:                                                     \n\t"    \
224cabdff1aSopenharmony_ci        "2:                                                     \n\t"    \
225cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                                \
226cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
227cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                                \
228cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
229cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                                \
230cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
231cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                                \
232cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
233cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                                \
234cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
235cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                                \
236cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
237cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                                \
238cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
239cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                               \
240cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"    \
241cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
242cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])         \
243cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],            \
244cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])         \
245cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"    \
246cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"    \
247cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"    \
248cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"    \
249cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"    \
250cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"    \
251cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])           \
252cabdff1aSopenharmony_ci        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
253cabdff1aSopenharmony_ci        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"    \
254cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"    \
255cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"    \
256cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"    \
257cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"    \
258cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"    \
259cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"    \
260cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])           \
261cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
262cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"    \
263cabdff1aSopenharmony_ci        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
264cabdff1aSopenharmony_ci        MMI_USDC1(%[ftmp3], %[dst], 0x00)                               \
265cabdff1aSopenharmony_ci                                                                         \
266cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"    \
267cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"    \
268cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"    \
269cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"    \
270cabdff1aSopenharmony_ci                                                                         \
271cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"    \
272cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"    \
273cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"    \
274cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],     " #dst_step "  \n\t"    \
275cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
276cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
277cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"    \
278cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64                                             \
279cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
280cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
281cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
282cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
283cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
284cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),              \
285cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),              \
286cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]), [rtmp0]"=&r"(rtmp[0]),                \
287cabdff1aSopenharmony_ci          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y),                 \
288cabdff1aSopenharmony_ci          [x]"+&r"(x)                                                    \
289cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
290cabdff1aSopenharmony_ci        : "memory"                                                       \
291cabdff1aSopenharmony_ci    );                                                                   \
292cabdff1aSopenharmony_ci}
293cabdff1aSopenharmony_ci
294cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(4, 1, -4, -8);
295cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(8, 2, -8, -16);
296cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(12, 3, -12, -24);
297cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(16, 4, -16, -32);
298cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(24, 6, -24, -48);
299cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(32, 8, -32, -64);
300cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(48, 12, -48, -96);
301cabdff1aSopenharmony_ciPUT_HEVC_QPEL_HV(64, 16, -64, -128);
302cabdff1aSopenharmony_ci
303cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_BI_H(w, x_step, src_step, src2_step, dst_step)    \
304cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
305cabdff1aSopenharmony_ci                                           ptrdiff_t _dststride,        \
306cabdff1aSopenharmony_ci                                           uint8_t *_src,               \
307cabdff1aSopenharmony_ci                                           ptrdiff_t _srcstride,        \
308cabdff1aSopenharmony_ci                                           int16_t *src2, int height,   \
309cabdff1aSopenharmony_ci                                           intptr_t mx, intptr_t my,    \
310cabdff1aSopenharmony_ci                                           int width)                   \
311cabdff1aSopenharmony_ci{                                                                       \
312cabdff1aSopenharmony_ci    int x, y;                                                           \
313cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src - 3;                         \
314cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);               \
315cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;                                \
316cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
317cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];             \
318cabdff1aSopenharmony_ci    double ftmp[20];                                                    \
319cabdff1aSopenharmony_ci    uint64_t rtmp[1];                                                   \
320cabdff1aSopenharmony_ci    union av_intfloat64 shift;                                          \
321cabdff1aSopenharmony_ci    union av_intfloat64 offset;                                         \
322cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                  \
323cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;                                                  \
324cabdff1aSopenharmony_ci    shift.i = 7;                                                        \
325cabdff1aSopenharmony_ci    offset.i = 64;                                                      \
326cabdff1aSopenharmony_ci                                                                        \
327cabdff1aSopenharmony_ci    x = width >> 2;                                                     \
328cabdff1aSopenharmony_ci    y = height;                                                         \
329cabdff1aSopenharmony_ci    __asm__ volatile(                                                   \
330cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
331cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"   \
332cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
333cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
334cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
335cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
336cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
337cabdff1aSopenharmony_ci        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
338cabdff1aSopenharmony_ci        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"   \
339cabdff1aSopenharmony_ci        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
340cabdff1aSopenharmony_ci                                                                        \
341cabdff1aSopenharmony_ci        "1:                                                     \n\t"   \
342cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"   \
343cabdff1aSopenharmony_ci        "2:                                                     \n\t"   \
344cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
345cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
346cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
347cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
348cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
349cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
350cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
351cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
352cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
353cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
354cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
355cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
356cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
357cabdff1aSopenharmony_ci        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
358cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
359cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
360cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
361cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
362cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
363cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
364cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
365cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
366cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
367cabdff1aSopenharmony_ci        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
368cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
369cabdff1aSopenharmony_ci                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
370cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
371cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
372cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
373cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[offset]   \n\t"   \
374cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
375cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x10                       \n\t"   \
376cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
377cabdff1aSopenharmony_ci        "punpcklhw    %[ftmp5],      %[ftmp0],      %[ftmp3]    \n\t"   \
378cabdff1aSopenharmony_ci        "punpckhhw    %[ftmp6],      %[ftmp0],      %[ftmp3]    \n\t"   \
379cabdff1aSopenharmony_ci        "punpckhhw    %[ftmp3],      %[ftmp0],      %[ftmp4]    \n\t"   \
380cabdff1aSopenharmony_ci        "punpcklhw    %[ftmp4],      %[ftmp0],      %[ftmp4]    \n\t"   \
381cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
382cabdff1aSopenharmony_ci        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
383cabdff1aSopenharmony_ci        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
384cabdff1aSopenharmony_ci        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
385cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
386cabdff1aSopenharmony_ci        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
387cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
388cabdff1aSopenharmony_ci        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
389cabdff1aSopenharmony_ci        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
390cabdff1aSopenharmony_ci        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
391cabdff1aSopenharmony_ci        "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
392cabdff1aSopenharmony_ci        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
393cabdff1aSopenharmony_ci        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
394cabdff1aSopenharmony_ci                                                                        \
395cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"   \
396cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
397cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
398cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
399cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"   \
400cabdff1aSopenharmony_ci                                                                        \
401cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"   \
402cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"   \
403cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
404cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
405cabdff1aSopenharmony_ci        PTR_ADDU     "%[src],        %[src],    %[src_stride]   \n\t"   \
406cabdff1aSopenharmony_ci        PTR_ADDU     "%[dst],        %[dst],    %[dst_stride]   \n\t"   \
407cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
408cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"   \
409cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
410cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
411cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
412cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
413cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
414cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
415cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
416cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2),                   \
417cabdff1aSopenharmony_ci          [dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x),   \
418cabdff1aSopenharmony_ci          [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0])                \
419cabdff1aSopenharmony_ci        : [src_stride]"r"(srcstride), [dst_stride]"r"(dststride),       \
420cabdff1aSopenharmony_ci          [filter]"r"(filter), [shift]"f"(shift.f)                      \
421cabdff1aSopenharmony_ci        : "memory"                                                      \
422cabdff1aSopenharmony_ci    );                                                                  \
423cabdff1aSopenharmony_ci}
424cabdff1aSopenharmony_ci
/*
 * Instantiate PUT_HEVC_QPEL_BI_H for widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * In every line the second argument is width / 4, the third and fifth are
 * -width and the fourth is -2 * width.
 * NOTE(review): presumably these negative values rewind the 4-byte (src, dst)
 * and 8-byte (src2) per-iteration pointer advances of the inner loop after
 * width / 4 iterations - confirm against the macro parameter list.
 */
425cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(4, 1, -4, -8, -4);
426cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(8, 2, -8, -16, -8);
427cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(12, 3, -12, -24, -12);
428cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(16, 4, -16, -32, -16);
429cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(24, 6, -24, -48, -24);
430cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(32, 8, -32, -64, -32);
431cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(48, 12, -48, -96, -48);
432cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_H(64, 16, -64, -128, -64);
433cabdff1aSopenharmony_ci
/*
 * PUT_HEVC_QPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)
 *
 * Expands to ff_hevc_put_hevc_qpel_bi_hv<w>_8_mmi(), which runs two inline
 * assembly passes, each producing 4 samples per inner-loop iteration:
 *
 * Pass 1 (horizontal, height + QPEL_EXTRA rows): loads the 8 int8
 *   coefficients of ff_hevc_qpel_filters[mx - 1], widens them to 16 bits,
 *   multiplies them with 8 consecutive source bytes for each of 4 adjacent
 *   positions and stores the four 16-bit sums to tmp. Rows of tmp are 0x80
 *   bytes apart.
 *
 * Pass 2 (vertical, height rows): loads ff_hevc_qpel_filters[my - 1] the
 *   same way, multiply-accumulates it over 8 consecutive tmp rows, shifts
 *   the 32-bit sums right by 6, adds the sign-extended src2 samples and
 *   offset (64), shifts right by shift (7), zeroes negative results,
 *   saturates to unsigned bytes and writes 4 bytes to dst. Rows of src2 are
 *   0x80 bytes apart; dst advances by dststride per row.
 *
 * Macro arguments:
 *   w         - suffix of the generated function name
 *   x_step    - inner-loop count reloaded into x for each row
 *   src_step  - immediate added to src at the end of each pass-1 row
 *   src2_step - immediate added to tmp (both passes) and to src2 (pass 2)
 *               at the end of each row
 *   dst_step  - immediate added to dst at the end of each pass-2 row
 *
 * NOTE(review): the first pass-1 row takes its count from width >> 2 while
 * later rows use x_step, so callers presumably pass width == w - confirm.
 */
434cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)   \
435cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
436cabdff1aSopenharmony_ci                                            ptrdiff_t _dststride,       \
437cabdff1aSopenharmony_ci                                            uint8_t *_src,              \
438cabdff1aSopenharmony_ci                                            ptrdiff_t _srcstride,       \
439cabdff1aSopenharmony_ci                                            int16_t *src2, int height,  \
440cabdff1aSopenharmony_ci                                            intptr_t mx, intptr_t my,   \
441cabdff1aSopenharmony_ci                                            int width)                  \
442cabdff1aSopenharmony_ci{                                                                       \
443cabdff1aSopenharmony_ci    int x, y;                                                           \
444cabdff1aSopenharmony_ci    const int8_t *filter;                                               \
445cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;                                          \
446cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
447cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;                                \
448cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
449cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];        \
450cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;                                           \
451cabdff1aSopenharmony_ci    double ftmp[20]; /* ftmp[0]..ftmp[14] are used as asm operands. */  \
452cabdff1aSopenharmony_ci    uint64_t rtmp[1];                                                   \
453cabdff1aSopenharmony_ci    union av_intfloat64 shift;                                          \
454cabdff1aSopenharmony_ci    union av_intfloat64 offset;                                         \
455cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                  \
456cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;                                                  \
457cabdff1aSopenharmony_ci    shift.i = 7;                                                        \
458cabdff1aSopenharmony_ci    offset.i = 64;                                                      \
459cabdff1aSopenharmony_ci    /* Back up QPEL_EXTRA_BEFORE rows and 3 pixels for pass 1. */       \
460cabdff1aSopenharmony_ci    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
461cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];                              \
462cabdff1aSopenharmony_ci    x = width >> 2;                                                     \
463cabdff1aSopenharmony_ci    y = height + QPEL_EXTRA;                                            \
464cabdff1aSopenharmony_ci    __asm__ volatile(                                                   \
465cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
466cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"   \
467cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
468cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
469cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
470cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
471cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
472cabdff1aSopenharmony_ci        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
473cabdff1aSopenharmony_ci        /* Labels 1 (next row) and 2 (next 4 samples) coincide here. */ \
474cabdff1aSopenharmony_ci        "1:                                                     \n\t"   \
475cabdff1aSopenharmony_ci        "2:                                                     \n\t"   \
476cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
477cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
478cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
479cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
480cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
481cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
482cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
483cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
484cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
485cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
486cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
487cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
488cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
489cabdff1aSopenharmony_ci        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
490cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
491cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
492cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
493cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
494cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
495cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
496cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
497cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
498cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
499cabdff1aSopenharmony_ci        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
500cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
501cabdff1aSopenharmony_ci                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
502cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
503cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
504cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
505cabdff1aSopenharmony_ci        MMI_USDC1(%[ftmp3], %[tmp], 0x00)                               \
506cabdff1aSopenharmony_ci        /* Next group: 4 source bytes in, 4 int16 (8 bytes) out. */     \
507cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"   \
508cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
509cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
510cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"   \
511cabdff1aSopenharmony_ci        /* Row done: reload x, apply per-row steps, go to next row. */  \
512cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"   \
513cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"   \
514cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
515cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
516cabdff1aSopenharmony_ci        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
517cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
518cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"   \
519cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64                                            \
520cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
521cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
522cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
523cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
524cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
525cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),               \
526cabdff1aSopenharmony_ci          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
527cabdff1aSopenharmony_ci          [x]"+&r"(x)                                                   \
528cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
529cabdff1aSopenharmony_ci        : "memory"                                                      \
530cabdff1aSopenharmony_ci    );                                                                  \
531cabdff1aSopenharmony_ci    /* Pass 2: vertical filter over tmp_array, combined with src2. */   \
532cabdff1aSopenharmony_ci    tmp    = tmp_array;                                                 \
533cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];                              \
534cabdff1aSopenharmony_ci    x = width >> 2; /* NOTE(review): dead store? asm reloads x. */      \
535cabdff1aSopenharmony_ci    y = height;                                                         \
536cabdff1aSopenharmony_ci    __asm__ volatile(                                                   \
537cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
538cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"   \
539cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
540cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
541cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
542cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
543cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
544cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x06                       \n\t"   \
545cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
546cabdff1aSopenharmony_ci        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
547cabdff1aSopenharmony_ci        /* Loop entry: ftmp0 = 6 (shift), offset in both words. */      \
548cabdff1aSopenharmony_ci        "1:                                                     \n\t"   \
549cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"   \
550cabdff1aSopenharmony_ci        "2:                                                     \n\t"   \
551cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
552cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
553cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
554cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
555cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
556cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
557cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
558cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
559cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                               \
560cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
561cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                               \
562cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
563cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                               \
564cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
565cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                              \
566cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   \
567cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
568cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
569cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],           \
570cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
571cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"   \
572cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"   \
573cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"   \
574cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"   \
575cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
576cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
577cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])          \
578cabdff1aSopenharmony_ci        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
579cabdff1aSopenharmony_ci        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
580cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"   \
581cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"   \
582cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"   \
583cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"   \
584cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
585cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
586cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])          \
587cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
588cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
589cabdff1aSopenharmony_ci        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
590cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
591cabdff1aSopenharmony_ci        "pxor         %[ftmp7],      %[ftmp7],      %[ftmp7]    \n\t"   \
592cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x10                       \n\t"   \
593cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
594cabdff1aSopenharmony_ci        "punpcklhw    %[ftmp5],      %[ftmp7],      %[ftmp3]    \n\t"   \
595cabdff1aSopenharmony_ci        "punpckhhw    %[ftmp6],      %[ftmp7],      %[ftmp3]    \n\t"   \
596cabdff1aSopenharmony_ci        "punpckhhw    %[ftmp3],      %[ftmp7],      %[ftmp4]    \n\t"   \
597cabdff1aSopenharmony_ci        "punpcklhw    %[ftmp4],      %[ftmp7],      %[ftmp4]    \n\t"   \
598cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
599cabdff1aSopenharmony_ci        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
600cabdff1aSopenharmony_ci        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
601cabdff1aSopenharmony_ci        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
602cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
603cabdff1aSopenharmony_ci        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
604cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"   \
605cabdff1aSopenharmony_ci        "paddw        %[ftmp6],      %[ftmp6],      %[offset]   \n\t"   \
606cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
607cabdff1aSopenharmony_ci        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
608cabdff1aSopenharmony_ci        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
609cabdff1aSopenharmony_ci        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp7]    \n\t"   \
610cabdff1aSopenharmony_ci        "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
611cabdff1aSopenharmony_ci        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
612cabdff1aSopenharmony_ci        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
613cabdff1aSopenharmony_ci        /* Next group: 4 samples of src2/tmp (8 bytes), 4 dst bytes. */ \
614cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"   \
615cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
616cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
617cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
618cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"   \
619cabdff1aSopenharmony_ci        /* Row done: apply per-row steps, then advance one row. */      \
620cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"   \
621cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
622cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
623cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
624cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
625cabdff1aSopenharmony_ci        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
626cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
627cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"   \
628cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
629cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
630cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
631cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
632cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
633cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
634cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
635cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),             \
636cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2),                   \
637cabdff1aSopenharmony_ci          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
638cabdff1aSopenharmony_ci          [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0])                \
639cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(dststride),                  \
640cabdff1aSopenharmony_ci          [shift]"f"(shift.f)                                           \
641cabdff1aSopenharmony_ci        : "memory"                                                      \
642cabdff1aSopenharmony_ci    );                                                                  \
643cabdff1aSopenharmony_ci}
644cabdff1aSopenharmony_ci
/*
 * Instantiate PUT_HEVC_QPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)
 * for widths 4, 8, 12, 16, 24, 32, 48 and 64. In every line x_step is
 * w / 4, src_step and dst_step are -w and src2_step is -2 * w, i.e. the
 * negated total of the 4-byte (src, dst) and 8-byte (tmp, src2) advances
 * the macro's inner loops make over x_step iterations.
 */
645cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(4, 1, -4, -8, -4);
646cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(8, 2, -8, -16, -8);
647cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(12, 3, -12, -24, -12);
648cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(16, 4, -16, -32, -16);
649cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(24, 6, -24, -48, -24);
650cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(32, 8, -32, -64, -32);
651cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(48, 12, -48, -96, -48);
652cabdff1aSopenharmony_ciPUT_HEVC_QPEL_BI_HV(64, 16, -64, -128, -64);
653cabdff1aSopenharmony_ci
/*
 * Generator for ff_hevc_put_hevc_epel_bi_hv<w>_8_mmi(): bi-predictive 4-tap
 * (EPEL) interpolation with horizontal and vertical filtering for 8-bit
 * samples, written with Loongson MMI inline assembly.
 *
 * The generated function runs two passes:
 *   1. (height + EPEL_EXTRA) source rows are filtered horizontally with
 *      ff_hevc_epel_filters[mx - 1]; the 16-bit results are stored in
 *      tmp_array, one row every 0x80 bytes.
 *   2. tmp_array is filtered vertically with ff_hevc_epel_filters[my - 1] and
 *      shifted right by 6; the matching src2 sample and the rounding offset
 *      64 are added, the sum is shifted right by 7, clipped to 0..255 and
 *      written to dst. src2 advances 0x80 bytes per row.
 *
 * Macro parameters:
 *   w         - block width, pasted into the function name
 *   x_step    - number of 4-pixel groups per row (reloaded into x each row)
 *   src_step  - byte rewind applied to src at the end of a row
 *   src2_step - byte rewind applied to tmp and src2 at the end of a row
 *   dst_step  - byte rewind applied to dst at the end of a row
 *
 * Three things must hold for the result to be valid and are easy to break:
 * pass 1 must load the source bytes into the registers it filters
 * (ftmp2..ftmp5), pass 1 must store its result to tmp, and pass 2 must take
 * its second prediction from src2.
 */
#define PUT_HEVC_EPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)   \
void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
                                            ptrdiff_t _dststride,       \
                                            uint8_t *_src,              \
                                            ptrdiff_t _srcstride,       \
                                            int16_t *src2, int height,  \
                                            intptr_t mx, intptr_t my,   \
                                            int width)                  \
{                                                                       \
    int x, y;                                                           \
    pixel *src = (pixel *)_src;                                         \
    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
    pixel *dst          = (pixel *)_dst;                                \
    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
    const int8_t *filter = ff_hevc_epel_filters[mx - 1];                \
    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];        \
    int16_t *tmp = tmp_array;                                           \
    double  ftmp[12];                                                   \
    uint64_t rtmp[1];                                                   \
    union av_intfloat64 shift;                                          \
    union av_intfloat64 offset;                                         \
    DECLARE_VAR_ALL64;                                                  \
    DECLARE_VAR_LOW32;                                                  \
    shift.i = 7;                                                        \
    offset.i = 64;                                                      \
                                                                        \
    /* Pass 1: horizontal filter into tmp_array. */                     \
    src -= (EPEL_EXTRA_BEFORE * srcstride + 1);                         \
    x = width >> 2;                                                     \
    y = height + EPEL_EXTRA;                                            \
    __asm__ volatile(                                                   \
        /* ftmp1 = four taps widened from int8 to int16 */              \
        MMI_LWC1(%[ftmp1], %[filter], 0x00)                             \
        "li           %[rtmp0],      0x08                       \n\t"   \
        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
                                                                        \
        "1:                                                     \n\t"   \
        "2:                                                     \n\t"   \
        /* Load into ftmp2..ftmp5: these are the registers filtered */  \
        /* below; ftmp6..ftmp9 are only TRANSPOSE_4H scratch.       */  \
        MMI_ULDC1(%[ftmp2], %[src], 0x00)                               \
        MMI_ULDC1(%[ftmp3], %[src], 0x01)                               \
        MMI_ULDC1(%[ftmp4], %[src], 0x02)                               \
        MMI_ULDC1(%[ftmp5], %[src], 0x03)                               \
        "punpcklbh    %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
        "pmullh       %[ftmp2],      %[ftmp2],      %[ftmp1]    \n\t"   \
        "punpcklbh    %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
        "pmullh       %[ftmp3],      %[ftmp3],      %[ftmp1]    \n\t"   \
        "punpcklbh    %[ftmp4],      %[ftmp4],      %[ftmp0]    \n\t"   \
        "pmullh       %[ftmp4],      %[ftmp4],      %[ftmp1]    \n\t"   \
        "punpcklbh    %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
        "pmullh       %[ftmp5],      %[ftmp5],      %[ftmp1]    \n\t"   \
        TRANSPOSE_4H(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5],            \
                     %[ftmp6], %[ftmp7], %[ftmp8], %[ftmp9])            \
        /* Sum the four tap products of each output sample. */          \
        "paddh        %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"   \
        "paddh        %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"   \
        "paddh        %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"   \
        /* Store (not load) the row so pass 2 sees filtered data. */    \
        MMI_USDC1(%[ftmp2], %[tmp], 0x00)                               \
                                                                        \
        "daddi        %[x],          %[x],         -0x01        \n\t"   \
        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
        "bnez         %[x],          2b                         \n\t"   \
                                                                        \
        /* Next row: rewind the pointers, then step one row down. */    \
        "daddi        %[y],          %[y],         -0x01        \n\t"   \
        "li           %[x],        " #x_step "                  \n\t"   \
        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
        "bnez         %[y],          1b                         \n\t"   \
        : RESTRICT_ASM_ALL64                                            \
          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
          [rtmp0]"=&r"(rtmp[0]),                                        \
          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
          [x]"+&r"(x)                                                   \
        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
        : "memory"                                                      \
    );                                                                  \
                                                                        \
    /* Pass 2: vertical filter, add src2, round, clip, store. */        \
    tmp      = tmp_array;                                               \
    filter = ff_hevc_epel_filters[my - 1];                              \
    x = width >> 2;                                                     \
    y = height;                                                         \
    __asm__ volatile(                                                   \
        /* ftmp1 = vertical taps (int16), ftmp0 = 6, ftmp2 = 0 */       \
        MMI_LWC1(%[ftmp1], %[filter], 0x00)                             \
        "li           %[rtmp0],      0x08                       \n\t"   \
        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
        "li           %[rtmp0],      0x06                       \n\t"   \
        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
        "pxor         %[ftmp2],      %[ftmp2],      %[ftmp2]    \n\t"   \
                                                                        \
        "1:                                                     \n\t"   \
        "li           %[x],        " #x_step "                  \n\t"   \
        "2:                                                     \n\t"   \
        /* Fetch four consecutive tmp rows, then restore tmp. */        \
        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
        PTR_ADDIU    "%[tmp],        %[tmp],       -0x180       \n\t"   \
        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
        /* Columns 0-1: multiply-accumulate with the taps, >> 6. */     \
        "pmaddhw      %[ftmp7],      %[ftmp3],      %[ftmp1]    \n\t"   \
        "pmaddhw      %[ftmp8],      %[ftmp4],      %[ftmp1]    \n\t"   \
        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp3], %[ftmp4])            \
        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
        /* Columns 2-3. */                                              \
        "pmaddhw      %[ftmp7],      %[ftmp5],      %[ftmp1]    \n\t"   \
        "pmaddhw      %[ftmp8],      %[ftmp6],      %[ftmp1]    \n\t"   \
        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp5], %[ftmp6])            \
        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
        /* Bi-prediction partner: four int16 samples from src2. */      \
        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
        /* Sign-extend both operands to 32 bit (ftmp8 = 16). */         \
        "li           %[rtmp0],      0x10                       \n\t"   \
        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
        "punpcklhw    %[ftmp5],      %[ftmp2],      %[ftmp3]    \n\t"   \
        "punpckhhw    %[ftmp6],      %[ftmp2],      %[ftmp3]    \n\t"   \
        "punpckhhw    %[ftmp3],      %[ftmp2],      %[ftmp4]    \n\t"   \
        "punpcklhw    %[ftmp4],      %[ftmp2],      %[ftmp4]    \n\t"   \
        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
        /* (a + b + offset) >> shift, then clip to 0..255. */           \
        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
        "paddw        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"   \
        "paddw        %[ftmp6],      %[ftmp6],      %[offset]   \n\t"   \
        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp2]    \n\t"   \
        "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
        MMI_USWC1(%[ftmp3], %[dst], 0x0)                                \
                                                                        \
        "daddi        %[x],          %[x],         -0x01        \n\t"   \
        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
        "bnez         %[x],          2b                         \n\t"   \
                                                                        \
        /* Next row: rewind the pointers, then step one row down. */    \
        "daddi        %[y],          %[y],         -0x01        \n\t"   \
        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
        "bnez         %[y],          1b                         \n\t"   \
        : RESTRICT_ASM_LOW32 RESTRICT_ASM_ALL64                         \
          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
          [ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2),                   \
          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
          [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0])                \
        : [filter]"r"(filter), [stride]"r"(dststride),                  \
          [shift]"f"(shift.f)                                           \
        : "memory"                                                      \
    );                                                                  \
}
827cabdff1aSopenharmony_ci
828cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(4, 1, -4, -8, -4);
829cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(8, 2, -8, -16, -8);
830cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(12, 3, -12, -24, -12);
831cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(16, 4, -16, -32, -16);
832cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(24, 6, -24, -48, -24);
833cabdff1aSopenharmony_ciPUT_HEVC_EPEL_BI_HV(32, 8, -32, -64, -32);
834cabdff1aSopenharmony_ci
/*
 * Generator for ff_hevc_put_hevc_pel_bi_pixels<w>_8_mmi(): bi-predictive
 * full-pel path for 8-bit samples, written with Loongson MMI inline assembly.
 *
 * Each inner iteration handles 8 pixels: the source bytes are widened and
 * shifted left by 6, 64 is added, the matching eight int16 samples from src2
 * are added, the sum is shifted right by 7, clipped to 0..255 and stored to
 * dst. src2 advances 0x80 bytes per row. mx and my are not used.
 *
 * Macro parameters:
 *   w         - block width, pasted into the function name
 *   x_step    - number of 8-pixel groups per row (reloaded into x each row)
 *   src_step  - byte rewind applied to src at the end of a row
 *   dst_step  - byte rewind applied to dst at the end of a row
 *   src2_step - byte rewind applied to src2 at the end of a row
 */
#define PUT_HEVC_PEL_BI_PIXELS(w, x_step, src_step, dst_step, src2_step)  \
void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
                                               ptrdiff_t _dststride,      \
                                               uint8_t *_src,             \
                                               ptrdiff_t _srcstride,      \
                                               int16_t *src2, int height, \
                                               intptr_t mx, intptr_t my,  \
                                               int width)                 \
{                                                                         \
    pixel *dst          = (pixel *)_dst;                                  \
    pixel *src          = (pixel *)_src;                                  \
    ptrdiff_t dststride = _dststride / sizeof(pixel);                     \
    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                     \
    int x = width >> 3;                                                   \
    int y = height;                                                       \
    union av_intfloat64 shift;                                            \
    uint64_t gpr[1];                                                      \
    double fpr[12];                                                       \
    DECLARE_VAR_ALL64;                                                    \
                                                                          \
    shift.i = 7;                                                          \
    __asm__ volatile(                                                     \
        /* ftmp0 = 0, ftmp1 = 6, ftmp10 = 16, offset = 64 per halfword */ \
        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"     \
        "li           %[rtmp0],      0x06                       \n\t"     \
        "dmtc1        %[rtmp0],      %[ftmp1]                   \n\t"     \
        "li           %[rtmp0],      0x10                       \n\t"     \
        "dmtc1        %[rtmp0],      %[ftmp10]                  \n\t"     \
        "li           %[rtmp0],      0x40                       \n\t"     \
        "dmtc1        %[rtmp0],      %[offset]                  \n\t"     \
        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"     \
        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"     \
                                                                          \
        "1:                                                     \n\t"     \
        "2:                                                     \n\t"     \
        /* 8 pixels from src, 8 int16 partner samples from src2 */        \
        MMI_ULDC1(%[ftmp5], %[src], 0x00)                                 \
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)                                \
        MMI_ULDC1(%[ftmp3], %[src2], 0x08)                                \
        /* Widen pixels to int16, scale by << 6, add 64 */                \
        "punpcklbh    %[ftmp4],      %[ftmp5],      %[ftmp0]    \n\t"     \
        "punpckhbh    %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"     \
        "psllh        %[ftmp4],      %[ftmp4],      %[ftmp1]    \n\t"     \
        "psllh        %[ftmp5],      %[ftmp5],      %[ftmp1]    \n\t"     \
        "paddh        %[ftmp4],      %[ftmp4],      %[offset]   \n\t"     \
        "paddh        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"     \
        /* Zero-extend the scaled pixels to 32-bit lanes */               \
        "punpcklhw    %[ftmp6],      %[ftmp4],      %[ftmp0]    \n\t"     \
        "punpckhhw    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"     \
        "punpcklhw    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"     \
        "punpckhhw    %[ftmp9],      %[ftmp5],      %[ftmp0]    \n\t"     \
        /* Sign-extend src2 to 32-bit lanes (<< 16 via unpack, >> 16) */  \
        "punpcklhw    %[ftmp4],      %[ftmp0],      %[ftmp3]    \n\t"     \
        "punpckhhw    %[ftmp5],      %[ftmp0],      %[ftmp3]    \n\t"     \
        "punpckhhw    %[ftmp3],      %[ftmp0],      %[ftmp2]    \n\t"     \
        "punpcklhw    %[ftmp2],      %[ftmp0],      %[ftmp2]    \n\t"     \
        "psraw        %[ftmp2],      %[ftmp2],      %[ftmp10]   \n\t"     \
        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp10]   \n\t"     \
        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp10]   \n\t"     \
        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp10]   \n\t"     \
        /* Sum both predictions and shift right by 7 */                   \
        "paddw        %[ftmp2],      %[ftmp2],      %[ftmp6]    \n\t"     \
        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp7]    \n\t"     \
        "paddw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"     \
        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp9]    \n\t"     \
        "psraw        %[ftmp2],      %[ftmp2],      %[shift]    \n\t"     \
        "psraw        %[ftmp3],      %[ftmp3],      %[shift]    \n\t"     \
        "psraw        %[ftmp4],      %[ftmp4],      %[shift]    \n\t"     \
        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"     \
        /* Zero out non-positive lanes, saturate to bytes, store 8 */     \
        "packsswh     %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"     \
        "packsswh     %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"     \
        "pcmpgth      %[ftmp3],      %[ftmp2],      %[ftmp0]    \n\t"     \
        "pcmpgth      %[ftmp5],      %[ftmp4],      %[ftmp0]    \n\t"     \
        "pand         %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"     \
        "pand         %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"     \
        "packushb     %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"     \
        MMI_USDC1(%[ftmp2], %[dst], 0x0)                                  \
                                                                          \
        /* Advance to the next group of 8 pixels */                       \
        "daddi        %[x],          %[x],         -0x01        \n\t"     \
        PTR_ADDIU    "%[src],        %[src],        0x08        \n\t"     \
        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"     \
        PTR_ADDIU    "%[src2],       %[src2],       0x10        \n\t"     \
        "bnez         %[x],          2b                         \n\t"     \
                                                                          \
        /* End of row: undo the column advance, move down one row */      \
        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"     \
        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"     \
        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"     \
        "li           %[x],        " #x_step "                  \n\t"     \
        "daddi        %[y],          %[y],         -0x01        \n\t"     \
        PTR_ADDU     "%[src],        %[src],       %[srcstride] \n\t"     \
        PTR_ADDU     "%[dst],        %[dst],       %[dststride] \n\t"     \
        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"     \
        "bnez         %[y],          1b                         \n\t"     \
        : RESTRICT_ASM_ALL64                                              \
          [ftmp0]"=&f"(fpr[0]), [ftmp1]"=&f"(fpr[1]),                     \
          [ftmp2]"=&f"(fpr[2]), [ftmp3]"=&f"(fpr[3]),                     \
          [ftmp4]"=&f"(fpr[4]), [ftmp5]"=&f"(fpr[5]),                     \
          [ftmp6]"=&f"(fpr[6]), [ftmp7]"=&f"(fpr[7]),                     \
          [ftmp8]"=&f"(fpr[8]), [ftmp9]"=&f"(fpr[9]),                     \
          [ftmp10]"=&f"(fpr[10]), [offset]"=&f"(fpr[11]),                 \
          [src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src),            \
          [x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(gpr[0])                  \
        : [dststride]"r"(dststride), [shift]"f"(shift.f),                 \
          [srcstride]"r"(srcstride)                                       \
        : "memory"                                                        \
    );                                                                    \
}                                                                         \
937cabdff1aSopenharmony_ci
938cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(8, 1, -8, -8, -16);
939cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(16, 2, -16, -16, -32);
940cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(24, 3, -24, -24, -48);
941cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(32, 4, -32, -32, -64);
942cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(48, 6, -48, -48, -96);
943cabdff1aSopenharmony_ciPUT_HEVC_PEL_BI_PIXELS(64, 8, -64, -64, -128);
944cabdff1aSopenharmony_ci
945cabdff1aSopenharmony_ci#define PUT_HEVC_QPEL_UNI_HV(w, x_step, src_step, dst_step, tmp_step)   \
946cabdff1aSopenharmony_civoid ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
947cabdff1aSopenharmony_ci                                             ptrdiff_t _dststride,      \
948cabdff1aSopenharmony_ci                                             uint8_t *_src,             \
949cabdff1aSopenharmony_ci                                             ptrdiff_t _srcstride,      \
950cabdff1aSopenharmony_ci                                             int height,                \
951cabdff1aSopenharmony_ci                                             intptr_t mx, intptr_t my,  \
952cabdff1aSopenharmony_ci                                             int width)                 \
953cabdff1aSopenharmony_ci{                                                                       \
954cabdff1aSopenharmony_ci    int x, y;                                                           \
955cabdff1aSopenharmony_ci    const int8_t *filter;                                               \
956cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;                                          \
957cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
958cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;                                \
959cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
960cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];        \
961cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;                                           \
962cabdff1aSopenharmony_ci    double ftmp[20];                                                    \
963cabdff1aSopenharmony_ci    uint64_t rtmp[1];                                                   \
964cabdff1aSopenharmony_ci    union av_intfloat64 shift;                                          \
965cabdff1aSopenharmony_ci    union av_intfloat64 offset;                                         \
966cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                  \
967cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;                                                  \
968cabdff1aSopenharmony_ci    shift.i = 6;                                                        \
969cabdff1aSopenharmony_ci    offset.i = 32;                                                      \
970cabdff1aSopenharmony_ci                                                                        \
971cabdff1aSopenharmony_ci    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
972cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];                              \
973cabdff1aSopenharmony_ci    x = width >> 2;                                                     \
974cabdff1aSopenharmony_ci    y = height + QPEL_EXTRA;                                            \
975cabdff1aSopenharmony_ci    __asm__ volatile(                                                   \
976cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
977cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"   \
978cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
979cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
980cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
981cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
982cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
983cabdff1aSopenharmony_ci        "pxor         %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
984cabdff1aSopenharmony_ci                                                                        \
985cabdff1aSopenharmony_ci        "1:                                                     \n\t"   \
986cabdff1aSopenharmony_ci        "2:                                                     \n\t"   \
987cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
988cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
989cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
990cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
991cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
992cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
993cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
994cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
995cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
996cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
997cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
998cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
999cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
1000cabdff1aSopenharmony_ci        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
1001cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
1002cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
1003cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
1004cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
1005cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
1006cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
1007cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
1008cabdff1aSopenharmony_ci        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
1009cabdff1aSopenharmony_ci        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
1010cabdff1aSopenharmony_ci        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
1011cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
1012cabdff1aSopenharmony_ci                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
1013cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
1014cabdff1aSopenharmony_ci        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
1015cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
1016cabdff1aSopenharmony_ci        MMI_USDC1(%[ftmp3], %[tmp], 0x0)                                \
1017cabdff1aSopenharmony_ci                                                                        \
1018cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"   \
1019cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
1020cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
1021cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"   \
1022cabdff1aSopenharmony_ci                                                                        \
1023cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"   \
1024cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"   \
1025cabdff1aSopenharmony_ci        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
1026cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],      " #tmp_step " \n\t"   \
1027cabdff1aSopenharmony_ci        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
1028cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1029cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"   \
1030cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64                                            \
1031cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
1032cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
1033cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
1034cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
1035cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
1036cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),               \
1037cabdff1aSopenharmony_ci          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
1038cabdff1aSopenharmony_ci          [x]"+&r"(x)                                                   \
1039cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
1040cabdff1aSopenharmony_ci        : "memory"                                                      \
1041cabdff1aSopenharmony_ci    );                                                                  \
1042cabdff1aSopenharmony_ci                                                                        \
1043cabdff1aSopenharmony_ci    tmp    = tmp_array;                                                 \
1044cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];                              \
1045cabdff1aSopenharmony_ci    x = width >> 2;                                                     \
1046cabdff1aSopenharmony_ci    y = height;                                                         \
1047cabdff1aSopenharmony_ci    __asm__ volatile(                                                   \
1048cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
1049cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x08                       \n\t"   \
1050cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
1051cabdff1aSopenharmony_ci        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
1052cabdff1aSopenharmony_ci        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
1053cabdff1aSopenharmony_ci        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
1054cabdff1aSopenharmony_ci        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
1055cabdff1aSopenharmony_ci        "li           %[rtmp0],      0x06                       \n\t"   \
1056cabdff1aSopenharmony_ci        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
1057cabdff1aSopenharmony_ci        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"   \
1058cabdff1aSopenharmony_ci        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
1059cabdff1aSopenharmony_ci                                                                        \
1060cabdff1aSopenharmony_ci        "1:                                                     \n\t"   \
1061cabdff1aSopenharmony_ci        "li           %[x],        " #x_step "                  \n\t"   \
1062cabdff1aSopenharmony_ci        "2:                                                     \n\t"   \
1063cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
1064cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1065cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
1066cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1067cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
1068cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1069cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
1070cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1071cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                               \
1072cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1073cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                               \
1074cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1075cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                               \
1076cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1077cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                              \
1078cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   \
1079cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
1080cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
1081cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],           \
1082cabdff1aSopenharmony_ci                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
1083cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"   \
1084cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"   \
1085cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"   \
1086cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"   \
1087cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
1088cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
1089cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])          \
1090cabdff1aSopenharmony_ci        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
1091cabdff1aSopenharmony_ci        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
1092cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"   \
1093cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"   \
1094cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"   \
1095cabdff1aSopenharmony_ci        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"   \
1096cabdff1aSopenharmony_ci        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
1097cabdff1aSopenharmony_ci        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
1098cabdff1aSopenharmony_ci        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])          \
1099cabdff1aSopenharmony_ci        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
1100cabdff1aSopenharmony_ci        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
1101cabdff1aSopenharmony_ci        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
1102cabdff1aSopenharmony_ci        "paddh        %[ftmp3],      %[ftmp3],      %[offset]   \n\t"   \
1103cabdff1aSopenharmony_ci        "psrah        %[ftmp3],      %[ftmp3],      %[shift]    \n\t"   \
1104cabdff1aSopenharmony_ci        "pxor         %[ftmp7],      %[ftmp7],      %[ftmp7]    \n\t"   \
1105cabdff1aSopenharmony_ci        "pcmpgth      %[ftmp7],      %[ftmp3],      %[ftmp7]    \n\t"   \
1106cabdff1aSopenharmony_ci        "pand         %[ftmp3],      %[ftmp3],      %[ftmp7]    \n\t"   \
1107cabdff1aSopenharmony_ci        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
1108cabdff1aSopenharmony_ci        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
1109cabdff1aSopenharmony_ci                                                                        \
1110cabdff1aSopenharmony_ci        "daddi        %[x],          %[x],         -0x01        \n\t"   \
1111cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
1112cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
1113cabdff1aSopenharmony_ci        "bnez         %[x],          2b                         \n\t"   \
1114cabdff1aSopenharmony_ci                                                                        \
1115cabdff1aSopenharmony_ci        "daddi        %[y],          %[y],         -0x01        \n\t"   \
1116cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],     " #tmp_step "  \n\t"   \
1117cabdff1aSopenharmony_ci        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
1118cabdff1aSopenharmony_ci        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
1119cabdff1aSopenharmony_ci        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
1120cabdff1aSopenharmony_ci        "bnez         %[y],          1b                         \n\t"   \
1121cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
1122cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
1123cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
1124cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
1125cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
1126cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
1127cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
1128cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),             \
1129cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]),                                      \
1130cabdff1aSopenharmony_ci          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
1131cabdff1aSopenharmony_ci          [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0])                \
1132cabdff1aSopenharmony_ci        : [filter]"r"(filter), [stride]"r"(dststride),                  \
1133cabdff1aSopenharmony_ci          [shift]"f"(shift.f)                                           \
1134cabdff1aSopenharmony_ci        : "memory"                                                      \
1135cabdff1aSopenharmony_ci    );                                                                  \
1136cabdff1aSopenharmony_ci}
1137cabdff1aSopenharmony_ci
/*
 * Instantiate PUT_HEVC_QPEL_UNI_HV once per block width (4 through 64).
 *
 * NOTE(review): the macro's parameter list lies outside this excerpt; judging
 * by the names stringified in its asm body, the arguments are presumably
 * (w, x_step, src_step, dst_step, tmp_step) -- confirm against the #define.
 *
 * Under that reading, in every row below:
 *   - x_step   = w / 4: the inner-loop count; each inner iteration advances
 *                src and dst by 4 bytes and tmp by 8 bytes.
 *   - src_step = dst_step = -w: undoes the 4 * x_step bytes the inner loop
 *                added, before the row stride is applied.
 *   - tmp_step = -2 * w: undoes the 8 * x_step bytes added to tmp before
 *                the fixed 0x80-byte row advance.
 */
1138cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(4, 1, -4, -4, -8);
1139cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(8, 2, -8, -8, -16);
1140cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(12, 3, -12, -12, -24);
1141cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(16, 4, -16, -16, -32);
1142cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(24, 6, -24, -24, -48);
1143cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(32, 8, -32, -32, -64);
1144cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(48, 12, -48, -48, -96);
1145cabdff1aSopenharmony_ciPUT_HEVC_QPEL_UNI_HV(64, 16, -64, -64, -128);
1146