/*
 * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h"
22cabdff1aSopenharmony_ci#include "qpeldsp_mips.h"
23cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for one 16-pixel row (rounding variant).
 *
 * inp0, inp1   - source vectors for the row; the 16-width caller in this file
 *                loads inp0 from src and inp1 from src + 1
 * mask         - byte-shuffle indices used to build tmp0/tmp1, which feed the
 *                sldi-based neighbour vectors data0..data5 (the caller passes
 *                the indices 15..0)
 * coef0        - halfword vector multiplied into the inp0/inp1 pair sums
 * coef1, coef2 - byte vectors handed to DPADD_UB2_UH for the data3/data0 and
 *                data4/data1 pairs respectively
 *
 * Per pixel this evaluates
 *     (sum0 * coef0 + pair2 * coef2) - (sum3 + pair1 * coef1),
 * applies a rounding arithmetic right shift by 5 (SRARI), runs the result
 * through CLIP_SH2_0_255 and packs both halves into one byte vector, which is
 * the value of the whole expression.
 *
 * This is a GNU statement expression: the locals declared inside it (out,
 * tmp0, data0, sum0_r, ...) shadow any argument spelled with the same
 * identifier, so callers must not pass variables with those names.
 * NOTE(review): the exact lane semantics of VSHF/ILVRL/HADD/DPADD come from
 * generic_macros_msa.h, which is not visible here - confirm before reordering
 * any statement.
 */
#define APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, coef0, coef1, coef2)  \
( {                                                                     \
    v16u8 out, tmp0, tmp1;                                              \
    v16u8 data0, data1, data2, data3, data4, data5;                     \
    v8i16 res_r, res_l;                                                 \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
                                                                        \
    VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1);         \
    ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l);                            \
    data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15);       \
    data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1);        \
    HADD_UB2_UH(sum0_r, sum0_l, sum0_r, sum0_l);                        \
    ILVRL_B2_UH(data3, data0, sum1_r, sum1_l);                          \
    data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14);       \
    data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2);        \
    sum0_r *= (v8u16) (coef0);                                          \
    sum0_l *= (v8u16) (coef0);                                          \
    ILVRL_B2_UH(data4, data1, sum2_r, sum2_l);                          \
    data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13);       \
    data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3);        \
    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
    ILVRL_B2_UH(data5, data2, sum3_r, sum3_l);                          \
    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
    res_r = (v8i16) (sum0_r - sum3_r);                                  \
    res_l = (v8i16) (sum0_l - sum3_l);                                  \
    SRARI_H2_SH(res_r, res_l, 5);                                       \
    CLIP_SH2_0_255(res_r, res_l);                                       \
    out = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
                                                                        \
    out;                                                                \
} )
57cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for two 8-pixel rows at once (rounding
 * variant).
 *
 * inp0, inp1   - source vectors of the two rows
 * mask0..mask3 - byte-shuffle indices selecting the four tap pairs of each
 *                output pixel from a row vector
 * coef0        - byte vector for the mask0 pairs (DOTP_UB2_UH)
 * coef1, coef2 - byte vectors for the mask1 and mask2 pairs (DPADD_UB2_UH)
 *
 * For each row the value
 *     (pairs0 * coef0 + pairs2 * coef2) - (pairs3 + pairs1 * coef1)
 * is shifted right by 5 with rounding (SRARI), passed through CLIP_SH2_0_255,
 * and the halfword results of both rows are packed into a single byte vector
 * (eight bytes per row), which is the value of the expression.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (out, sum0_r..sum7_r, res0_r, res1_r).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm lane order before editing.
 */
#define APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,                       \
                                      mask0, mask1, mask2, mask3,       \
                                      coef0, coef1, coef2)              \
( {                                                                     \
    v16u8 out;                                                          \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
    v8u16 sum4_r, sum5_r, sum6_r, sum7_r;                               \
    v8i16 res0_r, res1_r;                                               \
                                                                        \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r);   \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r);   \
    HADD_UB2_UH(sum3_r, sum7_r, sum3_r, sum7_r);                        \
    DOTP_UB2_UH(sum0_r, sum4_r, coef0, coef0, sum0_r, sum4_r);          \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r);   \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r);   \
    DPADD_UB2_UH(sum2_r, sum6_r, coef2, coef2, sum0_r, sum4_r);         \
    DPADD_UB2_UH(sum1_r, sum5_r, coef1, coef1, sum3_r, sum7_r);         \
    res0_r = (v8i16) (sum0_r - sum3_r);                                 \
    res1_r = (v8i16) (sum4_r - sum7_r);                                 \
    SRARI_H2_SH(res0_r, res1_r, 5);                                     \
    CLIP_SH2_0_255(res0_r, res1_r);                                     \
    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);        \
                                                                        \
    out;                                                                \
} )
83cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for a single 8-pixel row (rounding variant).
 *
 * inp0         - source vector of the row
 * mask0..mask3 - byte-shuffle indices selecting the four tap pairs
 * coef0        - byte vector for the mask0 pairs (__msa_dotp_u_h)
 * coef1, coef2 - byte vectors for the mask1 and mask2 pairs (DPADD_UB2_UH)
 *
 * Evaluates (pairs0 * coef0 + pairs2 * coef2) - (pairs3 + pairs1 * coef1),
 * shifts right by 5 with rounding (__msa_srari_h), applies CLIP_SH_0_255 and
 * packs the result to bytes. Because res0_r is given to __msa_pckev_b as both
 * operands, the same eight bytes fill both halves of the returned vector.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (out, res0_r, sum0_r..sum3_r).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,                        \
                                           mask0, mask1, mask2, mask3,  \
                                           coef0, coef1, coef2)         \
( {                                                                     \
    v16u8 out;                                                          \
    v8i16 res0_r;                                                       \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
                                                                        \
    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r);   \
    sum3_r = __msa_hadd_u_h((v16u8) sum3_r, (v16u8) sum3_r);            \
    sum0_r = __msa_dotp_u_h((v16u8) sum0_r, (v16u8) coef0);             \
    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r);   \
    DPADD_UB2_UH(sum2_r, sum1_r, coef2, coef1, sum0_r, sum3_r);         \
    res0_r = (v8i16) (sum0_r - sum3_r);                                 \
    res0_r = __msa_srari_h(res0_r, 5);                                  \
    CLIP_SH_0_255(res0_r);                                              \
    out = (v16u8) __msa_pckev_b((v16i8) res0_r, (v16i8) res0_r);        \
                                                                        \
    out;                                                                \
} )
104cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for a single 8-pixel row ("no round"
 * variant).
 *
 * Same data flow as APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW; the only difference
 * is the final scaling: instead of the rounding shift, 15 is added to every
 * halfword and the sum is shifted right by 5.
 *
 * inp0         - source vector of the row
 * mask0..mask3 - byte-shuffle indices selecting the four tap pairs
 * coef0        - byte vector for the mask0 pairs (__msa_dotp_u_h)
 * coef1, coef2 - byte vectors for the mask1 and mask2 pairs (DPADD_UB2_UH)
 *
 * The value of the expression is the packed byte vector; res0_r is passed to
 * __msa_pckev_b as both operands, so both halves hold the same eight bytes.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (out, res0_r, sum0_r..sum3_r).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,   \
                                                    mask2, mask3, coef0,  \
                                                    coef1, coef2)         \
( {                                                                       \
    v16u8 out;                                                            \
    v8i16 res0_r;                                                         \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                                 \
                                                                          \
    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r);     \
    sum3_r = __msa_hadd_u_h((v16u8) sum3_r, (v16u8) sum3_r);              \
    sum0_r = __msa_dotp_u_h((v16u8) sum0_r, (v16u8) coef0);               \
    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r);     \
    DPADD_UB2_UH(sum2_r, sum1_r, coef2, coef1, sum0_r, sum3_r);           \
    res0_r = (v8i16) (sum0_r - sum3_r);                                   \
    res0_r += 15;                                                         \
    res0_r >>= 5;                                                         \
    CLIP_SH_0_255(res0_r);                                                \
    out = (v16u8) __msa_pckev_b((v16i8) res0_r, (v16i8) res0_r);          \
                                                                          \
    out;                                                                  \
} )
126cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for one 16-pixel row ("no round" variant).
 *
 * Same data flow as APPLY_HORIZ_QPEL_FILTER; the only difference is the final
 * scaling: 15 is added to every halfword and the sum is shifted right by 5,
 * instead of using the rounding shift.
 *
 * inp0, inp1   - source vectors for the row
 * mask         - byte-shuffle indices used to build tmp0/tmp1, which feed the
 *                sldi-based neighbour vectors data0..data5
 * coef0        - halfword vector multiplied into the inp0/inp1 pair sums
 * coef1, coef2 - byte vectors handed to DPADD_UB2_UH for the data3/data0 and
 *                data4/data1 pairs respectively
 *
 * The value of the expression is the clipped (CLIP_SH2_0_255) and packed
 * byte vector.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (out, tmp0, data0, sum0_r, res_r, ...).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before reordering any statement.
 */
#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,              \
                                         coef0, coef1, coef2)           \
( {                                                                     \
    v16u8 out, tmp0, tmp1;                                              \
    v16u8 data0, data1, data2, data3, data4, data5;                     \
    v8i16 res_r, res_l;                                                 \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
                                                                        \
    VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1);         \
    ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l);                            \
    data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15);       \
    data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1);        \
    HADD_UB2_UH(sum0_r, sum0_l, sum0_r, sum0_l);                        \
    ILVRL_B2_UH(data3, data0, sum1_r, sum1_l);                          \
    data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14);       \
    data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2);        \
    sum0_r *= (v8u16) (coef0);                                          \
    sum0_l *= (v8u16) (coef0);                                          \
    ILVRL_B2_UH(data4, data1, sum2_r, sum2_l);                          \
    data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13);       \
    data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3);        \
    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
    ILVRL_B2_UH(data5, data2, sum3_r, sum3_l);                          \
    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
    res_r = (v8i16) (sum0_r - sum3_r);                                  \
    res_l = (v8i16) (sum0_l - sum3_l);                                  \
    res_r += 15;                                                        \
    res_l += 15;                                                        \
    res_r >>= 5;                                                        \
    res_l >>= 5;                                                        \
    CLIP_SH2_0_255(res_r, res_l);                                       \
    out = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
                                                                        \
    out;                                                                \
} )
164cabdff1aSopenharmony_ci
/*
 * Horizontal qpel lowpass filter for two 8-pixel rows at once ("no round"
 * variant).
 *
 * Same data flow as APPLY_HORIZ_QPEL_FILTER_8BYTE; the only difference is the
 * final scaling: 15 is added to every halfword and the sum is shifted right
 * by 5, instead of using the rounding shift.
 *
 * inp0, inp1   - source vectors of the two rows
 * mask0..mask3 - byte-shuffle indices selecting the four tap pairs
 * coef0        - byte vector for the mask0 pairs (DOTP_UB2_UH)
 * coef1, coef2 - byte vectors for the mask1 and mask2 pairs (DPADD_UB2_UH)
 *
 * The value of the expression is one byte vector holding the clipped
 * (CLIP_SH2_0_255) results of both rows, eight bytes per row.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (out, res0_r, res1_r, sum0_r..sum7_r).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm lane order before editing.
 */
#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1,                  \
                                               mask0, mask1, mask2, mask3,  \
                                               coef0, coef1, coef2)         \
( {                                                                         \
    v16u8 out;                                                              \
    v8i16 res0_r, res1_r;                                                   \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                                   \
    v8u16 sum4_r, sum5_r, sum6_r, sum7_r;                                   \
                                                                            \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r);       \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r);       \
    HADD_UB2_UH(sum3_r, sum7_r, sum3_r, sum7_r);                            \
    DOTP_UB2_UH(sum0_r, sum4_r, coef0, coef0, sum0_r, sum4_r);              \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r);       \
    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r);       \
    DPADD_UB2_UH(sum2_r, sum6_r, coef2, coef2, sum0_r, sum4_r);             \
    DPADD_UB2_UH(sum1_r, sum5_r, coef1, coef1, sum3_r, sum7_r);             \
    res0_r = (v8i16) (sum0_r - sum3_r);                                     \
    res1_r = (v8i16) (sum4_r - sum7_r);                                     \
    res0_r += 15;                                                           \
    res1_r += 15;                                                           \
    res0_r >>= 5;                                                           \
    res1_r >>= 5;                                                           \
    CLIP_SH2_0_255(res0_r, res1_r);                                         \
    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);            \
                                                                            \
    out;                                                                    \
} )
193cabdff1aSopenharmony_ci
/*
 * Vertical qpel lowpass filter for one 16-pixel row (rounding variant).
 *
 * inp0..inp7   - eight row vectors supplying the taps; they are combined in
 *                the pairs (inp4, inp0), (inp5, inp1), (inp6, inp2) and
 *                (inp7, inp3)
 * coef0        - byte vector for the (inp4, inp0) pair (DOTP_UB2_UH)
 * coef1        - byte vector for the (inp5, inp1) pair (DPADD_UB2_UH)
 * coef2        - byte vector for the (inp6, inp2) pair (DPADD_UB2_UH)
 *
 * Evaluates (pair40 * coef0 + pair62 * coef2) - (pair73 + pair51 * coef1) on
 * both halves of the vector, shifts right by 5 with rounding (SRARI), applies
 * CLIP_SH2_0_255 and packs to bytes. The packed vector is the value of the
 * expression.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (res, res_r, res_l, sum0_r, ...).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3,                  \
                               inp4, inp5, inp6, inp7,                  \
                               coef0, coef1, coef2)                     \
( {                                                                     \
    v16u8 res;                                                          \
    v8i16 res_r, res_l;                                                 \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
                                                                        \
    ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l);                            \
    ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l);                            \
    DOTP_UB2_UH(sum0_r, sum0_l, coef0, coef0, sum0_r, sum0_l);          \
    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
    ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l);                            \
    ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l);                            \
    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
    res_r = (v8i16) (sum0_r - sum3_r);                                  \
    res_l = (v8i16) (sum0_l - sum3_l);                                  \
    SRARI_H2_SH(res_r, res_l, 5);                                       \
    CLIP_SH2_0_255(res_r, res_l);                                       \
    res = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
                                                                        \
    res;                                                                \
} )
219cabdff1aSopenharmony_ci
/*
 * Vertical qpel lowpass filter for two 8-pixel output rows at once (rounding
 * variant).
 *
 * inp00..inp07 - eight row vectors supplying the taps of the first output row
 * inp10..inp17 - eight row vectors supplying the taps of the second output row
 * coef0        - byte vector for the (inpX4, inpX0) pairs (DOTP_UB2_UH)
 * coef1        - byte vector for the (inpX5, inpX1) pairs (DPADD_UB2_UH)
 * coef2        - byte vector for the (inpX6, inpX2) pairs (DPADD_UB2_UH)
 *
 * For each output row evaluates
 *     (pair40 * coef0 + pair62 * coef2) - (pair73 + pair51 * coef1),
 * shifts right by 5 with rounding (SRARI), applies CLIP_SH2_0_255 and packs
 * val0/val1 into one byte vector (eight bytes per row), which is the value
 * of the expression.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (res, val0, val1, sum00..sum13).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_VERT_QPEL_FILTER_8BYTE(inp00, inp01, inp02, inp03,        \
                                     inp04, inp05, inp06, inp07,        \
                                     inp10, inp11, inp12, inp13,        \
                                     inp14, inp15, inp16, inp17,        \
                                     coef0, coef1, coef2)               \
( {                                                                     \
    v16u8 res;                                                          \
    v8i16 val0, val1;                                                   \
    v8u16 sum00, sum01, sum02, sum03;                                   \
    v8u16 sum10, sum11, sum12, sum13;                                   \
                                                                        \
    ILVR_B4_UH(inp04, inp00, inp14, inp10, inp07, inp03, inp17, inp13,  \
               sum00, sum10, sum03, sum13);                             \
    DOTP_UB2_UH(sum00, sum10, coef0, coef0, sum00, sum10);              \
    HADD_UB2_UH(sum03, sum13, sum03, sum13);                            \
    ILVR_B4_UH(inp06, inp02, inp16, inp12, inp05, inp01, inp15, inp11,  \
               sum02, sum12, sum01, sum11);                             \
    DPADD_UB2_UH(sum02, sum12, coef2, coef2, sum00, sum10);             \
    DPADD_UB2_UH(sum01, sum11, coef1, coef1, sum03, sum13);             \
    val0 = (v8i16) (sum00 - sum03);                                     \
    val1 = (v8i16) (sum10 - sum13);                                     \
    SRARI_H2_SH(val0, val1, 5);                                         \
    CLIP_SH2_0_255(val0, val1);                                         \
    res = (v16u8) __msa_pckev_b((v16i8) val1, (v16i8) val0);            \
                                                                        \
    res;                                                                \
} )
247cabdff1aSopenharmony_ci
/*
 * Vertical qpel lowpass filter for one 16-pixel row ("no round" variant).
 *
 * Same data flow as APPLY_VERT_QPEL_FILTER; the only difference is the final
 * scaling: 15 is added to every halfword and the sum is shifted right by 5,
 * instead of using the rounding shift.
 *
 * inp0..inp7   - eight row vectors supplying the taps, combined in the pairs
 *                (inp4, inp0), (inp5, inp1), (inp6, inp2) and (inp7, inp3)
 * coef0        - byte vector for the (inp4, inp0) pair (DOTP_UB2_UH)
 * coef1        - byte vector for the (inp5, inp1) pair (DPADD_UB2_UH)
 * coef2        - byte vector for the (inp6, inp2) pair (DPADD_UB2_UH)
 *
 * The value of the expression is the clipped (CLIP_SH2_0_255) and packed
 * byte vector.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (res, res_r, res_l, sum0_r, ...).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3,         \
                                        inp4, inp5, inp6, inp7,         \
                                        coef0, coef1, coef2)            \
( {                                                                     \
    v16u8 res;                                                          \
    v8i16 res_r, res_l;                                                 \
    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
                                                                        \
    ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l);                            \
    ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l);                            \
    DOTP_UB2_UH(sum0_r, sum0_l, coef0, coef0, sum0_r, sum0_l);          \
    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
    ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l);                            \
    ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l);                            \
    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
    res_r = (v8i16) (sum0_r - sum3_r);                                  \
    res_l = (v8i16) (sum0_l - sum3_l);                                  \
    res_r += 15;                                                        \
    res_l += 15;                                                        \
    res_r >>= 5;                                                        \
    res_l >>= 5;                                                        \
    CLIP_SH2_0_255(res_r, res_l);                                       \
    res = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
                                                                        \
    res;                                                                \
} )
276cabdff1aSopenharmony_ci
/*
 * Vertical qpel lowpass filter for two 8-pixel output rows at once ("no
 * round" variant).
 *
 * Same data flow as APPLY_VERT_QPEL_FILTER_8BYTE; the only difference is the
 * final scaling: 15 is added to every halfword and the sum is shifted right
 * by 5, instead of using the rounding shift.
 *
 * inp00..inp07 - eight row vectors supplying the taps of the first output row
 * inp10..inp17 - eight row vectors supplying the taps of the second output row
 * coef0        - byte vector for the (inpX4, inpX0) pairs (DOTP_UB2_UH)
 * coef1        - byte vector for the (inpX5, inpX1) pairs (DPADD_UB2_UH)
 * coef2        - byte vector for the (inpX6, inpX2) pairs (DPADD_UB2_UH)
 *
 * The value of the expression is one byte vector holding the clipped
 * (CLIP_SH2_0_255) results of both rows, eight bytes per row.
 *
 * GNU statement expression: arguments must not be spelled like the locals
 * declared inside (res, val0, val1, sum00..sum13).
 * NOTE(review): helper macro semantics live in generic_macros_msa.h, not
 * visible here - confirm before editing.
 */
#define APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp00, inp01, inp02, inp03,  \
                                              inp04, inp05, inp06, inp07,  \
                                              inp10, inp11, inp12, inp13,  \
                                              inp14, inp15, inp16, inp17,  \
                                              coef0, coef1, coef2)         \
( {                                                                        \
    v16u8 res;                                                             \
    v8i16 val0, val1;                                                      \
    v8u16 sum00, sum01, sum02, sum03;                                      \
    v8u16 sum10, sum11, sum12, sum13;                                      \
                                                                           \
    ILVR_B4_UH(inp04, inp00, inp14, inp10, inp07, inp03, inp17, inp13,     \
               sum00, sum10, sum03, sum13);                                \
    DOTP_UB2_UH(sum00, sum10, coef0, coef0, sum00, sum10);                 \
    HADD_UB2_UH(sum03, sum13, sum03, sum13);                               \
    ILVR_B4_UH(inp06, inp02, inp16, inp12, inp05, inp01, inp15, inp11,     \
               sum02, sum12, sum01, sum11);                                \
    DPADD_UB2_UH(sum02, sum12, coef2, coef2, sum00, sum10);                \
    DPADD_UB2_UH(sum01, sum11, coef1, coef1, sum03, sum13);                \
    val0 = (v8i16) (sum00 - sum03);                                        \
    val1 = (v8i16) (sum10 - sum13);                                        \
    val0 += 15;                                                            \
    val1 += 15;                                                            \
    val0 >>= 5;                                                            \
    val1 >>= 5;                                                            \
    CLIP_SH2_0_255(val0, val1);                                            \
    res = (v16u8) __msa_pckev_b((v16i8) val1, (v16i8) val0);               \
                                                                           \
    res;                                                                   \
} )
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_cistatic void horiz_mc_qpel_aver_src0_8width_msa(const uint8_t *src,
309cabdff1aSopenharmony_ci                                               int32_t src_stride,
310cabdff1aSopenharmony_ci                                               uint8_t *dst,
311cabdff1aSopenharmony_ci                                               int32_t dst_stride,
312cabdff1aSopenharmony_ci                                               int32_t height)
313cabdff1aSopenharmony_ci{
314cabdff1aSopenharmony_ci    uint8_t loop_count;
315cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
316cabdff1aSopenharmony_ci    v16u8 res0, res1;
317cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
318cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
319cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
320cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
321cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
322cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
323cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
324cabdff1aSopenharmony_ci
325cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
326cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
327cabdff1aSopenharmony_ci        src += (4 * src_stride);
328cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
329cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
330cabdff1aSopenharmony_ci                                             const20, const6, const3);
331cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
332cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
333cabdff1aSopenharmony_ci                                             const20, const6, const3);
334cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
335cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
336cabdff1aSopenharmony_ci        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
337cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
338cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
339cabdff1aSopenharmony_ci    }
340cabdff1aSopenharmony_ci}
341cabdff1aSopenharmony_ci
342cabdff1aSopenharmony_cistatic void horiz_mc_qpel_aver_src0_16width_msa(const uint8_t *src,
343cabdff1aSopenharmony_ci                                                int32_t src_stride,
344cabdff1aSopenharmony_ci                                                uint8_t *dst,
345cabdff1aSopenharmony_ci                                                int32_t dst_stride,
346cabdff1aSopenharmony_ci                                                int32_t height)
347cabdff1aSopenharmony_ci{
348cabdff1aSopenharmony_ci    uint8_t loop_count;
349cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
350cabdff1aSopenharmony_ci    v16u8 res;
351cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
352cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
353cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
354cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
355cabdff1aSopenharmony_ci
356cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
357cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
358cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
359cabdff1aSopenharmony_ci        src += (4 * src_stride);
360cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
361cabdff1aSopenharmony_ci                                      const20, const6, const3);
362cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp0, res);
363cabdff1aSopenharmony_ci        ST_UB(res, dst);
364cabdff1aSopenharmony_ci        dst += dst_stride;
365cabdff1aSopenharmony_ci
366cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
367cabdff1aSopenharmony_ci                                      const20, const6, const3);
368cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp2, res);
369cabdff1aSopenharmony_ci        ST_UB(res, dst);
370cabdff1aSopenharmony_ci        dst += dst_stride;
371cabdff1aSopenharmony_ci
372cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
373cabdff1aSopenharmony_ci                                      const20, const6, const3);
374cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp4, res);
375cabdff1aSopenharmony_ci        ST_UB(res, dst);
376cabdff1aSopenharmony_ci        dst += dst_stride;
377cabdff1aSopenharmony_ci
378cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
379cabdff1aSopenharmony_ci                                      const20, const6, const3);
380cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp6, res);
381cabdff1aSopenharmony_ci        ST_UB(res, dst);
382cabdff1aSopenharmony_ci        dst += dst_stride;
383cabdff1aSopenharmony_ci    }
384cabdff1aSopenharmony_ci}
385cabdff1aSopenharmony_ci
386cabdff1aSopenharmony_cistatic void horiz_mc_qpel_8width_msa(const uint8_t *src,
387cabdff1aSopenharmony_ci                                     int32_t src_stride,
388cabdff1aSopenharmony_ci                                     uint8_t *dst,
389cabdff1aSopenharmony_ci                                     int32_t dst_stride,
390cabdff1aSopenharmony_ci                                     int32_t height)
391cabdff1aSopenharmony_ci{
392cabdff1aSopenharmony_ci    uint8_t loop_count;
393cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
394cabdff1aSopenharmony_ci    v16u8 res0, res1;
395cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
396cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
397cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
398cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
399cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
400cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
401cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
402cabdff1aSopenharmony_ci
403cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
404cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
405cabdff1aSopenharmony_ci        src += (4 * src_stride);
406cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
407cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
408cabdff1aSopenharmony_ci                                             const20, const6, const3);
409cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
410cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
411cabdff1aSopenharmony_ci                                             const20, const6, const3);
412cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
413cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
414cabdff1aSopenharmony_ci    }
415cabdff1aSopenharmony_ci}
416cabdff1aSopenharmony_ci
417cabdff1aSopenharmony_cistatic void horiz_mc_qpel_16width_msa(const uint8_t *src,
418cabdff1aSopenharmony_ci                                      int32_t src_stride,
419cabdff1aSopenharmony_ci                                      uint8_t *dst,
420cabdff1aSopenharmony_ci                                      int32_t dst_stride,
421cabdff1aSopenharmony_ci                                      int32_t height)
422cabdff1aSopenharmony_ci{
423cabdff1aSopenharmony_ci    uint8_t loop_count;
424cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
425cabdff1aSopenharmony_ci    v16u8 res;
426cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
427cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
428cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
429cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
430cabdff1aSopenharmony_ci
431cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
432cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
433cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
434cabdff1aSopenharmony_ci        src += (4 * src_stride);
435cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
436cabdff1aSopenharmony_ci                                      const20, const6, const3);
437cabdff1aSopenharmony_ci        ST_UB(res, dst);
438cabdff1aSopenharmony_ci        dst += dst_stride;
439cabdff1aSopenharmony_ci
440cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
441cabdff1aSopenharmony_ci                                      const20, const6, const3);
442cabdff1aSopenharmony_ci        ST_UB(res, dst);
443cabdff1aSopenharmony_ci        dst += dst_stride;
444cabdff1aSopenharmony_ci
445cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
446cabdff1aSopenharmony_ci                                      const20, const6, const3);
447cabdff1aSopenharmony_ci        ST_UB(res, dst);
448cabdff1aSopenharmony_ci        dst += dst_stride;
449cabdff1aSopenharmony_ci
450cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
451cabdff1aSopenharmony_ci                                      const20, const6, const3);
452cabdff1aSopenharmony_ci        ST_UB(res, dst);
453cabdff1aSopenharmony_ci        dst += dst_stride;
454cabdff1aSopenharmony_ci    }
455cabdff1aSopenharmony_ci}
456cabdff1aSopenharmony_ci
457cabdff1aSopenharmony_cistatic void horiz_mc_qpel_aver_src1_8width_msa(const uint8_t *src,
458cabdff1aSopenharmony_ci                                               int32_t src_stride,
459cabdff1aSopenharmony_ci                                               uint8_t *dst,
460cabdff1aSopenharmony_ci                                               int32_t dst_stride,
461cabdff1aSopenharmony_ci                                               int32_t height)
462cabdff1aSopenharmony_ci{
463cabdff1aSopenharmony_ci    uint8_t loop_count;
464cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
465cabdff1aSopenharmony_ci    v16u8 res0, res1;
466cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
467cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
468cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
469cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
470cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
471cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
472cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
473cabdff1aSopenharmony_ci
474cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
475cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
476cabdff1aSopenharmony_ci        src += (4 * src_stride);
477cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
478cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
479cabdff1aSopenharmony_ci                                             const20, const6, const3);
480cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
481cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
482cabdff1aSopenharmony_ci                                             const20, const6, const3);
483cabdff1aSopenharmony_ci        SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
484cabdff1aSopenharmony_ci                   inp0, inp1, inp2, inp3);
485cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
486cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
487cabdff1aSopenharmony_ci        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
488cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
489cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
490cabdff1aSopenharmony_ci    }
491cabdff1aSopenharmony_ci}
492cabdff1aSopenharmony_ci
493cabdff1aSopenharmony_cistatic void horiz_mc_qpel_aver_src1_16width_msa(const uint8_t *src,
494cabdff1aSopenharmony_ci                                                int32_t src_stride,
495cabdff1aSopenharmony_ci                                                uint8_t *dst,
496cabdff1aSopenharmony_ci                                                int32_t dst_stride,
497cabdff1aSopenharmony_ci                                                int32_t height)
498cabdff1aSopenharmony_ci{
499cabdff1aSopenharmony_ci    uint8_t loop_count;
500cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
501cabdff1aSopenharmony_ci    v16u8 res;
502cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
503cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
504cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
505cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
506cabdff1aSopenharmony_ci
507cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
508cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
509cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
510cabdff1aSopenharmony_ci        src += (4 * src_stride);
511cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
512cabdff1aSopenharmony_ci                                      const20, const6, const3);
513cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp1);
514cabdff1aSopenharmony_ci        ST_UB(res, dst);
515cabdff1aSopenharmony_ci        dst += dst_stride;
516cabdff1aSopenharmony_ci
517cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
518cabdff1aSopenharmony_ci                                      const20, const6, const3);
519cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp3);
520cabdff1aSopenharmony_ci        ST_UB(res, dst);
521cabdff1aSopenharmony_ci        dst += dst_stride;
522cabdff1aSopenharmony_ci
523cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
524cabdff1aSopenharmony_ci                                      const20, const6, const3);
525cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp5);
526cabdff1aSopenharmony_ci        ST_UB(res, dst);
527cabdff1aSopenharmony_ci        dst += dst_stride;
528cabdff1aSopenharmony_ci
529cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
530cabdff1aSopenharmony_ci                                      const20, const6, const3);
531cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp7);
532cabdff1aSopenharmony_ci        ST_UB(res, dst);
533cabdff1aSopenharmony_ci        dst += dst_stride;
534cabdff1aSopenharmony_ci    }
535cabdff1aSopenharmony_ci}
536cabdff1aSopenharmony_ci
537cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_aver_src0_8width_msa(const uint8_t *src,
538cabdff1aSopenharmony_ci                                                      int32_t src_stride,
539cabdff1aSopenharmony_ci                                                      uint8_t *dst,
540cabdff1aSopenharmony_ci                                                      int32_t dst_stride,
541cabdff1aSopenharmony_ci                                                      int32_t height)
542cabdff1aSopenharmony_ci{
543cabdff1aSopenharmony_ci    uint8_t loop_count;
544cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
545cabdff1aSopenharmony_ci    v16u8 res0, res1;
546cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
547cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
548cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
549cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
550cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
551cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
552cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
553cabdff1aSopenharmony_ci
554cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
555cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
556cabdff1aSopenharmony_ci        src += (4 * src_stride);
557cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
558cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
559cabdff1aSopenharmony_ci                                                      const6, const3);
560cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
561cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
562cabdff1aSopenharmony_ci                                                      const6, const3);
563cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
564cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
565cabdff1aSopenharmony_ci        res0 = __msa_ave_u_b(inp0, res0);
566cabdff1aSopenharmony_ci        res1 = __msa_ave_u_b(inp2, res1);
567cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
568cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
569cabdff1aSopenharmony_ci    }
570cabdff1aSopenharmony_ci}
571cabdff1aSopenharmony_ci
572cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_aver_src0_16width_msa(const uint8_t *src,
573cabdff1aSopenharmony_ci                                                       int32_t src_stride,
574cabdff1aSopenharmony_ci                                                       uint8_t *dst,
575cabdff1aSopenharmony_ci                                                       int32_t dst_stride,
576cabdff1aSopenharmony_ci                                                       int32_t height)
577cabdff1aSopenharmony_ci{
578cabdff1aSopenharmony_ci    uint8_t loop_count;
579cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
580cabdff1aSopenharmony_ci    v16u8 res;
581cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
582cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
583cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
584cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
585cabdff1aSopenharmony_ci
586cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
587cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
588cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
589cabdff1aSopenharmony_ci        src += (4 * src_stride);
590cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
591cabdff1aSopenharmony_ci                                               const20, const6, const3);
592cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp0, res);
593cabdff1aSopenharmony_ci        ST_UB(res, dst);
594cabdff1aSopenharmony_ci        dst += dst_stride;
595cabdff1aSopenharmony_ci
596cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
597cabdff1aSopenharmony_ci                                               const20, const6, const3);
598cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp2, res);
599cabdff1aSopenharmony_ci        ST_UB(res, dst);
600cabdff1aSopenharmony_ci        dst += dst_stride;
601cabdff1aSopenharmony_ci
602cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
603cabdff1aSopenharmony_ci                                               const20, const6, const3);
604cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp4, res);
605cabdff1aSopenharmony_ci        ST_UB(res, dst);
606cabdff1aSopenharmony_ci        dst += dst_stride;
607cabdff1aSopenharmony_ci
608cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
609cabdff1aSopenharmony_ci                                               const20, const6, const3);
610cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp6, res);
611cabdff1aSopenharmony_ci        ST_UB(res, dst);
612cabdff1aSopenharmony_ci        dst += dst_stride;
613cabdff1aSopenharmony_ci    }
614cabdff1aSopenharmony_ci}
615cabdff1aSopenharmony_ci
616cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_8width_msa(const uint8_t *src,
617cabdff1aSopenharmony_ci                                            int32_t src_stride,
618cabdff1aSopenharmony_ci                                            uint8_t *dst,
619cabdff1aSopenharmony_ci                                            int32_t dst_stride,
620cabdff1aSopenharmony_ci                                            int32_t height)
621cabdff1aSopenharmony_ci{
622cabdff1aSopenharmony_ci    uint8_t loop_count;
623cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
624cabdff1aSopenharmony_ci    v16u8 res0, res1;
625cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
626cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
627cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
628cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
629cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
630cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
631cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
632cabdff1aSopenharmony_ci
633cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
634cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
635cabdff1aSopenharmony_ci        src += (4 * src_stride);
636cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
637cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
638cabdff1aSopenharmony_ci                                                      const6, const3);
639cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
640cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
641cabdff1aSopenharmony_ci                                                      const6, const3);
642cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
643cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
644cabdff1aSopenharmony_ci    }
645cabdff1aSopenharmony_ci}
646cabdff1aSopenharmony_ci
647cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_16width_msa(const uint8_t *src,
648cabdff1aSopenharmony_ci                                             int32_t src_stride,
649cabdff1aSopenharmony_ci                                             uint8_t *dst,
650cabdff1aSopenharmony_ci                                             int32_t dst_stride,
651cabdff1aSopenharmony_ci                                             int32_t height)
652cabdff1aSopenharmony_ci{
653cabdff1aSopenharmony_ci    uint8_t loop_count;
654cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
655cabdff1aSopenharmony_ci    v16u8 res;
656cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
657cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
658cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
659cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
660cabdff1aSopenharmony_ci
661cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
662cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
663cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
664cabdff1aSopenharmony_ci        src += (4 * src_stride);
665cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
666cabdff1aSopenharmony_ci                                               const20, const6, const3);
667cabdff1aSopenharmony_ci        ST_UB(res, dst);
668cabdff1aSopenharmony_ci        dst += dst_stride;
669cabdff1aSopenharmony_ci
670cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
671cabdff1aSopenharmony_ci                                               const20, const6, const3);
672cabdff1aSopenharmony_ci        ST_UB(res, dst);
673cabdff1aSopenharmony_ci        dst += dst_stride;
674cabdff1aSopenharmony_ci
675cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
676cabdff1aSopenharmony_ci                                               const20, const6, const3);
677cabdff1aSopenharmony_ci        ST_UB(res, dst);
678cabdff1aSopenharmony_ci        dst += dst_stride;
679cabdff1aSopenharmony_ci
680cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
681cabdff1aSopenharmony_ci                                               const20, const6, const3);
682cabdff1aSopenharmony_ci        ST_UB(res, dst);
683cabdff1aSopenharmony_ci        dst += dst_stride;
684cabdff1aSopenharmony_ci    }
685cabdff1aSopenharmony_ci}
686cabdff1aSopenharmony_ci
687cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_aver_src1_8width_msa(const uint8_t *src,
688cabdff1aSopenharmony_ci                                                      int32_t src_stride,
689cabdff1aSopenharmony_ci                                                      uint8_t *dst,
690cabdff1aSopenharmony_ci                                                      int32_t dst_stride,
691cabdff1aSopenharmony_ci                                                      int32_t height)
692cabdff1aSopenharmony_ci{
693cabdff1aSopenharmony_ci    uint8_t loop_count;
694cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
695cabdff1aSopenharmony_ci    v16u8 res0, res1;
696cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
697cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
698cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
699cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
700cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
701cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
702cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
703cabdff1aSopenharmony_ci
704cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
705cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
706cabdff1aSopenharmony_ci        src += (4 * src_stride);
707cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
708cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
709cabdff1aSopenharmony_ci                                                      const6, const3);
710cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
711cabdff1aSopenharmony_ci                                                      mask2, mask3, const20,
712cabdff1aSopenharmony_ci                                                      const6, const3);
713cabdff1aSopenharmony_ci        SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
714cabdff1aSopenharmony_ci                   inp0, inp1, inp2, inp3);
715cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
716cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
717cabdff1aSopenharmony_ci        res0 = __msa_ave_u_b(inp0, res0);
718cabdff1aSopenharmony_ci        res1 = __msa_ave_u_b(inp2, res1);
719cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
720cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
721cabdff1aSopenharmony_ci    }
722cabdff1aSopenharmony_ci}
723cabdff1aSopenharmony_ci
724cabdff1aSopenharmony_cistatic void horiz_mc_qpel_no_rnd_aver_src1_16width_msa(const uint8_t *src,
725cabdff1aSopenharmony_ci                                                       int32_t src_stride,
726cabdff1aSopenharmony_ci                                                       uint8_t *dst,
727cabdff1aSopenharmony_ci                                                       int32_t dst_stride,
728cabdff1aSopenharmony_ci                                                       int32_t height)
729cabdff1aSopenharmony_ci{
730cabdff1aSopenharmony_ci    uint8_t loop_count;
731cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
732cabdff1aSopenharmony_ci    v16u8 res;
733cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
734cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
735cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
736cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
737cabdff1aSopenharmony_ci
738cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
739cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
740cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
741cabdff1aSopenharmony_ci        src += (4 * src_stride);
742cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
743cabdff1aSopenharmony_ci                                               const20, const6, const3);
744cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp1);
745cabdff1aSopenharmony_ci        ST_UB(res, dst);
746cabdff1aSopenharmony_ci        dst += dst_stride;
747cabdff1aSopenharmony_ci
748cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
749cabdff1aSopenharmony_ci                                               const20, const6, const3);
750cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp3);
751cabdff1aSopenharmony_ci        ST_UB(res, dst);
752cabdff1aSopenharmony_ci        dst += dst_stride;
753cabdff1aSopenharmony_ci
754cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
755cabdff1aSopenharmony_ci                                               const20, const6, const3);
756cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp5);
757cabdff1aSopenharmony_ci        ST_UB(res, dst);
758cabdff1aSopenharmony_ci        dst += dst_stride;
759cabdff1aSopenharmony_ci
760cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
761cabdff1aSopenharmony_ci                                               const20, const6, const3);
762cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp7);
763cabdff1aSopenharmony_ci        ST_UB(res, dst);
764cabdff1aSopenharmony_ci        dst += dst_stride;
765cabdff1aSopenharmony_ci    }
766cabdff1aSopenharmony_ci}
767cabdff1aSopenharmony_ci
768cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_aver_src0_8width_msa(const uint8_t *src,
769cabdff1aSopenharmony_ci                                                       int32_t src_stride,
770cabdff1aSopenharmony_ci                                                       uint8_t *dst,
771cabdff1aSopenharmony_ci                                                       int32_t dst_stride,
772cabdff1aSopenharmony_ci                                                       int32_t height)
773cabdff1aSopenharmony_ci{
774cabdff1aSopenharmony_ci    uint8_t loop_count;
775cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
776cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
777cabdff1aSopenharmony_ci    v16u8 res0, res1;
778cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
779cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
780cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
781cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
782cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
783cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
784cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
785cabdff1aSopenharmony_ci
786cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
787cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
788cabdff1aSopenharmony_ci        src += (4 * src_stride);
789cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
790cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
791cabdff1aSopenharmony_ci                                             const20, const6, const3);
792cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
793cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
794cabdff1aSopenharmony_ci                                             const20, const6, const3);
795cabdff1aSopenharmony_ci        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
796cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
797cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
798cabdff1aSopenharmony_ci        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
799cabdff1aSopenharmony_ci        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
800cabdff1aSopenharmony_ci        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
801cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
802cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
803cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
804cabdff1aSopenharmony_ci    }
805cabdff1aSopenharmony_ci}
806cabdff1aSopenharmony_ci
/*
 * Horizontal qpel filter for the "16width" case, where the filtered row is
 * averaged first with the unshifted source row and then with the row that
 * is already in dst ("avg_dst" + "aver_src0").
 *
 * Each loop iteration consumes four source rows and writes four destination
 * rows. The loop runs (height >> 2) times, so a height % 4 remainder is
 * never processed.
 *
 * src, src_stride: source pointer and per-row step; every row is loaded
 *                  once at src and once at src + 1.
 * dst, dst_stride: destination pointer and per-row step; dst is read for
 *                  the final averaging and then overwritten.
 * height:          number of rows to produce.
 *
 * NOTE(review): loop_count is uint8_t, so (height >> 2) is truncated to
 * 8 bits -- confirm callers only pass small heights.
 */
807cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_aver_src0_16width_msa(const uint8_t *src,
808cabdff1aSopenharmony_ci                                                        int32_t src_stride,
809cabdff1aSopenharmony_ci                                                        uint8_t *dst,
810cabdff1aSopenharmony_ci                                                        int32_t dst_stride,
811cabdff1aSopenharmony_ci                                                        int32_t height)
812cabdff1aSopenharmony_ci{
813cabdff1aSopenharmony_ci    uint8_t loop_count;
814cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
815cabdff1aSopenharmony_ci    v16u8 res0, res1;
816cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-reversal index table handed to APPLY_HORIZ_QPEL_FILTER. */
817cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter coefficients: 6 and 3 as byte lanes, 20 as halfword lanes. */
818cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
819cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
820cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
821cabdff1aSopenharmony_ci
822cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* inp0/2/4/6: four rows at src; inp1/3/5/7: same rows at src + 1. */
823cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
824cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
825cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Rows 0-1: filter, then average with the row loaded at src. */
826cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
827cabdff1aSopenharmony_ci                                       const20, const6, const3);
828cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
829cabdff1aSopenharmony_ci                                       const20, const6, const3);
830cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
831cabdff1aSopenharmony_ci        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
        /* Blend with the existing destination rows before storing. */
832cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
833cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
834cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
835cabdff1aSopenharmony_ci
        /* Rows 2-3: same sequence on the remaining two loaded rows. */
836cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
837cabdff1aSopenharmony_ci                                       const20, const6, const3);
838cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
839cabdff1aSopenharmony_ci                                       const20, const6, const3);
840cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
841cabdff1aSopenharmony_ci        AVER_UB2_UB(inp4, res0, inp6, res1, res0, res1);
842cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
843cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
844cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
845cabdff1aSopenharmony_ci    }
846cabdff1aSopenharmony_ci}
847cabdff1aSopenharmony_ci
/*
 * Horizontal qpel filter for the "8width" case, where the filtered rows are
 * averaged with the rows already present in dst ("avg_dst").
 *
 * Each loop iteration consumes four source rows and writes four destination
 * rows. The loop runs (height >> 2) times, so a height % 4 remainder is
 * never processed.
 *
 * src, src_stride: source pointer and per-row step.
 * dst, dst_stride: destination pointer and per-row step; dst is read for
 *                  averaging and then overwritten.
 * height:          number of rows to produce.
 *
 * NOTE(review): loop_count is uint8_t, so (height >> 2) is truncated to
 * 8 bits -- confirm callers only pass small heights.
 */
848cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_8width_msa(const uint8_t *src,
849cabdff1aSopenharmony_ci                                             int32_t src_stride,
850cabdff1aSopenharmony_ci                                             uint8_t *dst,
851cabdff1aSopenharmony_ci                                             int32_t dst_stride,
852cabdff1aSopenharmony_ci                                             int32_t height)
853cabdff1aSopenharmony_ci{
854cabdff1aSopenharmony_ci    uint8_t loop_count;
855cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
856cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
857cabdff1aSopenharmony_ci    v16u8 res0, res1;
    /*
     * Byte index tables passed to APPLY_HORIZ_QPEL_FILTER_8BYTE; how they
     * are consumed is defined inside that macro (outside this function).
     */
858cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
859cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
860cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
861cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Filter coefficients 20, 6 and 3, all replicated as byte lanes. */
862cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
863cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
864cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
865cabdff1aSopenharmony_ci
866cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
867cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
868cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Each call takes two source rows and yields one result vector. */
869cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
870cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
871cabdff1aSopenharmony_ci                                             const20, const6, const3);
872cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
873cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
874cabdff1aSopenharmony_ci                                             const20, const6, const3);
875cabdff1aSopenharmony_ci        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
        /*
         * Pack two destination rows per vector: doubleword 0 of dst1/dst3
         * is copied into doubleword 1 of dst0/dst2.
         */
876cabdff1aSopenharmony_ci        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
877cabdff1aSopenharmony_ci        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
        /* Blend with the existing destination rows before storing. */
878cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
        /* Presumably writes the four doublewords as four 8-byte rows. */
879cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
880cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
881cabdff1aSopenharmony_ci    }
882cabdff1aSopenharmony_ci}
883cabdff1aSopenharmony_ci
/*
 * Horizontal qpel filter for the "16width" case, where the filtered rows
 * are averaged with the rows already present in dst ("avg_dst").
 *
 * Each loop iteration consumes four source rows and writes four destination
 * rows. The loop runs (height >> 2) times, so a height % 4 remainder is
 * never processed.
 *
 * src, src_stride: source pointer and per-row step; every row is loaded
 *                  once at src and once at src + 1.
 * dst, dst_stride: destination pointer and per-row step; dst is read for
 *                  averaging and then overwritten.
 * height:          number of rows to produce.
 *
 * NOTE(review): loop_count is uint8_t, so (height >> 2) is truncated to
 * 8 bits -- confirm callers only pass small heights.
 */
884cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_16width_msa(const uint8_t *src,
885cabdff1aSopenharmony_ci                                              int32_t src_stride,
886cabdff1aSopenharmony_ci                                              uint8_t *dst,
887cabdff1aSopenharmony_ci                                              int32_t dst_stride,
888cabdff1aSopenharmony_ci                                              int32_t height)
889cabdff1aSopenharmony_ci{
890cabdff1aSopenharmony_ci    uint8_t loop_count;
891cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
892cabdff1aSopenharmony_ci    v16u8 res0, res1;
893cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-reversal index table handed to APPLY_HORIZ_QPEL_FILTER. */
894cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter coefficients: 6 and 3 as byte lanes, 20 as halfword lanes. */
895cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
896cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
897cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
898cabdff1aSopenharmony_ci
899cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* inp0/2/4/6: four rows at src; inp1/3/5/7: same rows at src + 1. */
900cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
901cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
902cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Rows 0-1: filter, then blend with the existing destination rows. */
903cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
904cabdff1aSopenharmony_ci                                       const20, const6, const3);
905cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
906cabdff1aSopenharmony_ci                                       const20, const6, const3);
907cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
908cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
909cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
910cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
911cabdff1aSopenharmony_ci
        /* Rows 2-3: same sequence on the remaining two loaded rows. */
912cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
913cabdff1aSopenharmony_ci                                       const20, const6, const3);
914cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
915cabdff1aSopenharmony_ci                                       const20, const6, const3);
916cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
917cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
918cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
919cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
920cabdff1aSopenharmony_ci    }
921cabdff1aSopenharmony_ci}
922cabdff1aSopenharmony_ci
/*
 * Horizontal qpel filter for the "8width" case, where the filtered row is
 * averaged first with a one-byte-shifted copy of the source row and then
 * with the row already present in dst ("avg_dst" + "aver_src1").
 *
 * Each loop iteration consumes four source rows and writes four destination
 * rows. The loop runs (height >> 2) times, so a height % 4 remainder is
 * never processed.
 *
 * src, src_stride: source pointer and per-row step.
 * dst, dst_stride: destination pointer and per-row step; dst is read for
 *                  the final averaging and then overwritten.
 * height:          number of rows to produce.
 *
 * NOTE(review): loop_count is uint8_t, so (height >> 2) is truncated to
 * 8 bits -- confirm callers only pass small heights.
 */
923cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_aver_src1_8width_msa(const uint8_t *src,
924cabdff1aSopenharmony_ci                                                       int32_t src_stride,
925cabdff1aSopenharmony_ci                                                       uint8_t *dst,
926cabdff1aSopenharmony_ci                                                       int32_t dst_stride,
927cabdff1aSopenharmony_ci                                                       int32_t height)
928cabdff1aSopenharmony_ci{
929cabdff1aSopenharmony_ci    uint8_t loop_count;
930cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
931cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
932cabdff1aSopenharmony_ci    v16u8 res0, res1;
    /*
     * Byte index tables passed to APPLY_HORIZ_QPEL_FILTER_8BYTE; how they
     * are consumed is defined inside that macro (outside this function).
     */
933cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
934cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
935cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
936cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Filter coefficients 20, 6 and 3, all replicated as byte lanes. */
937cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
938cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
939cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
940cabdff1aSopenharmony_ci
941cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
942cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
943cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Filter first: the shift below overwrites inp0..inp3 in place. */
944cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
945cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
946cabdff1aSopenharmony_ci                                             const20, const6, const3);
947cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
948cabdff1aSopenharmony_ci                                             mask0, mask1, mask2, mask3,
949cabdff1aSopenharmony_ci                                             const20, const6, const3);
950cabdff1aSopenharmony_ci        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
        /*
         * Presumably slides each row by one byte so that the averaging
         * below uses the pixels at src + 1 -- confirm against SLDI_B4_UB.
         */
951cabdff1aSopenharmony_ci        SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
952cabdff1aSopenharmony_ci                   inp0, inp1, inp2, inp3);
        /*
         * Pack two rows per vector: doubleword 0 of the odd row is copied
         * into doubleword 1 of the even row, for both source and dst.
         */
953cabdff1aSopenharmony_ci        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
954cabdff1aSopenharmony_ci        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
955cabdff1aSopenharmony_ci        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
956cabdff1aSopenharmony_ci        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
        /* Average with the shifted source, then with the destination. */
957cabdff1aSopenharmony_ci        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
958cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
        /* Presumably writes the four doublewords as four 8-byte rows. */
959cabdff1aSopenharmony_ci        ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
960cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
961cabdff1aSopenharmony_ci    }
962cabdff1aSopenharmony_ci}
963cabdff1aSopenharmony_ci
/*
 * Horizontal qpel filter for the "16width" case, where the filtered row is
 * averaged first with the source row loaded at src + 1 and then with the
 * row already present in dst ("avg_dst" + "aver_src1").
 *
 * Each loop iteration consumes four source rows and writes four destination
 * rows. The loop runs (height >> 2) times, so a height % 4 remainder is
 * never processed.
 *
 * src, src_stride: source pointer and per-row step; every row is loaded
 *                  once at src and once at src + 1.
 * dst, dst_stride: destination pointer and per-row step; dst is read for
 *                  the final averaging and then overwritten.
 * height:          number of rows to produce.
 *
 * NOTE(review): loop_count is uint8_t, so (height >> 2) is truncated to
 * 8 bits -- confirm callers only pass small heights.
 */
964cabdff1aSopenharmony_cistatic void horiz_mc_qpel_avg_dst_aver_src1_16width_msa(const uint8_t *src,
965cabdff1aSopenharmony_ci                                                        int32_t src_stride,
966cabdff1aSopenharmony_ci                                                        uint8_t *dst,
967cabdff1aSopenharmony_ci                                                        int32_t dst_stride,
968cabdff1aSopenharmony_ci                                                        int32_t height)
969cabdff1aSopenharmony_ci{
970cabdff1aSopenharmony_ci    uint8_t loop_count;
971cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
972cabdff1aSopenharmony_ci    v16u8 res0, res1, dst0, dst1;
    /* Byte-reversal index table handed to APPLY_HORIZ_QPEL_FILTER. */
973cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter coefficients: 6 and 3 as byte lanes, 20 as halfword lanes. */
974cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
975cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
976cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
977cabdff1aSopenharmony_ci
978cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* inp0/2/4/6: four rows at src; inp1/3/5/7: same rows at src + 1. */
979cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
980cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
981cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Rows 0-1: filter, then average with the rows loaded at src + 1. */
982cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
983cabdff1aSopenharmony_ci                                       const20, const6, const3);
984cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
985cabdff1aSopenharmony_ci                                       const20, const6, const3);
986cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
987cabdff1aSopenharmony_ci        AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1);
        /* Blend with the existing destination rows before storing. */
988cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
989cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
990cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
        /* Rows 2-3: same sequence on the remaining two loaded rows. */
991cabdff1aSopenharmony_ci        res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
992cabdff1aSopenharmony_ci                                       const20, const6, const3);
993cabdff1aSopenharmony_ci        res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
994cabdff1aSopenharmony_ci                                       const20, const6, const3);
995cabdff1aSopenharmony_ci        LD_UB2(dst, dst_stride, dst0, dst1);
996cabdff1aSopenharmony_ci        AVER_UB2_UB(res0, inp5, res1, inp7, res0, res1);
997cabdff1aSopenharmony_ci        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
998cabdff1aSopenharmony_ci        ST_UB2(res0, res1, dst, dst_stride);
999cabdff1aSopenharmony_ci        dst += (2 * dst_stride);
1000cabdff1aSopenharmony_ci    }
1001cabdff1aSopenharmony_ci}
1002cabdff1aSopenharmony_ci
1003cabdff1aSopenharmony_ci
/*
 * Vertical qpel filter for an 8x8 block, with each filtered row averaged
 * against the source row at the same vertical position ("aver_src0").
 *
 * Nine source rows (inp0..inp8) are loaded. Each call to
 * APPLY_VERT_QPEL_FILTER_8BYTE receives two groups of eight row vectors and
 * its result is stored as two 8-byte output rows. Where the filter would
 * need rows above inp0 or below inp8, the argument lists reuse in-range
 * rows instead (inp0/inp1 at the top, inp8/inp7/inp6 at the bottom).
 *
 * src, src_stride: source pointer and per-row step.
 * dst, dst_stride: destination pointer and per-row step; eight rows are
 *                  written.
 */
1004cabdff1aSopenharmony_cistatic void vert_mc_qpel_aver_src0_8x8_msa(const uint8_t *src,
1005cabdff1aSopenharmony_ci                                           int32_t src_stride,
1006cabdff1aSopenharmony_ci                                           uint8_t *dst,
1007cabdff1aSopenharmony_ci                                           int32_t dst_stride)
1008cabdff1aSopenharmony_ci{
1009cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1010cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
    /* Filter coefficients 20, 6 and 3, all replicated as byte lanes. */
1011cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1012cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1013cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1014cabdff1aSopenharmony_ci
1015cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1016cabdff1aSopenharmony_ci    src += (4 * src_stride);
1017cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1018cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Output rows 0 and 1. */
1019cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1020cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
1021cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
1022cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
1023cabdff1aSopenharmony_ci                                        const20, const6, const3);
1024cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1025cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Output rows 2 and 3. */
1026cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1027cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
1028cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
1029cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
1030cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /*
     * Pack source rows 0/1 and 2/3 two per vector (doubleword 0 of the odd
     * row goes into doubleword 1) so they line up with res0/res1.
     */
1031cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1032cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1033cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1034cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1035cabdff1aSopenharmony_ci
1036cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
    /* Output rows 4 and 5. */
1037cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1038cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
1039cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
1040cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
1041cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Output rows 6 and 7. */
1042cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1043cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
1044cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
1045cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
1046cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Same packing and averaging for source rows 4/5 and 6/7. */
1047cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
1048cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
1049cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
    /* Second half of the block starts four rows below dst. */
1050cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1051cabdff1aSopenharmony_ci}
1052cabdff1aSopenharmony_ci
/*
 * Vertical qpel filter for a 16x16 block, with each filtered row averaged
 * against the source row at the same vertical position ("aver_src0").
 *
 * Seventeen source rows (inp0..inp16) are loaded as the function proceeds.
 * For output row k, APPLY_VERT_QPEL_FILTER is given rows k, k-1, k-2, k-3
 * followed by rows k+1, k+2, k+3, k+4. Where those indices would fall
 * above inp0 or below inp16, the argument lists reuse in-range rows
 * instead. The result is then averaged with inp<k> via __msa_aver_u_b and
 * stored with ST_UB.
 *
 * src, src_stride: source pointer and per-row step.
 * dst, dst_stride: destination pointer and per-row step; sixteen rows are
 *                  written.
 */
1053cabdff1aSopenharmony_cistatic void vert_mc_qpel_aver_src0_16x16_msa(const uint8_t *src,
1054cabdff1aSopenharmony_ci                                             int32_t src_stride,
1055cabdff1aSopenharmony_ci                                             uint8_t *dst,
1056cabdff1aSopenharmony_ci                                             int32_t dst_stride)
1057cabdff1aSopenharmony_ci{
1058cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1059cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
1060cabdff1aSopenharmony_ci    v16u8 res0;
    /* Filter coefficients 20, 6 and 3, all replicated as byte lanes. */
1061cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1062cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1063cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1064cabdff1aSopenharmony_ci
    /* Output row 0 (top edge: inp0 is reused for the missing rows above). */
1065cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1066cabdff1aSopenharmony_ci    src += (5 * src_stride);
1067cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1068cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
1069cabdff1aSopenharmony_ci                                  const20, const6, const3);
1070cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp0);
1071cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1072cabdff1aSopenharmony_ci    dst += dst_stride;
1073cabdff1aSopenharmony_ci
    /* Output row 1. */
1074cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
1075cabdff1aSopenharmony_ci    src += src_stride;
1076cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1077cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
1078cabdff1aSopenharmony_ci                                  const20, const6, const3);
1079cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp1);
1080cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1081cabdff1aSopenharmony_ci    dst += dst_stride;
1082cabdff1aSopenharmony_ci
    /* Output row 2. */
1083cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
1084cabdff1aSopenharmony_ci    src += src_stride;
1085cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1086cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
1087cabdff1aSopenharmony_ci                                  const20, const6, const3);
1088cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp2);
1089cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1090cabdff1aSopenharmony_ci    dst += dst_stride;
1091cabdff1aSopenharmony_ci
    /* Output row 3 (first row whose taps are all distinct rows). */
1092cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
1093cabdff1aSopenharmony_ci    src += src_stride;
1094cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1095cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
1096cabdff1aSopenharmony_ci                                  const20, const6, const3);
1097cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp3);
1098cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1099cabdff1aSopenharmony_ci    dst += dst_stride;
1100cabdff1aSopenharmony_ci
    /* Output row 4. */
1101cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp8, inp9);
1102cabdff1aSopenharmony_ci    src += (2 * src_stride);
1103cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1104cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
1105cabdff1aSopenharmony_ci                                  const20, const6, const3);
1106cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp4);
1107cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1108cabdff1aSopenharmony_ci    dst += dst_stride;
1109cabdff1aSopenharmony_ci
    /* Output row 5. */
1110cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1111cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
1112cabdff1aSopenharmony_ci                                  const20, const6, const3);
1113cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp5);
1114cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1115cabdff1aSopenharmony_ci    dst += dst_stride;
1116cabdff1aSopenharmony_ci
    /* Output row 6. */
1117cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp10, inp11);
1118cabdff1aSopenharmony_ci    src += (2 * src_stride);
1119cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1120cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
1121cabdff1aSopenharmony_ci                                  const20, const6, const3);
1122cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp6);
1123cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1124cabdff1aSopenharmony_ci    dst += dst_stride;
1125cabdff1aSopenharmony_ci
    /* Output row 7. */
1126cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1127cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
1128cabdff1aSopenharmony_ci                                  const20, const6, const3);
1129cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp7);
1130cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1131cabdff1aSopenharmony_ci    dst += dst_stride;
1132cabdff1aSopenharmony_ci
    /* Output row 8. */
1133cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp12, inp13);
1134cabdff1aSopenharmony_ci    src += (2 * src_stride);
1135cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1136cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
1137cabdff1aSopenharmony_ci                                  const20, const6, const3);
1138cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp8);
1139cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1140cabdff1aSopenharmony_ci    dst += dst_stride;
1141cabdff1aSopenharmony_ci
    /* Output row 9. */
1142cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1143cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
1144cabdff1aSopenharmony_ci                                  const20, const6, const3);
1145cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp9);
1146cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1147cabdff1aSopenharmony_ci    dst += dst_stride;
1148cabdff1aSopenharmony_ci
    /* Output row 10. */
1149cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp14, inp15);
1150cabdff1aSopenharmony_ci    src += (2 * src_stride);
1151cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1152cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
1153cabdff1aSopenharmony_ci                                  const20, const6, const3);
1154cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp10);
1155cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1156cabdff1aSopenharmony_ci    dst += dst_stride;
1157cabdff1aSopenharmony_ci
    /* Output row 11. */
1158cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1159cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
1160cabdff1aSopenharmony_ci                                  const20, const6, const3);
1161cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp11);
1162cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1163cabdff1aSopenharmony_ci    dst += dst_stride;
1164cabdff1aSopenharmony_ci
    /* Output row 12 (inp16 is the last source row loaded). */
1165cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
1166cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1167cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
1168cabdff1aSopenharmony_ci                                  const20, const6, const3);
1169cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp12);
1170cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1171cabdff1aSopenharmony_ci    dst += dst_stride;
1172cabdff1aSopenharmony_ci
    /* Output row 13 (bottom edge: rows past inp16 reuse in-range rows). */
1173cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1174cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
1175cabdff1aSopenharmony_ci                                  const20, const6, const3);
1176cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp13);
1177cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1178cabdff1aSopenharmony_ci    dst += dst_stride;
1179cabdff1aSopenharmony_ci
    /* Output row 14. */
1180cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1181cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
1182cabdff1aSopenharmony_ci                                  const20, const6, const3);
1183cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp14);
1184cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1185cabdff1aSopenharmony_ci    dst += dst_stride;
1186cabdff1aSopenharmony_ci
    /* Output row 15. */
1187cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1188cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
1189cabdff1aSopenharmony_ci                                  const20, const6, const3);
1190cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp15);
1191cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1192cabdff1aSopenharmony_ci}
1193cabdff1aSopenharmony_ci
/*
 * Vertical qpel filter for an 8x8 block; the filter output is stored
 * directly, with no averaging against the source or destination.
 *
 * Nine source rows (inp0..inp8) are loaded. Each call to
 * APPLY_VERT_QPEL_FILTER_8BYTE receives two groups of eight row vectors and
 * its result is stored as two 8-byte output rows. Where the filter would
 * need rows above inp0 or below inp8, the argument lists reuse in-range
 * rows instead (inp0/inp1 at the top, inp8/inp7/inp6 at the bottom).
 *
 * src, src_stride: source pointer and per-row step.
 * dst, dst_stride: destination pointer and per-row step; eight rows are
 *                  written.
 */
1194cabdff1aSopenharmony_cistatic void vert_mc_qpel_8x8_msa(const uint8_t *src,
1195cabdff1aSopenharmony_ci                                 int32_t src_stride,
1196cabdff1aSopenharmony_ci                                 uint8_t *dst,
1197cabdff1aSopenharmony_ci                                 int32_t dst_stride)
1198cabdff1aSopenharmony_ci{
1199cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1200cabdff1aSopenharmony_ci    v16u8 res0, res1;
    /* Filter coefficients 20, 6 and 3, all replicated as byte lanes. */
1201cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1202cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1203cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1204cabdff1aSopenharmony_ci
1205cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1206cabdff1aSopenharmony_ci    src += (4 * src_stride);
1207cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1208cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Output rows 0 and 1. */
1209cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1210cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
1211cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
1212cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
1213cabdff1aSopenharmony_ci                                        const20, const6, const3);
1214cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1215cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Output rows 2 and 3. */
1216cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1217cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
1218cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
1219cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
1220cabdff1aSopenharmony_ci                                        const20, const6, const3);
1221cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1222cabdff1aSopenharmony_ci
1223cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
    /* Output rows 4 and 5. */
1224cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1225cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
1226cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
1227cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
1228cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Output rows 6 and 7. */
1229cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1230cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
1231cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
1232cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
1233cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Second half of the block starts four rows below dst. */
1234cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1235cabdff1aSopenharmony_ci}
1236cabdff1aSopenharmony_ci
1237cabdff1aSopenharmony_cistatic void vert_mc_qpel_16x16_msa(const uint8_t *src,
1238cabdff1aSopenharmony_ci                                   int32_t src_stride,
1239cabdff1aSopenharmony_ci                                   uint8_t *dst,
1240cabdff1aSopenharmony_ci                                   int32_t dst_stride)
1241cabdff1aSopenharmony_ci{
1242cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1243cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
1244cabdff1aSopenharmony_ci    v16u8 res0;
1245cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1246cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1247cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1248cabdff1aSopenharmony_ci
1249cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1250cabdff1aSopenharmony_ci    src += (4 * src_stride);
1251cabdff1aSopenharmony_ci    inp4 = LD_UB(src);
1252cabdff1aSopenharmony_ci    src += src_stride;
1253cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1254cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
1255cabdff1aSopenharmony_ci                                  const20, const6, const3);
1256cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1257cabdff1aSopenharmony_ci    dst += dst_stride;
1258cabdff1aSopenharmony_ci
1259cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
1260cabdff1aSopenharmony_ci    src += src_stride;
1261cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1262cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
1263cabdff1aSopenharmony_ci                                  const20, const6, const3);
1264cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1265cabdff1aSopenharmony_ci    dst += dst_stride;
1266cabdff1aSopenharmony_ci
1267cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
1268cabdff1aSopenharmony_ci    src += src_stride;
1269cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1270cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
1271cabdff1aSopenharmony_ci                                  const20, const6, const3);
1272cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1273cabdff1aSopenharmony_ci    dst += dst_stride;
1274cabdff1aSopenharmony_ci
1275cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
1276cabdff1aSopenharmony_ci    src += src_stride;
1277cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1278cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
1279cabdff1aSopenharmony_ci                                  const20, const6, const3);
1280cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1281cabdff1aSopenharmony_ci    dst += dst_stride;
1282cabdff1aSopenharmony_ci
1283cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1284cabdff1aSopenharmony_ci    src += src_stride;
1285cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1286cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
1287cabdff1aSopenharmony_ci                                  const20, const6, const3);
1288cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1289cabdff1aSopenharmony_ci    dst += dst_stride;
1290cabdff1aSopenharmony_ci
1291cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
1292cabdff1aSopenharmony_ci    src += src_stride;
1293cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1294cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
1295cabdff1aSopenharmony_ci                                  const20, const6, const3);
1296cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1297cabdff1aSopenharmony_ci    dst += dst_stride;
1298cabdff1aSopenharmony_ci
1299cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
1300cabdff1aSopenharmony_ci    src += src_stride;
1301cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1302cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
1303cabdff1aSopenharmony_ci                                  const20, const6, const3);
1304cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1305cabdff1aSopenharmony_ci    dst += dst_stride;
1306cabdff1aSopenharmony_ci
1307cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
1308cabdff1aSopenharmony_ci    src += src_stride;
1309cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1310cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
1311cabdff1aSopenharmony_ci                                  const20, const6, const3);
1312cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1313cabdff1aSopenharmony_ci    dst += dst_stride;
1314cabdff1aSopenharmony_ci
1315cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
1316cabdff1aSopenharmony_ci    src += src_stride;
1317cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1318cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
1319cabdff1aSopenharmony_ci                                  const20, const6, const3);
1320cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1321cabdff1aSopenharmony_ci    dst += dst_stride;
1322cabdff1aSopenharmony_ci
1323cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
1324cabdff1aSopenharmony_ci    src += src_stride;
1325cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1326cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
1327cabdff1aSopenharmony_ci                                  const20, const6, const3);
1328cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1329cabdff1aSopenharmony_ci    dst += dst_stride;
1330cabdff1aSopenharmony_ci
1331cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
1332cabdff1aSopenharmony_ci    src += src_stride;
1333cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1334cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
1335cabdff1aSopenharmony_ci                                  const20, const6, const3);
1336cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1337cabdff1aSopenharmony_ci    dst += dst_stride;
1338cabdff1aSopenharmony_ci
1339cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
1340cabdff1aSopenharmony_ci    src += src_stride;
1341cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1342cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
1343cabdff1aSopenharmony_ci                                  const20, const6, const3);
1344cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1345cabdff1aSopenharmony_ci    dst += dst_stride;
1346cabdff1aSopenharmony_ci
1347cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
1348cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1349cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
1350cabdff1aSopenharmony_ci                                  const20, const6, const3);
1351cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1352cabdff1aSopenharmony_ci    dst += dst_stride;
1353cabdff1aSopenharmony_ci
1354cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1355cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
1356cabdff1aSopenharmony_ci                                  const20, const6, const3);
1357cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1358cabdff1aSopenharmony_ci    dst += dst_stride;
1359cabdff1aSopenharmony_ci
1360cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1361cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
1362cabdff1aSopenharmony_ci                                  const20, const6, const3);
1363cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1364cabdff1aSopenharmony_ci    dst += dst_stride;
1365cabdff1aSopenharmony_ci
1366cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1367cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
1368cabdff1aSopenharmony_ci                                  const20, const6, const3);
1369cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1370cabdff1aSopenharmony_ci    dst += dst_stride;
1371cabdff1aSopenharmony_ci}
1372cabdff1aSopenharmony_ci
1373cabdff1aSopenharmony_cistatic void vert_mc_qpel_aver_src1_8x8_msa(const uint8_t *src,
1374cabdff1aSopenharmony_ci                                           int32_t src_stride,
1375cabdff1aSopenharmony_ci                                           uint8_t *dst,
1376cabdff1aSopenharmony_ci                                           int32_t dst_stride)
1377cabdff1aSopenharmony_ci{
1378cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1379cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
1380cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1381cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1382cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1383cabdff1aSopenharmony_ci
1384cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1385cabdff1aSopenharmony_ci    src += (4 * src_stride);
1386cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1387cabdff1aSopenharmony_ci    src += (2 * src_stride);
1388cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1389cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
1390cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
1391cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
1392cabdff1aSopenharmony_ci                                        const20, const6, const3);
1393cabdff1aSopenharmony_ci
1394cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1395cabdff1aSopenharmony_ci    src += (2 * src_stride);
1396cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1397cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
1398cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
1399cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
1400cabdff1aSopenharmony_ci                                        const20, const6, const3);
1401cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1402cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
1403cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1404cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1405cabdff1aSopenharmony_ci
1406cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1407cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1408cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
1409cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
1410cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
1411cabdff1aSopenharmony_ci                                        const20, const6, const3);
1412cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1413cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
1414cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
1415cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
1416cabdff1aSopenharmony_ci                                        const20, const6, const3);
1417cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6);
1418cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8);
1419cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1420cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1421cabdff1aSopenharmony_ci}
1422cabdff1aSopenharmony_ci
1423cabdff1aSopenharmony_cistatic void vert_mc_qpel_aver_src1_16x16_msa(const uint8_t *src,
1424cabdff1aSopenharmony_ci                                             int32_t src_stride,
1425cabdff1aSopenharmony_ci                                             uint8_t *dst,
1426cabdff1aSopenharmony_ci                                             int32_t dst_stride)
1427cabdff1aSopenharmony_ci{
1428cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1429cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
1430cabdff1aSopenharmony_ci    v16u8 res0;
1431cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1432cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1433cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1434cabdff1aSopenharmony_ci
1435cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1436cabdff1aSopenharmony_ci    src += (4 * src_stride);
1437cabdff1aSopenharmony_ci    inp4 = LD_UB(src);
1438cabdff1aSopenharmony_ci    src += src_stride;
1439cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1440cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
1441cabdff1aSopenharmony_ci                                  const20, const6, const3);
1442cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp1);
1443cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1444cabdff1aSopenharmony_ci    dst += dst_stride;
1445cabdff1aSopenharmony_ci
1446cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
1447cabdff1aSopenharmony_ci    src += src_stride;
1448cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1449cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
1450cabdff1aSopenharmony_ci                                  const20, const6, const3);
1451cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp2);
1452cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1453cabdff1aSopenharmony_ci    dst += dst_stride;
1454cabdff1aSopenharmony_ci
1455cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
1456cabdff1aSopenharmony_ci    src += src_stride;
1457cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1458cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
1459cabdff1aSopenharmony_ci                                  const20, const6, const3);
1460cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp3);
1461cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1462cabdff1aSopenharmony_ci    dst += dst_stride;
1463cabdff1aSopenharmony_ci
1464cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
1465cabdff1aSopenharmony_ci    src += src_stride;
1466cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1467cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
1468cabdff1aSopenharmony_ci                                  const20, const6, const3);
1469cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp4);
1470cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1471cabdff1aSopenharmony_ci    dst += dst_stride;
1472cabdff1aSopenharmony_ci
1473cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1474cabdff1aSopenharmony_ci    src += src_stride;
1475cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1476cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
1477cabdff1aSopenharmony_ci                                  const20, const6, const3);
1478cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp5);
1479cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1480cabdff1aSopenharmony_ci    dst += dst_stride;
1481cabdff1aSopenharmony_ci
1482cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
1483cabdff1aSopenharmony_ci    src += src_stride;
1484cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1485cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
1486cabdff1aSopenharmony_ci                                  const20, const6, const3);
1487cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp6);
1488cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1489cabdff1aSopenharmony_ci    dst += dst_stride;
1490cabdff1aSopenharmony_ci
1491cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
1492cabdff1aSopenharmony_ci    src += src_stride;
1493cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1494cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
1495cabdff1aSopenharmony_ci                                  const20, const6, const3);
1496cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp7);
1497cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1498cabdff1aSopenharmony_ci    dst += dst_stride;
1499cabdff1aSopenharmony_ci
1500cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
1501cabdff1aSopenharmony_ci    src += src_stride;
1502cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1503cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
1504cabdff1aSopenharmony_ci                                  const20, const6, const3);
1505cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp8);
1506cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1507cabdff1aSopenharmony_ci    dst += dst_stride;
1508cabdff1aSopenharmony_ci
1509cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
1510cabdff1aSopenharmony_ci    src += src_stride;
1511cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1512cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
1513cabdff1aSopenharmony_ci                                  const20, const6, const3);
1514cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp9);
1515cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1516cabdff1aSopenharmony_ci    dst += dst_stride;
1517cabdff1aSopenharmony_ci
1518cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
1519cabdff1aSopenharmony_ci    src += src_stride;
1520cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1521cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
1522cabdff1aSopenharmony_ci                                  const20, const6, const3);
1523cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp10);
1524cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1525cabdff1aSopenharmony_ci    dst += dst_stride;
1526cabdff1aSopenharmony_ci
1527cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
1528cabdff1aSopenharmony_ci    src += src_stride;
1529cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1530cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
1531cabdff1aSopenharmony_ci                                  const20, const6, const3);
1532cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp11);
1533cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1534cabdff1aSopenharmony_ci    dst += dst_stride;
1535cabdff1aSopenharmony_ci
1536cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
1537cabdff1aSopenharmony_ci    src += src_stride;
1538cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1539cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
1540cabdff1aSopenharmony_ci                                  const20, const6, const3);
1541cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp12);
1542cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1543cabdff1aSopenharmony_ci    dst += dst_stride;
1544cabdff1aSopenharmony_ci
1545cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
1546cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1547cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
1548cabdff1aSopenharmony_ci                                  const20, const6, const3);
1549cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp13);
1550cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1551cabdff1aSopenharmony_ci    dst += dst_stride;
1552cabdff1aSopenharmony_ci
1553cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1554cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
1555cabdff1aSopenharmony_ci                                  const20, const6, const3);
1556cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp14);
1557cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1558cabdff1aSopenharmony_ci    dst += dst_stride;
1559cabdff1aSopenharmony_ci
1560cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1561cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
1562cabdff1aSopenharmony_ci                                  const20, const6, const3);
1563cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp15);
1564cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1565cabdff1aSopenharmony_ci    dst += dst_stride;
1566cabdff1aSopenharmony_ci
1567cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1568cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
1569cabdff1aSopenharmony_ci                                  const20, const6, const3);
1570cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(res0, inp16);
1571cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1572cabdff1aSopenharmony_ci}
1573cabdff1aSopenharmony_ci
1574cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_aver_src0_8x8_msa(const uint8_t *src,
1575cabdff1aSopenharmony_ci                                                  int32_t src_stride,
1576cabdff1aSopenharmony_ci                                                  uint8_t *dst,
1577cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
1578cabdff1aSopenharmony_ci{
1579cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1580cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
1581cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1582cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1583cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1584cabdff1aSopenharmony_ci
1585cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1586cabdff1aSopenharmony_ci    src += (4 * src_stride);
1587cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1588cabdff1aSopenharmony_ci    src += (2 * src_stride);
1589cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1590cabdff1aSopenharmony_ci                                                 inp1, inp2, inp3, inp4,
1591cabdff1aSopenharmony_ci                                                 inp1, inp0, inp0, inp1,
1592cabdff1aSopenharmony_ci                                                 inp2, inp3, inp4, inp5,
1593cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1594cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1595cabdff1aSopenharmony_ci    src += (2 * src_stride);
1596cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1597cabdff1aSopenharmony_ci                                                 inp3, inp4, inp5, inp6,
1598cabdff1aSopenharmony_ci                                                 inp3, inp2, inp1, inp0,
1599cabdff1aSopenharmony_ci                                                 inp4, inp5, inp6, inp7,
1600cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1601cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1602cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1603cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, tmp0);
1604cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(res1, tmp1);
1605cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1606cabdff1aSopenharmony_ci
1607cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1608cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1609cabdff1aSopenharmony_ci                                                 inp5, inp6, inp7, inp8,
1610cabdff1aSopenharmony_ci                                                 inp5, inp4, inp3, inp2,
1611cabdff1aSopenharmony_ci                                                 inp6, inp7, inp8, inp8,
1612cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1613cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1614cabdff1aSopenharmony_ci                                                 inp7, inp8, inp8, inp7,
1615cabdff1aSopenharmony_ci                                                 inp7, inp6, inp5, inp4,
1616cabdff1aSopenharmony_ci                                                 inp8, inp8, inp7, inp6,
1617cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1618cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
1619cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
1620cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, tmp0);
1621cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(res1, tmp1);
1622cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1623cabdff1aSopenharmony_ci}
1624cabdff1aSopenharmony_ci
1625cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_aver_src0_16x16_msa(const uint8_t *src,
1626cabdff1aSopenharmony_ci                                                    int32_t src_stride,
1627cabdff1aSopenharmony_ci                                                    uint8_t *dst,
1628cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
1629cabdff1aSopenharmony_ci{
1630cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1631cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
1632cabdff1aSopenharmony_ci    v16u8 res0;
1633cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1634cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1635cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1636cabdff1aSopenharmony_ci
1637cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1638cabdff1aSopenharmony_ci    src += (5 * src_stride);
1639cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1640cabdff1aSopenharmony_ci                                           inp1, inp2, inp3, inp4,
1641cabdff1aSopenharmony_ci                                           const20, const6, const3);
1642cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp0);
1643cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1644cabdff1aSopenharmony_ci    dst += dst_stride;
1645cabdff1aSopenharmony_ci
1646cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
1647cabdff1aSopenharmony_ci    src += src_stride;
1648cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1649cabdff1aSopenharmony_ci                                           inp2, inp3, inp4, inp5,
1650cabdff1aSopenharmony_ci                                           const20, const6, const3);
1651cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp1);
1652cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1653cabdff1aSopenharmony_ci    dst += dst_stride;
1654cabdff1aSopenharmony_ci
1655cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
1656cabdff1aSopenharmony_ci    src += src_stride;
1657cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1658cabdff1aSopenharmony_ci                                           inp3, inp4, inp5, inp6,
1659cabdff1aSopenharmony_ci                                           const20, const6, const3);
1660cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp2);
1661cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1662cabdff1aSopenharmony_ci    dst += dst_stride;
1663cabdff1aSopenharmony_ci
1664cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
1665cabdff1aSopenharmony_ci    src += src_stride;
1666cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1667cabdff1aSopenharmony_ci                                           inp4, inp5, inp6, inp7,
1668cabdff1aSopenharmony_ci                                           const20, const6, const3);
1669cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp3);
1670cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1671cabdff1aSopenharmony_ci    dst += dst_stride;
1672cabdff1aSopenharmony_ci
1673cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1674cabdff1aSopenharmony_ci    src += src_stride;
1675cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1676cabdff1aSopenharmony_ci                                           inp5, inp6, inp7, inp8,
1677cabdff1aSopenharmony_ci                                           const20, const6, const3);
1678cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp4);
1679cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1680cabdff1aSopenharmony_ci    dst += dst_stride;
1681cabdff1aSopenharmony_ci
1682cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
1683cabdff1aSopenharmony_ci    src += src_stride;
1684cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1685cabdff1aSopenharmony_ci                                           inp6, inp7, inp8, inp9,
1686cabdff1aSopenharmony_ci                                           const20, const6, const3);
1687cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp5);
1688cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1689cabdff1aSopenharmony_ci    dst += dst_stride;
1690cabdff1aSopenharmony_ci
1691cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
1692cabdff1aSopenharmony_ci    src += src_stride;
1693cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1694cabdff1aSopenharmony_ci                                           inp7, inp8, inp9, inp10,
1695cabdff1aSopenharmony_ci                                           const20, const6, const3);
1696cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp6);
1697cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1698cabdff1aSopenharmony_ci    dst += dst_stride;
1699cabdff1aSopenharmony_ci
1700cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
1701cabdff1aSopenharmony_ci    src += src_stride;
1702cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
1703cabdff1aSopenharmony_ci                                           inp8, inp9, inp10, inp11,
1704cabdff1aSopenharmony_ci                                           const20, const6, const3);
1705cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp7);
1706cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1707cabdff1aSopenharmony_ci    dst += dst_stride;
1708cabdff1aSopenharmony_ci
1709cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
1710cabdff1aSopenharmony_ci    src += src_stride;
1711cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
1712cabdff1aSopenharmony_ci                                           inp9, inp10, inp11, inp12,
1713cabdff1aSopenharmony_ci                                           const20, const6, const3);
1714cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp8);
1715cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1716cabdff1aSopenharmony_ci    dst += dst_stride;
1717cabdff1aSopenharmony_ci
1718cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
1719cabdff1aSopenharmony_ci    src += src_stride;
1720cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
1721cabdff1aSopenharmony_ci                                           inp10, inp11, inp12, inp13,
1722cabdff1aSopenharmony_ci                                           const20, const6, const3);
1723cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp9);
1724cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1725cabdff1aSopenharmony_ci    dst += dst_stride;
1726cabdff1aSopenharmony_ci
1727cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
1728cabdff1aSopenharmony_ci    src += src_stride;
1729cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
1730cabdff1aSopenharmony_ci                                           inp11, inp12, inp13, inp14,
1731cabdff1aSopenharmony_ci                                           const20, const6, const3);
1732cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp10);
1733cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1734cabdff1aSopenharmony_ci    dst += dst_stride;
1735cabdff1aSopenharmony_ci
1736cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
1737cabdff1aSopenharmony_ci    src += src_stride;
1738cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
1739cabdff1aSopenharmony_ci                                           inp12, inp13, inp14, inp15,
1740cabdff1aSopenharmony_ci                                           const20, const6, const3);
1741cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp11);
1742cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1743cabdff1aSopenharmony_ci    dst += dst_stride;
1744cabdff1aSopenharmony_ci
1745cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
1746cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
1747cabdff1aSopenharmony_ci                                           inp13, inp14, inp15, inp16,
1748cabdff1aSopenharmony_ci                                           const20, const6, const3);
1749cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp12);
1750cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1751cabdff1aSopenharmony_ci    dst += dst_stride;
1752cabdff1aSopenharmony_ci
1753cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
1754cabdff1aSopenharmony_ci                                           inp14, inp15, inp16, inp16,
1755cabdff1aSopenharmony_ci                                           const20, const6, const3);
1756cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp13);
1757cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1758cabdff1aSopenharmony_ci    dst += dst_stride;
1759cabdff1aSopenharmony_ci
1760cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
1761cabdff1aSopenharmony_ci                                           inp15, inp16, inp16, inp15,
1762cabdff1aSopenharmony_ci                                           const20, const6, const3);
1763cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp14);
1764cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1765cabdff1aSopenharmony_ci    dst += dst_stride;
1766cabdff1aSopenharmony_ci
1767cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
1768cabdff1aSopenharmony_ci                                           inp16, inp16, inp15, inp14,
1769cabdff1aSopenharmony_ci                                           const20, const6, const3);
1770cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp15);
1771cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1772cabdff1aSopenharmony_ci    dst += dst_stride;
1773cabdff1aSopenharmony_ci}
1774cabdff1aSopenharmony_ci
/*
 * Vertical quarter-pel filter over an 8-row block, built on the
 * APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE macro.
 *
 * Reads nine rows (inp0..inp8) starting at src, src_stride bytes apart, and
 * writes the result with two ST_D4 calls, the second one starting four rows
 * (4 * dst_stride bytes) below the first.  No row before inp0 or after inp8
 * is loaded: the operand lists of the outermost filter calls repeat inp0 /
 * inp8 and then step back inward instead.
 *
 * src, src_stride: first input row and byte distance between input rows.
 * dst, dst_stride: first output row and byte distance between output rows.
 *
 * NOTE(review): each macro call takes two groups of eight row operands and
 * yields one vector -- presumably one operand group per 8-byte output row,
 * packed low/high; confirm against the macro definition.
 */
1775cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_8x8_msa(const uint8_t *src,
1776cabdff1aSopenharmony_ci                                        int32_t src_stride,
1777cabdff1aSopenharmony_ci                                        uint8_t *dst,
1778cabdff1aSopenharmony_ci                                        int32_t dst_stride)
1779cabdff1aSopenharmony_ci{
1780cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1781cabdff1aSopenharmony_ci    v16u8 res0, res1;
1782cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1783cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1784cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1785cabdff1aSopenharmony_ci
    /* The first filter call only references inp0..inp5, so six rows are
     * loaded before it. */
1786cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1787cabdff1aSopenharmony_ci    src += (4 * src_stride);
1788cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1789cabdff1aSopenharmony_ci    src += (2 * src_stride);
1790cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1791cabdff1aSopenharmony_ci                                                 inp1, inp2, inp3, inp4,
1792cabdff1aSopenharmony_ci                                                 inp1, inp0, inp0, inp1,
1793cabdff1aSopenharmony_ci                                                 inp2, inp3, inp4, inp5,
1794cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Two more rows are needed before the second call (it reaches inp7). */
1795cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1796cabdff1aSopenharmony_ci    src += (2 * src_stride);
1797cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1798cabdff1aSopenharmony_ci                                                 inp3, inp4, inp5, inp6,
1799cabdff1aSopenharmony_ci                                                 inp3, inp2, inp1, inp0,
1800cabdff1aSopenharmony_ci                                                 inp4, inp5, inp6, inp7,
1801cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1802cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1803cabdff1aSopenharmony_ci
    /* Last row read from src; the pointer is not advanced afterwards. */
1804cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1805cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1806cabdff1aSopenharmony_ci                                                 inp5, inp6, inp7, inp8,
1807cabdff1aSopenharmony_ci                                                 inp5, inp4, inp3, inp2,
1808cabdff1aSopenharmony_ci                                                 inp6, inp7, inp8, inp8,
1809cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Bottom edge: inp8, inp7 and inp6 stand in for rows past inp8. */
1810cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1811cabdff1aSopenharmony_ci                                                 inp7, inp8, inp8, inp7,
1812cabdff1aSopenharmony_ci                                                 inp7, inp6, inp5, inp4,
1813cabdff1aSopenharmony_ci                                                 inp8, inp8, inp7, inp6,
1814cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* dst itself is never modified; the lower half is addressed by offset. */
1815cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1816cabdff1aSopenharmony_ci}
1817cabdff1aSopenharmony_ci
/*
 * Vertical quarter-pel filter over a 16-row block, built on the
 * APPLY_VERT_QPEL_NO_ROUND_FILTER macro.
 *
 * Loads seventeen rows (inp0..inp16) from src, one v16u8 vector per row,
 * and stores sixteen result vectors to dst with ST_UB, stepping dst by
 * dst_stride between stores.  Loads are interleaved with the filter calls:
 * five rows up front, then one new row ahead of each of the next twelve
 * outputs; the last three outputs need no further loads.
 *
 * For output row k the eight row operands are
 *     (k, k-1, k-2, k-3, k+1, k+2, k+3, k+4).
 * Where an index would fall outside 0..16 an in-range row is substituted
 * (inp0 or inp16 first, then stepping back inward), so nothing outside the
 * seventeen loaded rows is read.
 *
 * src, src_stride: first input row and byte distance between input rows.
 * dst, dst_stride: first output row and byte distance between output rows.
 */
1818cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_16x16_msa(const uint8_t *src,
1819cabdff1aSopenharmony_ci                                          int32_t src_stride,
1820cabdff1aSopenharmony_ci                                          uint8_t *dst,
1821cabdff1aSopenharmony_ci                                          int32_t dst_stride)
1822cabdff1aSopenharmony_ci{
1823cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1824cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
1825cabdff1aSopenharmony_ci    v16u8 res0;
1826cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1827cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1828cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1829cabdff1aSopenharmony_ci
    /* Output 0 (top edge): inp0, inp1, inp2 replace rows above inp0. */
1830cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1831cabdff1aSopenharmony_ci    src += (5 * src_stride);
1832cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1833cabdff1aSopenharmony_ci                                           inp1, inp2, inp3, inp4,
1834cabdff1aSopenharmony_ci                                           const20, const6, const3);
1835cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1836cabdff1aSopenharmony_ci    dst += dst_stride;
1837cabdff1aSopenharmony_ci
    /* Output 1. */
1838cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
1839cabdff1aSopenharmony_ci    src += src_stride;
1840cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1841cabdff1aSopenharmony_ci                                           inp2, inp3, inp4, inp5,
1842cabdff1aSopenharmony_ci                                           const20, const6, const3);
1843cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1844cabdff1aSopenharmony_ci    dst += dst_stride;
1845cabdff1aSopenharmony_ci
    /* Output 2: last one that needs a substitute at the top edge. */
1846cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
1847cabdff1aSopenharmony_ci    src += src_stride;
1848cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1849cabdff1aSopenharmony_ci                                           inp3, inp4, inp5, inp6,
1850cabdff1aSopenharmony_ci                                           const20, const6, const3);
1851cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1852cabdff1aSopenharmony_ci    dst += dst_stride;
1853cabdff1aSopenharmony_ci
    /* Outputs 3..12: all eight operands are distinct loaded rows. */
1854cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
1855cabdff1aSopenharmony_ci    src += src_stride;
1856cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1857cabdff1aSopenharmony_ci                                           inp4, inp5, inp6, inp7,
1858cabdff1aSopenharmony_ci                                           const20, const6, const3);
1859cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1860cabdff1aSopenharmony_ci    dst += dst_stride;
1861cabdff1aSopenharmony_ci
1862cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1863cabdff1aSopenharmony_ci    src += src_stride;
1864cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1865cabdff1aSopenharmony_ci                                           inp5, inp6, inp7, inp8,
1866cabdff1aSopenharmony_ci                                           const20, const6, const3);
1867cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1868cabdff1aSopenharmony_ci    dst += dst_stride;
1869cabdff1aSopenharmony_ci
1870cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
1871cabdff1aSopenharmony_ci    src += src_stride;
1872cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1873cabdff1aSopenharmony_ci                                           inp6, inp7, inp8, inp9,
1874cabdff1aSopenharmony_ci                                           const20, const6, const3);
1875cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1876cabdff1aSopenharmony_ci    dst += dst_stride;
1877cabdff1aSopenharmony_ci
1878cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
1879cabdff1aSopenharmony_ci    src += src_stride;
1880cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1881cabdff1aSopenharmony_ci                                           inp7, inp8, inp9, inp10,
1882cabdff1aSopenharmony_ci                                           const20, const6, const3);
1883cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1884cabdff1aSopenharmony_ci    dst += dst_stride;
1885cabdff1aSopenharmony_ci
1886cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
1887cabdff1aSopenharmony_ci    src += src_stride;
1888cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
1889cabdff1aSopenharmony_ci                                           inp8, inp9, inp10, inp11,
1890cabdff1aSopenharmony_ci                                           const20, const6, const3);
1891cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1892cabdff1aSopenharmony_ci    dst += dst_stride;
1893cabdff1aSopenharmony_ci
1894cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
1895cabdff1aSopenharmony_ci    src += src_stride;
1896cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
1897cabdff1aSopenharmony_ci                                           inp9, inp10, inp11, inp12,
1898cabdff1aSopenharmony_ci                                           const20, const6, const3);
1899cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1900cabdff1aSopenharmony_ci    dst += dst_stride;
1901cabdff1aSopenharmony_ci
1902cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
1903cabdff1aSopenharmony_ci    src += src_stride;
1904cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
1905cabdff1aSopenharmony_ci                                           inp10, inp11, inp12, inp13,
1906cabdff1aSopenharmony_ci                                           const20, const6, const3);
1907cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1908cabdff1aSopenharmony_ci    dst += dst_stride;
1909cabdff1aSopenharmony_ci
1910cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
1911cabdff1aSopenharmony_ci    src += src_stride;
1912cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
1913cabdff1aSopenharmony_ci                                           inp11, inp12, inp13, inp14,
1914cabdff1aSopenharmony_ci                                           const20, const6, const3);
1915cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1916cabdff1aSopenharmony_ci    dst += dst_stride;
1917cabdff1aSopenharmony_ci
1918cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
1919cabdff1aSopenharmony_ci    src += src_stride;
1920cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
1921cabdff1aSopenharmony_ci                                           inp12, inp13, inp14, inp15,
1922cabdff1aSopenharmony_ci                                           const20, const6, const3);
1923cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1924cabdff1aSopenharmony_ci    dst += dst_stride;
1925cabdff1aSopenharmony_ci
    /* Final load (inp16); src is not advanced again. */
1926cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
1927cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
1928cabdff1aSopenharmony_ci                                           inp13, inp14, inp15, inp16,
1929cabdff1aSopenharmony_ci                                           const20, const6, const3);
1930cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1931cabdff1aSopenharmony_ci    dst += dst_stride;
1932cabdff1aSopenharmony_ci
    /* Outputs 13..15 (bottom edge): inp16, inp15, inp14 replace the rows
     * that would lie past inp16. */
1933cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
1934cabdff1aSopenharmony_ci                                           inp14, inp15, inp16, inp16,
1935cabdff1aSopenharmony_ci                                           const20, const6, const3);
1936cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1937cabdff1aSopenharmony_ci    dst += dst_stride;
1938cabdff1aSopenharmony_ci
1939cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
1940cabdff1aSopenharmony_ci                                           inp15, inp16, inp16, inp15,
1941cabdff1aSopenharmony_ci                                           const20, const6, const3);
1942cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1943cabdff1aSopenharmony_ci    dst += dst_stride;
1944cabdff1aSopenharmony_ci
1945cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
1946cabdff1aSopenharmony_ci                                           inp16, inp16, inp15, inp14,
1947cabdff1aSopenharmony_ci                                           const20, const6, const3);
1948cabdff1aSopenharmony_ci    ST_UB(res0, dst);
1949cabdff1aSopenharmony_ci}
1950cabdff1aSopenharmony_ci
/*
 * Same filter calls as vert_mc_qpel_no_rnd_8x8_msa, but each filtered
 * vector is additionally averaged with source rows before being stored.
 *
 * The averaging partners are built from (inp1, inp2), (inp3, inp4),
 * (inp5, inp6) and (inp7, inp8), i.e. the rows one below each output row.
 * Nine rows (inp0..inp8) are read from src; output goes to dst through two
 * ST_D4 calls, the second starting 4 * dst_stride bytes below the first.
 *
 * src, src_stride: first input row and byte distance between input rows.
 * dst, dst_stride: first output row and byte distance between output rows.
 *
 * NOTE(review): __msa_insve_d(a, 1, b) is taken here to return a with
 * doubleword 1 replaced by doubleword 0 of b, so that the low 8 bytes of two
 * rows line up with the two rows packed in each filter result -- confirm
 * against the MSA INSVE.D definition.
 * NOTE(review): __msa_ave_u_b is presumably the truncating unsigned byte
 * average (as opposed to the rounding __msa_aver_u_b) -- confirm.
 */
1951cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_aver_src1_8x8_msa(const uint8_t *src,
1952cabdff1aSopenharmony_ci                                                  int32_t src_stride,
1953cabdff1aSopenharmony_ci                                                  uint8_t *dst,
1954cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
1955cabdff1aSopenharmony_ci{
1956cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1957cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
1958cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
1959cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
1960cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
1961cabdff1aSopenharmony_ci
    /* The first filter call only references inp0..inp5. */
1962cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1963cabdff1aSopenharmony_ci    src += (4 * src_stride);
1964cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
1965cabdff1aSopenharmony_ci    src += (2 * src_stride);
1966cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1967cabdff1aSopenharmony_ci                                                 inp1, inp2, inp3, inp4,
1968cabdff1aSopenharmony_ci                                                 inp1, inp0, inp0, inp1,
1969cabdff1aSopenharmony_ci                                                 inp2, inp3, inp4, inp5,
1970cabdff1aSopenharmony_ci                                                 const20, const6, const3);
1971cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
1972cabdff1aSopenharmony_ci    src += (2 * src_stride);
1973cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1974cabdff1aSopenharmony_ci                                                 inp3, inp4, inp5, inp6,
1975cabdff1aSopenharmony_ci                                                 inp3, inp2, inp1, inp0,
1976cabdff1aSopenharmony_ci                                                 inp4, inp5, inp6, inp7,
1977cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Averaging partners for the upper half: inp1..inp4. */
1978cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1979cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
1980cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, tmp0);
1981cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(res1, tmp1);
1982cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1983cabdff1aSopenharmony_ci
    /* Last row read from src; the pointer is not advanced afterwards. */
1984cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
1985cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1986cabdff1aSopenharmony_ci                                                 inp5, inp6, inp7, inp8,
1987cabdff1aSopenharmony_ci                                                 inp5, inp4, inp3, inp2,
1988cabdff1aSopenharmony_ci                                                 inp6, inp7, inp8, inp8,
1989cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Bottom edge: inp8, inp7 and inp6 stand in for rows past inp8. */
1990cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1991cabdff1aSopenharmony_ci                                                 inp7, inp8, inp8, inp7,
1992cabdff1aSopenharmony_ci                                                 inp7, inp6, inp5, inp4,
1993cabdff1aSopenharmony_ci                                                 inp8, inp8, inp7, inp6,
1994cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Averaging partners for the lower half: inp5..inp8. */
1995cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6);
1996cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8);
1997cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, tmp0);
1998cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(res1, tmp1);
1999cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
2000cabdff1aSopenharmony_ci}
2001cabdff1aSopenharmony_ci
/*
 * Same filter calls as vert_mc_qpel_no_rnd_16x16_msa, but every filtered
 * row is averaged with a source row (via __msa_ave_u_b) before it is stored.
 *
 * Output row k is averaged with inp(k+1): the partners run from inp1 for the
 * first output to inp16 for the last.  Seventeen rows (inp0..inp16) are read
 * from src and sixteen vectors are written to dst with ST_UB, dst_stride
 * bytes apart.
 *
 * For output row k the eight filter operands are
 *     (k, k-1, k-2, k-3, k+1, k+2, k+3, k+4),
 * with in-range rows substituted wherever an index would fall outside 0..16.
 *
 * src, src_stride: first input row and byte distance between input rows.
 * dst, dst_stride: first output row and byte distance between output rows.
 *
 * NOTE(review): __msa_ave_u_b is presumably the truncating unsigned byte
 * average (as opposed to the rounding __msa_aver_u_b) -- confirm.
 */
2002cabdff1aSopenharmony_cistatic void vert_mc_qpel_no_rnd_aver_src1_16x16_msa(const uint8_t *src,
2003cabdff1aSopenharmony_ci                                                    int32_t src_stride,
2004cabdff1aSopenharmony_ci                                                    uint8_t *dst,
2005cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
2006cabdff1aSopenharmony_ci{
2007cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2008cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
2009cabdff1aSopenharmony_ci    v16u8 res0;
2010cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2011cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2012cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2013cabdff1aSopenharmony_ci
    /* Output 0 (top edge): inp0, inp1, inp2 replace rows above inp0. */
2014cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2015cabdff1aSopenharmony_ci    src += (5 * src_stride);
2016cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
2017cabdff1aSopenharmony_ci                                           inp1, inp2, inp3, inp4,
2018cabdff1aSopenharmony_ci                                           const20, const6, const3);
2019cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp1);
2020cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2021cabdff1aSopenharmony_ci    dst += dst_stride;
2022cabdff1aSopenharmony_ci
    /* Output 1. */
2023cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
2024cabdff1aSopenharmony_ci    src += src_stride;
2025cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
2026cabdff1aSopenharmony_ci                                           inp2, inp3, inp4, inp5,
2027cabdff1aSopenharmony_ci                                           const20, const6, const3);
2028cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp2);
2029cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2030cabdff1aSopenharmony_ci    dst += dst_stride;
2031cabdff1aSopenharmony_ci
    /* Output 2: last one that needs a substitute at the top edge. */
2032cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
2033cabdff1aSopenharmony_ci    src += src_stride;
2034cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
2035cabdff1aSopenharmony_ci                                           inp3, inp4, inp5, inp6,
2036cabdff1aSopenharmony_ci                                           const20, const6, const3);
2037cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp3);
2038cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2039cabdff1aSopenharmony_ci    dst += dst_stride;
2040cabdff1aSopenharmony_ci
    /* Outputs 3..12: all eight operands are distinct loaded rows. */
2041cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
2042cabdff1aSopenharmony_ci    src += src_stride;
2043cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2044cabdff1aSopenharmony_ci                                           inp4, inp5, inp6, inp7,
2045cabdff1aSopenharmony_ci                                           const20, const6, const3);
2046cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp4);
2047cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2048cabdff1aSopenharmony_ci    dst += dst_stride;
2049cabdff1aSopenharmony_ci
2050cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2051cabdff1aSopenharmony_ci    src += src_stride;
2052cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
2053cabdff1aSopenharmony_ci                                           inp5, inp6, inp7, inp8,
2054cabdff1aSopenharmony_ci                                           const20, const6, const3);
2055cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp5);
2056cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2057cabdff1aSopenharmony_ci    dst += dst_stride;
2058cabdff1aSopenharmony_ci
2059cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
2060cabdff1aSopenharmony_ci    src += src_stride;
2061cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
2062cabdff1aSopenharmony_ci                                           inp6, inp7, inp8, inp9,
2063cabdff1aSopenharmony_ci                                           const20, const6, const3);
2064cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp6);
2065cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2066cabdff1aSopenharmony_ci    dst += dst_stride;
2067cabdff1aSopenharmony_ci
2068cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
2069cabdff1aSopenharmony_ci    src += src_stride;
2070cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
2071cabdff1aSopenharmony_ci                                           inp7, inp8, inp9, inp10,
2072cabdff1aSopenharmony_ci                                           const20, const6, const3);
2073cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp7);
2074cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2075cabdff1aSopenharmony_ci    dst += dst_stride;
2076cabdff1aSopenharmony_ci
2077cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
2078cabdff1aSopenharmony_ci    src += src_stride;
2079cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
2080cabdff1aSopenharmony_ci                                           inp8, inp9, inp10, inp11,
2081cabdff1aSopenharmony_ci                                           const20, const6, const3);
2082cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp8);
2083cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2084cabdff1aSopenharmony_ci    dst += dst_stride;
2085cabdff1aSopenharmony_ci
2086cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
2087cabdff1aSopenharmony_ci    src += src_stride;
2088cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
2089cabdff1aSopenharmony_ci                                           inp9, inp10, inp11, inp12,
2090cabdff1aSopenharmony_ci                                           const20, const6, const3);
2091cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp9);
2092cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2093cabdff1aSopenharmony_ci    dst += dst_stride;
2094cabdff1aSopenharmony_ci
2095cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
2096cabdff1aSopenharmony_ci    src += src_stride;
2097cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
2098cabdff1aSopenharmony_ci                                           inp10, inp11, inp12, inp13,
2099cabdff1aSopenharmony_ci                                           const20, const6, const3);
2100cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp10);
2101cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2102cabdff1aSopenharmony_ci    dst += dst_stride;
2103cabdff1aSopenharmony_ci
2104cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
2105cabdff1aSopenharmony_ci    src += src_stride;
2106cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
2107cabdff1aSopenharmony_ci                                           inp11, inp12, inp13, inp14,
2108cabdff1aSopenharmony_ci                                           const20, const6, const3);
2109cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp11);
2110cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2111cabdff1aSopenharmony_ci    dst += dst_stride;
2112cabdff1aSopenharmony_ci
2113cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
2114cabdff1aSopenharmony_ci    src += src_stride;
2115cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
2116cabdff1aSopenharmony_ci                                           inp12, inp13, inp14, inp15,
2117cabdff1aSopenharmony_ci                                           const20, const6, const3);
2118cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp12);
2119cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2120cabdff1aSopenharmony_ci    dst += dst_stride;
2121cabdff1aSopenharmony_ci
    /* Final load (inp16); src is not advanced again. */
2122cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
2123cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
2124cabdff1aSopenharmony_ci                                           inp13, inp14, inp15, inp16,
2125cabdff1aSopenharmony_ci                                           const20, const6, const3);
2126cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp13);
2127cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2128cabdff1aSopenharmony_ci    dst += dst_stride;
2129cabdff1aSopenharmony_ci
    /* Outputs 13..15 (bottom edge): inp16, inp15, inp14 replace the rows
     * that would lie past inp16. */
2130cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
2131cabdff1aSopenharmony_ci                                           inp14, inp15, inp16, inp16,
2132cabdff1aSopenharmony_ci                                           const20, const6, const3);
2133cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp14);
2134cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2135cabdff1aSopenharmony_ci    dst += dst_stride;
2136cabdff1aSopenharmony_ci
2137cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
2138cabdff1aSopenharmony_ci                                           inp15, inp16, inp16, inp15,
2139cabdff1aSopenharmony_ci                                           const20, const6, const3);
2140cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp15);
2141cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2142cabdff1aSopenharmony_ci    dst += dst_stride;
2143cabdff1aSopenharmony_ci
2144cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
2145cabdff1aSopenharmony_ci                                           inp16, inp16, inp15, inp14,
2146cabdff1aSopenharmony_ci                                           const20, const6, const3);
2147cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(res0, inp16);
2148cabdff1aSopenharmony_ci    ST_UB(res0, dst);
2149cabdff1aSopenharmony_ci}
2150cabdff1aSopenharmony_ci
/*
 * Vertical quarter-pel filter over an 8-row block, built on
 * APPLY_VERT_QPEL_FILTER_8BYTE, whose result is combined first with source
 * rows and then with the bytes already present in dst.
 *
 * Per half (four output rows):
 *   1. two filter calls produce res0 / res1;
 *   2. tmp0 / tmp1 are built from the source rows at the same positions as
 *      the outputs (inp0..inp3 for the upper half, inp4..inp7 for the lower);
 *   3. four existing dst rows are loaded and packed pairwise into dst0 / dst2;
 *   4. AVER_UB2_UB merges res with tmp, then merges that with dst0 / dst2;
 *   5. ST_D4 writes the four rows back.
 * dst is advanced by four rows between the halves because it is both read
 * and written.  Nine rows (inp0..inp8) are read from src.
 *
 * src, src_stride: first input row and byte distance between input rows.
 * dst, dst_stride: first output row and byte distance between output rows;
 *                  dst is read as well as written.
 *
 * NOTE(review): AVER_UB2_UB is presumably a pairwise rounding unsigned byte
 * average (in0 with in1 -> out0, in2 with in3 -> out1) -- confirm against its
 * definition in the MSA macro header.
 * NOTE(review): __msa_insve_d(a, 1, b) is taken here to return a with
 * doubleword 1 replaced by doubleword 0 of b -- confirm against the MSA
 * INSVE.D definition.
 */
2151cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_aver_src0_8x8_msa(const uint8_t *src,
2152cabdff1aSopenharmony_ci                                                   int32_t src_stride,
2153cabdff1aSopenharmony_ci                                                   uint8_t *dst,
2154cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
2155cabdff1aSopenharmony_ci{
2156cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2157cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
2158cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
2159cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2160cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2161cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2162cabdff1aSopenharmony_ci
    /* The first filter call only references inp0..inp5. */
2163cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2164cabdff1aSopenharmony_ci    src += (4 * src_stride);
2165cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
2166cabdff1aSopenharmony_ci    src += (2 * src_stride);
2167cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2168cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
2169cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
2170cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
2171cabdff1aSopenharmony_ci                                        const20, const6, const3);
2172cabdff1aSopenharmony_ci
2173cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
2174cabdff1aSopenharmony_ci    src += (2 * src_stride);
2175cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2176cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
2177cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
2178cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
2179cabdff1aSopenharmony_ci                                        const20, const6, const3);
2180cabdff1aSopenharmony_ci
    /* Upper half: blend with source rows inp0..inp3, then with dst. */
2181cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2182cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
2183cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
2184cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2185cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2186cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2187cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2188cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2189cabdff1aSopenharmony_ci    dst += (4 * dst_stride);
2190cabdff1aSopenharmony_ci
    /* Last row read from src; the pointer is not advanced afterwards. */
2191cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2192cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2193cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
2194cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
2195cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
2196cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Bottom edge: inp8, inp7 and inp6 stand in for rows past inp8. */
2197cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2198cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
2199cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
2200cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
2201cabdff1aSopenharmony_ci                                        const20, const6, const3);
2202cabdff1aSopenharmony_ci
    /* Lower half: blend with source rows inp4..inp7, then with dst. */
2203cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2204cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
2205cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
2206cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2207cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2208cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2209cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2210cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2211cabdff1aSopenharmony_ci}
2212cabdff1aSopenharmony_ci
2213cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_aver_src0_16x16_msa(const uint8_t *src,
2214cabdff1aSopenharmony_ci                                                     int32_t src_stride,
2215cabdff1aSopenharmony_ci                                                     uint8_t *dst,
2216cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
2217cabdff1aSopenharmony_ci{
2218cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2219cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
2220cabdff1aSopenharmony_ci    v16u8 res0, res1, dst0, dst1;
2221cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2222cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2223cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2224cabdff1aSopenharmony_ci
2225cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2226cabdff1aSopenharmony_ci    src += (5 * src_stride);
2227cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2228cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
2229cabdff1aSopenharmony_ci                                  const20, const6, const3);
2230cabdff1aSopenharmony_ci
2231cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
2232cabdff1aSopenharmony_ci    src += src_stride;
2233cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2234cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
2235cabdff1aSopenharmony_ci                                  const20, const6, const3);
2236cabdff1aSopenharmony_ci
2237cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2238cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
2239cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2240cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2241cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2242cabdff1aSopenharmony_ci
2243cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
2244cabdff1aSopenharmony_ci    src += src_stride;
2245cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2246cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
2247cabdff1aSopenharmony_ci                                  const20, const6, const3);
2248cabdff1aSopenharmony_ci
2249cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
2250cabdff1aSopenharmony_ci    src += src_stride;
2251cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2252cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
2253cabdff1aSopenharmony_ci                                  const20, const6, const3);
2254cabdff1aSopenharmony_ci
2255cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2256cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
2257cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2258cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2259cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2260cabdff1aSopenharmony_ci
2261cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp8, inp9);
2262cabdff1aSopenharmony_ci    src += (2 * src_stride);
2263cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2264cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
2265cabdff1aSopenharmony_ci                                  const20, const6, const3);
2266cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2267cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
2268cabdff1aSopenharmony_ci                                  const20, const6, const3);
2269cabdff1aSopenharmony_ci
2270cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2271cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp4, res1, inp5, res0, res1);
2272cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2273cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2274cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2275cabdff1aSopenharmony_ci
2276cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp10, inp11);
2277cabdff1aSopenharmony_ci    src += (2 * src_stride);
2278cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2279cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
2280cabdff1aSopenharmony_ci                                  const20, const6, const3);
2281cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2282cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
2283cabdff1aSopenharmony_ci                                  const20, const6, const3);
2284cabdff1aSopenharmony_ci
2285cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2286cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp6, res1, inp7, res0, res1);
2287cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2288cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2289cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2290cabdff1aSopenharmony_ci
2291cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp12, inp13);
2292cabdff1aSopenharmony_ci    src += (2 * src_stride);
2293cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2294cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
2295cabdff1aSopenharmony_ci                                  const20, const6, const3);
2296cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2297cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
2298cabdff1aSopenharmony_ci                                  const20, const6, const3);
2299cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2300cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp8, res1, inp9, res0, res1);
2301cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2302cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2303cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2304cabdff1aSopenharmony_ci
2305cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp14, inp15);
2306cabdff1aSopenharmony_ci    src += (2 * src_stride);
2307cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2308cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
2309cabdff1aSopenharmony_ci                                  const20, const6, const3);
2310cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2311cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
2312cabdff1aSopenharmony_ci                                  const20, const6, const3);
2313cabdff1aSopenharmony_ci
2314cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2315cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp10, res1, inp11, res0, res1);
2316cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2317cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2318cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2319cabdff1aSopenharmony_ci
2320cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
2321cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2322cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
2323cabdff1aSopenharmony_ci                                  const20, const6, const3);
2324cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2325cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
2326cabdff1aSopenharmony_ci                                  const20, const6, const3);
2327cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2328cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp12, res1, inp13, res0, res1);
2329cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2330cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2331cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2332cabdff1aSopenharmony_ci
2333cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2334cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
2335cabdff1aSopenharmony_ci                                  const20, const6, const3);
2336cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2337cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
2338cabdff1aSopenharmony_ci                                  const20, const6, const3);
2339cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2340cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp14, res1, inp15, res0, res1);
2341cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2342cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2343cabdff1aSopenharmony_ci}
2344cabdff1aSopenharmony_ci
2345cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_8x8_msa(const uint8_t *src,
2346cabdff1aSopenharmony_ci                                         int32_t src_stride,
2347cabdff1aSopenharmony_ci                                         uint8_t *dst,
2348cabdff1aSopenharmony_ci                                         int32_t dst_stride)
2349cabdff1aSopenharmony_ci{
2350cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2351cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
2352cabdff1aSopenharmony_ci    v16u8 res0, res1;
2353cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2354cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2355cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2356cabdff1aSopenharmony_ci
2357cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2358cabdff1aSopenharmony_ci    src += (4 * src_stride);
2359cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
2360cabdff1aSopenharmony_ci    src += (2 * src_stride);
2361cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2362cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
2363cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
2364cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
2365cabdff1aSopenharmony_ci                                        const20, const6, const3);
2366cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
2367cabdff1aSopenharmony_ci    src += (2 * src_stride);
2368cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2369cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
2370cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
2371cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
2372cabdff1aSopenharmony_ci                                        const20, const6, const3);
2373cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2374cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2375cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2376cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2377cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2378cabdff1aSopenharmony_ci    dst += (4 * dst_stride);
2379cabdff1aSopenharmony_ci
2380cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2381cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2382cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
2383cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
2384cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
2385cabdff1aSopenharmony_ci                                        const20, const6, const3);
2386cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2387cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
2388cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
2389cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
2390cabdff1aSopenharmony_ci                                        const20, const6, const3);
2391cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2392cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2393cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2394cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2395cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2396cabdff1aSopenharmony_ci}
2397cabdff1aSopenharmony_ci
2398cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_16x16_msa(const uint8_t *src,
2399cabdff1aSopenharmony_ci                                           int32_t src_stride,
2400cabdff1aSopenharmony_ci                                           uint8_t *dst,
2401cabdff1aSopenharmony_ci                                           int32_t dst_stride)
2402cabdff1aSopenharmony_ci{
2403cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2404cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
2405cabdff1aSopenharmony_ci    v16u8 res0, res1, dst0, dst1;
2406cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2407cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2408cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2409cabdff1aSopenharmony_ci
2410cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2411cabdff1aSopenharmony_ci    src += (5 * src_stride);
2412cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2413cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
2414cabdff1aSopenharmony_ci                                  const20, const6, const3);
2415cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
2416cabdff1aSopenharmony_ci    src += src_stride;
2417cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2418cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
2419cabdff1aSopenharmony_ci                                  const20, const6, const3);
2420cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2421cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2422cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2423cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2424cabdff1aSopenharmony_ci
2425cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
2426cabdff1aSopenharmony_ci    src += src_stride;
2427cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2428cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
2429cabdff1aSopenharmony_ci                                  const20, const6, const3);
2430cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
2431cabdff1aSopenharmony_ci    src += src_stride;
2432cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2433cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
2434cabdff1aSopenharmony_ci                                  const20, const6, const3);
2435cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2436cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2437cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2438cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2439cabdff1aSopenharmony_ci
2440cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2441cabdff1aSopenharmony_ci    src += src_stride;
2442cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2443cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
2444cabdff1aSopenharmony_ci                                  const20, const6, const3);
2445cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
2446cabdff1aSopenharmony_ci    src += src_stride;
2447cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2448cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
2449cabdff1aSopenharmony_ci                                  const20, const6, const3);
2450cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2451cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2452cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2453cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2454cabdff1aSopenharmony_ci
2455cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
2456cabdff1aSopenharmony_ci    src += src_stride;
2457cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2458cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
2459cabdff1aSopenharmony_ci                                  const20, const6, const3);
2460cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
2461cabdff1aSopenharmony_ci    src += src_stride;
2462cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2463cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
2464cabdff1aSopenharmony_ci                                  const20, const6, const3);
2465cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2466cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2467cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2468cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2469cabdff1aSopenharmony_ci
2470cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
2471cabdff1aSopenharmony_ci    src += src_stride;
2472cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2473cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
2474cabdff1aSopenharmony_ci                                  const20, const6, const3);
2475cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
2476cabdff1aSopenharmony_ci    src += src_stride;
2477cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2478cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
2479cabdff1aSopenharmony_ci                                  const20, const6, const3);
2480cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2481cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2482cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2483cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2484cabdff1aSopenharmony_ci
2485cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
2486cabdff1aSopenharmony_ci    src += src_stride;
2487cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2488cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
2489cabdff1aSopenharmony_ci                                  const20, const6, const3);
2490cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
2491cabdff1aSopenharmony_ci    src += src_stride;
2492cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2493cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
2494cabdff1aSopenharmony_ci                                  const20, const6, const3);
2495cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2496cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2497cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2498cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2499cabdff1aSopenharmony_ci
2500cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
2501cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2502cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
2503cabdff1aSopenharmony_ci                                  const20, const6, const3);
2504cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2505cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
2506cabdff1aSopenharmony_ci                                  const20, const6, const3);
2507cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2508cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2509cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2510cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2511cabdff1aSopenharmony_ci
2512cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2513cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
2514cabdff1aSopenharmony_ci                                  const20, const6, const3);
2515cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2516cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
2517cabdff1aSopenharmony_ci                                  const20, const6, const3);
2518cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2519cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2520cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2521cabdff1aSopenharmony_ci}
2522cabdff1aSopenharmony_ci
2523cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_aver_src1_8x8_msa(const uint8_t *src,
2524cabdff1aSopenharmony_ci                                                   int32_t src_stride,
2525cabdff1aSopenharmony_ci                                                   uint8_t *dst,
2526cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
2527cabdff1aSopenharmony_ci{
2528cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2529cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
2530cabdff1aSopenharmony_ci    v16u8 tmp0, tmp1, res0, res1;
2531cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2532cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2533cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2534cabdff1aSopenharmony_ci
2535cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2536cabdff1aSopenharmony_ci    src += (4 * src_stride);
2537cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp4, inp5);
2538cabdff1aSopenharmony_ci    src += (2 * src_stride);
2539cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2540cabdff1aSopenharmony_ci                                        inp1, inp2, inp3, inp4,
2541cabdff1aSopenharmony_ci                                        inp1, inp0, inp0, inp1,
2542cabdff1aSopenharmony_ci                                        inp2, inp3, inp4, inp5,
2543cabdff1aSopenharmony_ci                                        const20, const6, const3);
2544cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp6, inp7);
2545cabdff1aSopenharmony_ci    src += (2 * src_stride);
2546cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2547cabdff1aSopenharmony_ci                                        inp3, inp4, inp5, inp6,
2548cabdff1aSopenharmony_ci                                        inp3, inp2, inp1, inp0,
2549cabdff1aSopenharmony_ci                                        inp4, inp5, inp6, inp7,
2550cabdff1aSopenharmony_ci                                        const20, const6, const3);
2551cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2552cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
2553cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
2554cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2555cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2556cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2557cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2558cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2559cabdff1aSopenharmony_ci    dst += (4 * dst_stride);
2560cabdff1aSopenharmony_ci
2561cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2562cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2563cabdff1aSopenharmony_ci                                        inp5, inp6, inp7, inp8,
2564cabdff1aSopenharmony_ci                                        inp5, inp4, inp3, inp2,
2565cabdff1aSopenharmony_ci                                        inp6, inp7, inp8, inp8,
2566cabdff1aSopenharmony_ci                                        const20, const6, const3);
2567cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2568cabdff1aSopenharmony_ci                                        inp7, inp8, inp8, inp7,
2569cabdff1aSopenharmony_ci                                        inp7, inp6, inp5, inp4,
2570cabdff1aSopenharmony_ci                                        inp8, inp8, inp7, inp6,
2571cabdff1aSopenharmony_ci                                        const20, const6, const3);
2572cabdff1aSopenharmony_ci    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
2573cabdff1aSopenharmony_ci    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6);
2574cabdff1aSopenharmony_ci    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8);
2575cabdff1aSopenharmony_ci    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
2576cabdff1aSopenharmony_ci    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
2577cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2578cabdff1aSopenharmony_ci    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2579cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2580cabdff1aSopenharmony_ci}
2581cabdff1aSopenharmony_ci
2582cabdff1aSopenharmony_cistatic void vert_mc_qpel_avg_dst_aver_src1_16x16_msa(const uint8_t *src,
2583cabdff1aSopenharmony_ci                                                     int32_t src_stride,
2584cabdff1aSopenharmony_ci                                                     uint8_t *dst,
2585cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
2586cabdff1aSopenharmony_ci{
2587cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2588cabdff1aSopenharmony_ci    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
2589cabdff1aSopenharmony_ci    v16u8 res0, res1, dst0, dst1;
2590cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2591cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2592cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2593cabdff1aSopenharmony_ci
2594cabdff1aSopenharmony_ci    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2595cabdff1aSopenharmony_ci    src += (5 * src_stride);
2596cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2597cabdff1aSopenharmony_ci                                  inp1, inp2, inp3, inp4,
2598cabdff1aSopenharmony_ci                                  const20, const6, const3);
2599cabdff1aSopenharmony_ci    inp5 = LD_UB(src);
2600cabdff1aSopenharmony_ci    src += src_stride;
2601cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2602cabdff1aSopenharmony_ci                                  inp2, inp3, inp4, inp5,
2603cabdff1aSopenharmony_ci                                  const20, const6, const3);
2604cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2605cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1);
2606cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2607cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2608cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2609cabdff1aSopenharmony_ci
2610cabdff1aSopenharmony_ci    inp6 = LD_UB(src);
2611cabdff1aSopenharmony_ci    src += src_stride;
2612cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2613cabdff1aSopenharmony_ci                                  inp3, inp4, inp5, inp6,
2614cabdff1aSopenharmony_ci                                  const20, const6, const3);
2615cabdff1aSopenharmony_ci    inp7 = LD_UB(src);
2616cabdff1aSopenharmony_ci    src += src_stride;
2617cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2618cabdff1aSopenharmony_ci                                  inp4, inp5, inp6, inp7,
2619cabdff1aSopenharmony_ci                                  const20, const6, const3);
2620cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2621cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp3, res1, inp4, res0, res1);
2622cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2623cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2624cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2625cabdff1aSopenharmony_ci
2626cabdff1aSopenharmony_ci    inp8 = LD_UB(src);
2627cabdff1aSopenharmony_ci    src += src_stride;
2628cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2629cabdff1aSopenharmony_ci                                  inp5, inp6, inp7, inp8,
2630cabdff1aSopenharmony_ci                                  const20, const6, const3);
2631cabdff1aSopenharmony_ci    inp9 = LD_UB(src);
2632cabdff1aSopenharmony_ci    src += src_stride;
2633cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2634cabdff1aSopenharmony_ci                                  inp6, inp7, inp8, inp9,
2635cabdff1aSopenharmony_ci                                  const20, const6, const3);
2636cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2637cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp5, res1, inp6, res0, res1);
2638cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2639cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2640cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2641cabdff1aSopenharmony_ci
2642cabdff1aSopenharmony_ci    inp10 = LD_UB(src);
2643cabdff1aSopenharmony_ci    src += src_stride;
2644cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2645cabdff1aSopenharmony_ci                                  inp7, inp8, inp9, inp10,
2646cabdff1aSopenharmony_ci                                  const20, const6, const3);
2647cabdff1aSopenharmony_ci    inp11 = LD_UB(src);
2648cabdff1aSopenharmony_ci    src += src_stride;
2649cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2650cabdff1aSopenharmony_ci                                  inp8, inp9, inp10, inp11,
2651cabdff1aSopenharmony_ci                                  const20, const6, const3);
2652cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2653cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp7, res1, inp8, res0, res1);
2654cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2655cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2656cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2657cabdff1aSopenharmony_ci
2658cabdff1aSopenharmony_ci    inp12 = LD_UB(src);
2659cabdff1aSopenharmony_ci    src += src_stride;
2660cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2661cabdff1aSopenharmony_ci                                  inp9, inp10, inp11, inp12,
2662cabdff1aSopenharmony_ci                                  const20, const6, const3);
2663cabdff1aSopenharmony_ci    inp13 = LD_UB(src);
2664cabdff1aSopenharmony_ci    src += src_stride;
2665cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2666cabdff1aSopenharmony_ci                                  inp10, inp11, inp12, inp13,
2667cabdff1aSopenharmony_ci                                  const20, const6, const3);
2668cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2669cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp9, res1, inp10, res0, res1);
2670cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2671cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2672cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2673cabdff1aSopenharmony_ci
2674cabdff1aSopenharmony_ci    inp14 = LD_UB(src);
2675cabdff1aSopenharmony_ci    src += src_stride;
2676cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2677cabdff1aSopenharmony_ci                                  inp11, inp12, inp13, inp14,
2678cabdff1aSopenharmony_ci                                  const20, const6, const3);
2679cabdff1aSopenharmony_ci    inp15 = LD_UB(src);
2680cabdff1aSopenharmony_ci    src += src_stride;
2681cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2682cabdff1aSopenharmony_ci                                  inp12, inp13, inp14, inp15,
2683cabdff1aSopenharmony_ci                                  const20, const6, const3);
2684cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2685cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp11, res1, inp12, res0, res1);
2686cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2687cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2688cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2689cabdff1aSopenharmony_ci
2690cabdff1aSopenharmony_ci    inp16 = LD_UB(src);
2691cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2692cabdff1aSopenharmony_ci                                  inp13, inp14, inp15, inp16,
2693cabdff1aSopenharmony_ci                                  const20, const6, const3);
2694cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2695cabdff1aSopenharmony_ci                                  inp14, inp15, inp16, inp16,
2696cabdff1aSopenharmony_ci                                  const20, const6, const3);
2697cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2698cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp13, res1, inp14, res0, res1);
2699cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2700cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2701cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2702cabdff1aSopenharmony_ci
2703cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2704cabdff1aSopenharmony_ci                                  inp15, inp16, inp16, inp15,
2705cabdff1aSopenharmony_ci                                  const20, const6, const3);
2706cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2707cabdff1aSopenharmony_ci                                  inp16, inp16, inp15, inp14,
2708cabdff1aSopenharmony_ci                                  const20, const6, const3);
2709cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
2710cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, inp15, res1, inp16, res0, res1);
2711cabdff1aSopenharmony_ci    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2712cabdff1aSopenharmony_ci    ST_UB2(res0, res1, dst, dst_stride);
2713cabdff1aSopenharmony_ci}
2714cabdff1aSopenharmony_ci
/*
 * Horizontal pass of the no-rounding hv qpel path for 16-byte-wide rows,
 * "src0" flavour: each stored row is __msa_ave_u_b() of the row loaded at
 * src and the output of APPLY_HORIZ_QPEL_NO_ROUND_FILTER for that row.
 *
 * src, src_stride: source pointer and row pitch in bytes.  Every row is
 *                  loaded twice, once at src and once at src + 1.
 * dst, dst_stride: destination pointer and row pitch in bytes; one vector
 *                  is stored per row.
 * height:          the loop runs (height >> 2) times with four rows each,
 *                  and one more row is produced after the loop, so
 *                  height + 1 rows are written when height is a multiple
 *                  of four.
 */
2715cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(const uint8_t *src,
2716cabdff1aSopenharmony_ci                                                   int32_t src_stride,
2717cabdff1aSopenharmony_ci                                                   uint8_t *dst,
2718cabdff1aSopenharmony_ci                                                   int32_t dst_stride,
2719cabdff1aSopenharmony_ci                                                   int32_t height)
2720cabdff1aSopenharmony_ci{
    /* 8-bit counter: height >> 2 is truncated to the range 0..255. */
2721cabdff1aSopenharmony_ci    uint8_t loop_count;
2722cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2723cabdff1aSopenharmony_ci    v16u8 res;
    /* Descending byte indices 15..0, handed unchanged to the filter macro. */
2724cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter constants: 6 and 3 as byte lanes, 20 as halfword lanes. */
2725cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2726cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2727cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
2728cabdff1aSopenharmony_ci
2729cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* Four rows per iteration: even inpN from src, odd inpN from src + 1. */
2730cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2731cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2732cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Row 0: filter, then average with the row loaded at src. */
2733cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2734cabdff1aSopenharmony_ci                                               const20, const6, const3);
2735cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp0, res);
2736cabdff1aSopenharmony_ci        ST_UB(res, dst);
2737cabdff1aSopenharmony_ci        dst += dst_stride;
2738cabdff1aSopenharmony_ci
        /* Row 1. */
2739cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2740cabdff1aSopenharmony_ci                                               const20, const6, const3);
2741cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp2, res);
2742cabdff1aSopenharmony_ci        ST_UB(res, dst);
2743cabdff1aSopenharmony_ci        dst += dst_stride;
2744cabdff1aSopenharmony_ci
        /* Row 2. */
2745cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
2746cabdff1aSopenharmony_ci                                               const20, const6, const3);
2747cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp4, res);
2748cabdff1aSopenharmony_ci        ST_UB(res, dst);
2749cabdff1aSopenharmony_ci        dst += dst_stride;
2750cabdff1aSopenharmony_ci
        /* Row 3. */
2751cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
2752cabdff1aSopenharmony_ci                                               const20, const6, const3);
2753cabdff1aSopenharmony_ci        res = __msa_ave_u_b(inp6, res);
2754cabdff1aSopenharmony_ci        ST_UB(res, dst);
2755cabdff1aSopenharmony_ci        dst += dst_stride;
2756cabdff1aSopenharmony_ci    }
2757cabdff1aSopenharmony_ci
    /*
     * One extra row after the loop.  With a stride argument of 1 this
     * presumably fetches src into inp0 and src + 1 into inp1, matching the
     * two loads done per row inside the loop.
     */
2758cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
2759cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2760cabdff1aSopenharmony_ci                                           const20, const6, const3);
2761cabdff1aSopenharmony_ci    res = __msa_ave_u_b(inp0, res);
2762cabdff1aSopenharmony_ci    ST_UB(res, dst);
2763cabdff1aSopenharmony_ci}
2764cabdff1aSopenharmony_ci
/*
 * 16x16 no-rounding hv case "src00": runs the src0 horizontal pass into a
 * stack buffer, then passes that buffer (row pitch 16) to
 * vert_mc_qpel_no_rnd_aver_src0_16x16_msa(), which writes to dst.
 *
 * src, src_stride: source pointer and row pitch in bytes.
 * dst, dst_stride: destination pointer and row pitch in bytes.
 */
2765cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(const uint8_t *src,
2766cabdff1aSopenharmony_ci                                                      int32_t src_stride,
2767cabdff1aSopenharmony_ci                                                      uint8_t *dst,
2768cabdff1aSopenharmony_ci                                                      int32_t dst_stride)
2769cabdff1aSopenharmony_ci{
    /*
     * 272 = 17 rows * 16 bytes: with height 16 the horizontal pass stores
     * 16 rows in its loop plus one trailing row.
     */
2770cabdff1aSopenharmony_ci    uint8_t buff[272];
2771cabdff1aSopenharmony_ci
2772cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, buff, 16, 16);
2773cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(buff, 16, dst, dst_stride);
2774cabdff1aSopenharmony_ci}
2775cabdff1aSopenharmony_ci
/*
 * 8x8 no-rounding hv case "src00", computed without a temporary buffer.
 *
 * Horizontal stage: nine source rows are loaded (four pairs plus one single
 * row).  Each is passed through the horizontal filter macro and the result
 * is averaged with the source row itself via __msa_ave_u_b(), giving
 * horiz0..horiz8.  Rows are handled in pairs: the low doublewords of the two
 * loaded rows are packed into one vector before averaging, and the
 * odd-numbered horizN is the upper doubleword of the even one splatted.
 *
 * Vertical stage: each APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE call yields one
 * vector that is averaged with a packed pair of horizN rows and then stored
 * as two doublewords by ST_D2, four stores in total.
 *
 * src, src_stride: source pointer and row pitch in bytes.
 * dst, dst_stride: destination pointer and row pitch in bytes.
 */
2776cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(const uint8_t *src,
2777cabdff1aSopenharmony_ci                                                    int32_t src_stride,
2778cabdff1aSopenharmony_ci                                                    uint8_t *dst,
2779cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
2780cabdff1aSopenharmony_ci{
2781cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
2782cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
2783cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
2784cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
    /* Byte-index tables consumed by the horizontal filter macros. */
2785cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
2786cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
2787cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
2788cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Unlike the 16x16 helpers, all three constants are byte lanes here. */
2789cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2790cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2791cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2792cabdff1aSopenharmony_ci
    /* Source rows 0 and 1 -> horiz0, horiz1. */
2793cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
2794cabdff1aSopenharmony_ci    src += (2 * src_stride);
2795cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2796cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
2797cabdff1aSopenharmony_ci                                                  const6, const3);
    /* Pack the low 8 bytes of both rows so they line up with res0. */
2798cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2799cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
2800cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2 and 3 -> horiz2, horiz3. */
2801cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
2802cabdff1aSopenharmony_ci    src += (2 * src_stride);
2803cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2804cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
2805cabdff1aSopenharmony_ci                                                  const6, const3);
2806cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2807cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
2808cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4 and 5 -> horiz4, horiz5. */
2809cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
2810cabdff1aSopenharmony_ci    src += (2 * src_stride);
2811cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2812cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
2813cabdff1aSopenharmony_ci                                                  const6, const3);
2814cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2815cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
2816cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /*
     * First vertical call.  horiz0/horiz1 are passed more than once, which
     * looks like edge mirroring at the top of the block (the macro body is
     * outside this view -- confirm there).
     */
2817cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
2818cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
2819cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
2820cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
2821cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Average with horiz0/horiz1 packed into one vector, then store. */
2822cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
2823cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
2824cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
2825cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2826cabdff1aSopenharmony_ci
    /* Source rows 6 and 7 -> horiz6, horiz7. */
2827cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
2828cabdff1aSopenharmony_ci    src += (2 * src_stride);
2829cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2830cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
2831cabdff1aSopenharmony_ci                                                  const6, const3);
2832cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2833cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
2834cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Ninth source row, handled alone with the single-row macro -> horiz8. */
2835cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
2836cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
2837cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
2838cabdff1aSopenharmony_ci                                                       const6, const3);
2839cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
    /* Second vertical call; averaged with horiz2/horiz3. */
2840cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
2841cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
2842cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
2843cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
2844cabdff1aSopenharmony_ci                                                 const20, const6, const3);
2845cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
2846cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
    /* Third vertical call is issued before the previous result is stored. */
2847cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
2848cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
2849cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
2850cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
2851cabdff1aSopenharmony_ci                                                 const20, const6, const3);
2852cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
2853cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
2854cabdff1aSopenharmony_ci
    /* Finish the third result (averaged with horiz4/horiz5). */
2855cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
2856cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
    /*
     * Fourth vertical call.  horiz8/horiz7 are repeated, which looks like
     * edge mirroring at the bottom of the block (confirm against the macro).
     */
2857cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
2858cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
2859cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
2860cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
2861cabdff1aSopenharmony_ci                                                 const20, const6, const3);
2862cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
2863cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
2864cabdff1aSopenharmony_ci
    /* Finish and store the fourth result (averaged with horiz6/horiz7). */
2865cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
2866cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
2867cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
2868cabdff1aSopenharmony_ci}
2869cabdff1aSopenharmony_ci
/*
 * Horizontal pass of the no-rounding hv qpel path for 16-byte-wide rows,
 * plain flavour: each stored row is the output of
 * APPLY_HORIZ_QPEL_NO_ROUND_FILTER with no further averaging.
 *
 * src, src_stride: source pointer and row pitch in bytes.  Every row is
 *                  loaded twice, once at src and once at src + 1.
 * dst, dst_stride: destination pointer and row pitch in bytes; one vector
 *                  is stored per row.
 * height:          the loop runs (height >> 2) times with four rows each,
 *                  and one more row is produced after the loop, so
 *                  height + 1 rows are written when height is a multiple
 *                  of four.
 */
2870cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_horiz_16x16_msa(const uint8_t *src,
2871cabdff1aSopenharmony_ci                                              int32_t src_stride,
2872cabdff1aSopenharmony_ci                                              uint8_t *dst,
2873cabdff1aSopenharmony_ci                                              int32_t dst_stride,
2874cabdff1aSopenharmony_ci                                              int32_t height)
2875cabdff1aSopenharmony_ci{
    /* 8-bit counter: height >> 2 is truncated to the range 0..255. */
2876cabdff1aSopenharmony_ci    uint8_t loop_count;
2877cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2878cabdff1aSopenharmony_ci    v16u8 res;
    /* Descending byte indices 15..0, handed unchanged to the filter macro. */
2879cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter constants: 6 and 3 as byte lanes, 20 as halfword lanes. */
2880cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2881cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2882cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
2883cabdff1aSopenharmony_ci
2884cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* Four rows per iteration: even inpN from src, odd inpN from src + 1. */
2885cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2886cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2887cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Row 0: the filter output is stored as is. */
2888cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2889cabdff1aSopenharmony_ci                                               const20, const6, const3);
2890cabdff1aSopenharmony_ci        ST_UB(res, dst);
2891cabdff1aSopenharmony_ci        dst += dst_stride;
2892cabdff1aSopenharmony_ci
        /* Row 1. */
2893cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2894cabdff1aSopenharmony_ci                                               const20, const6, const3);
2895cabdff1aSopenharmony_ci        ST_UB(res, dst);
2896cabdff1aSopenharmony_ci        dst += dst_stride;
2897cabdff1aSopenharmony_ci
        /* Row 2. */
2898cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
2899cabdff1aSopenharmony_ci                                               const20, const6, const3);
2900cabdff1aSopenharmony_ci        ST_UB(res, dst);
2901cabdff1aSopenharmony_ci        dst += dst_stride;
2902cabdff1aSopenharmony_ci
        /* Row 3. */
2903cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
2904cabdff1aSopenharmony_ci                                               const20, const6, const3);
2905cabdff1aSopenharmony_ci        ST_UB(res, dst);
2906cabdff1aSopenharmony_ci        dst += dst_stride;
2907cabdff1aSopenharmony_ci    }
2908cabdff1aSopenharmony_ci
    /*
     * One extra row after the loop.  With a stride argument of 1 this
     * presumably fetches src into inp0 and src + 1 into inp1, matching the
     * two loads done per row inside the loop.
     */
2909cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
2910cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2911cabdff1aSopenharmony_ci                                           const20, const6, const3);
2912cabdff1aSopenharmony_ci    ST_UB(res, dst);
2913cabdff1aSopenharmony_ci}
2914cabdff1aSopenharmony_ci
/*
 * 16x16 no-rounding hv case "v_src0": runs the plain horizontal pass (no
 * source averaging) into a stack buffer, then passes that buffer (row
 * pitch 16) to vert_mc_qpel_no_rnd_aver_src0_16x16_msa(), which writes to
 * dst.
 *
 * src, src_stride: source pointer and row pitch in bytes.
 * dst, dst_stride: destination pointer and row pitch in bytes.
 */
2915cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(const uint8_t *src,
2916cabdff1aSopenharmony_ci                                                    int32_t src_stride,
2917cabdff1aSopenharmony_ci                                                    uint8_t *dst,
2918cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
2919cabdff1aSopenharmony_ci{
    /*
     * 272 = 17 rows * 16 bytes: with height 16 the horizontal pass stores
     * 16 rows in its loop plus one trailing row.
     */
2920cabdff1aSopenharmony_ci    uint8_t buff[272];
2921cabdff1aSopenharmony_ci
2922cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride, buff, 16, 16);
2923cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(buff, 16, dst, dst_stride);
2924cabdff1aSopenharmony_ci}
2925cabdff1aSopenharmony_ci
/*
 * 8x8 no-rounding hv case "v_src0", computed without a temporary buffer.
 *
 * Horizontal stage: nine source rows are loaded (four pairs plus one single
 * row) and passed through the horizontal filter macros; the filter output
 * is used directly as horiz0..horiz8 (no averaging with the source).  The
 * odd-numbered horizN is the upper doubleword of the preceding even one,
 * splatted.
 *
 * Vertical stage: each APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE call yields one
 * vector that is averaged via __msa_ave_u_b() with a packed pair of horizN
 * rows and then stored as two doublewords by ST_D2, four stores in total.
 *
 * src, src_stride: source pointer and row pitch in bytes.
 * dst, dst_stride: destination pointer and row pitch in bytes.
 */
2926cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(const uint8_t *src,
2927cabdff1aSopenharmony_ci                                                  int32_t src_stride,
2928cabdff1aSopenharmony_ci                                                  uint8_t *dst,
2929cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
2930cabdff1aSopenharmony_ci{
2931cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
2932cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
2933cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
2934cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
    /* Byte-index tables consumed by the horizontal filter macros. */
2935cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
2936cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
2937cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
2938cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* All three filter constants are byte lanes here. */
2939cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
2940cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
2941cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
2942cabdff1aSopenharmony_ci
    /* Source rows 0 and 1 -> horiz0, horiz1. */
2943cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
2944cabdff1aSopenharmony_ci    src += (2 * src_stride);
2945cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2946cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
2947cabdff1aSopenharmony_ci                                                    const6, const3);
2948cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
2949cabdff1aSopenharmony_ci
    /* Source rows 2 and 3 -> horiz2, horiz3. */
2950cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
2951cabdff1aSopenharmony_ci    src += (2 * src_stride);
2952cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2953cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
2954cabdff1aSopenharmony_ci                                                    const6, const3);
2955cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4 and 5 -> horiz4, horiz5. */
2956cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
2957cabdff1aSopenharmony_ci    src += (2 * src_stride);
2958cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2959cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
2960cabdff1aSopenharmony_ci                                                    const6, const3);
2961cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /*
     * First vertical call.  horiz0/horiz1 are passed more than once, which
     * looks like edge mirroring at the top of the block (the macro body is
     * outside this view -- confirm there).
     */
2962cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
2963cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
2964cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
2965cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
2966cabdff1aSopenharmony_ci                                                 const20, const6, const3);
    /* Average with horiz0/horiz1 packed into one vector, then store. */
2967cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
2968cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
2969cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
2970cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2971cabdff1aSopenharmony_ci
    /* Source rows 6 and 7 -> horiz6, horiz7. */
2972cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
2973cabdff1aSopenharmony_ci    src += (2 * src_stride);
2974cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2975cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
2976cabdff1aSopenharmony_ci                                                    const6, const3);
2977cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Ninth source row, handled alone with the single-row macro -> horiz8. */
2978cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
2979cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
2980cabdff1aSopenharmony_ci                                                         mask2, mask3, const20,
2981cabdff1aSopenharmony_ci                                                         const6, const3);
    /* Second vertical call; averaged with horiz2/horiz3 and stored. */
2982cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
2983cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
2984cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
2985cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
2986cabdff1aSopenharmony_ci                                                 const20, const6, const3);
2987cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
2988cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
2991cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
2992cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
2993cabdff1aSopenharmony_ci
    /* Third vertical call; averaged with horiz4/horiz5 and stored. */
2994cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
2995cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
2996cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
2997cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
2998cabdff1aSopenharmony_ci                                                 const20, const6, const3);
2999cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
3000cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3001cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3002cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3003cabdff1aSopenharmony_ci
    /*
     * Fourth vertical call.  horiz8/horiz7 are repeated, which looks like
     * edge mirroring at the bottom of the block (confirm against the macro).
     */
3004cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3005cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3006cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3007cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3008cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3009cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
3010cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3011cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3012cabdff1aSopenharmony_ci}
3013cabdff1aSopenharmony_ci
/*
 * Horizontal pass of the no-rounding hv qpel path for 16-byte-wide rows,
 * "src1" flavour: each stored row is __msa_ave_u_b() of the output of
 * APPLY_HORIZ_QPEL_NO_ROUND_FILTER and the row loaded at src + 1.
 *
 * src, src_stride: source pointer and row pitch in bytes.  Every row is
 *                  loaded twice, once at src and once at src + 1.
 * dst, dst_stride: destination pointer and row pitch in bytes; one vector
 *                  is stored per row.
 * height:          the loop runs (height >> 2) times with four rows each,
 *                  and one more row is produced after the loop, so
 *                  height + 1 rows are written when height is a multiple
 *                  of four.
 */
3014cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(const uint8_t *src,
3015cabdff1aSopenharmony_ci                                                   int32_t src_stride,
3016cabdff1aSopenharmony_ci                                                   uint8_t *dst,
3017cabdff1aSopenharmony_ci                                                   int32_t dst_stride,
3018cabdff1aSopenharmony_ci                                                   int32_t height)
3019cabdff1aSopenharmony_ci{
    /* 8-bit counter: height >> 2 is truncated to the range 0..255. */
3020cabdff1aSopenharmony_ci    uint8_t loop_count;
3021cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3022cabdff1aSopenharmony_ci    v16u8 res;
    /* Descending byte indices 15..0, handed unchanged to the filter macro. */
3023cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    /* Filter constants: 6 and 3 as byte lanes, 20 as halfword lanes. */
3024cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3025cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3026cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
3027cabdff1aSopenharmony_ci
3028cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
        /* Four rows per iteration: even inpN from src, odd inpN from src + 1. */
3029cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3030cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3031cabdff1aSopenharmony_ci        src += (4 * src_stride);
        /* Row 0: filter, then average with the row loaded at src + 1. */
3032cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3033cabdff1aSopenharmony_ci                                               const20, const6, const3);
3034cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp1);
3035cabdff1aSopenharmony_ci        ST_UB(res, dst);
3036cabdff1aSopenharmony_ci        dst += dst_stride;
3037cabdff1aSopenharmony_ci
        /* Row 1. */
3038cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
3039cabdff1aSopenharmony_ci                                               const20, const6, const3);
3040cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp3);
3041cabdff1aSopenharmony_ci        ST_UB(res, dst);
3042cabdff1aSopenharmony_ci        dst += dst_stride;
3043cabdff1aSopenharmony_ci
        /* Row 2. */
3044cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
3045cabdff1aSopenharmony_ci                                               const20, const6, const3);
3046cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp5);
3047cabdff1aSopenharmony_ci        ST_UB(res, dst);
3048cabdff1aSopenharmony_ci        dst += dst_stride;
3049cabdff1aSopenharmony_ci
        /* Row 3. */
3050cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
3051cabdff1aSopenharmony_ci                                               const20, const6, const3);
3052cabdff1aSopenharmony_ci        res = __msa_ave_u_b(res, inp7);
3053cabdff1aSopenharmony_ci        ST_UB(res, dst);
3054cabdff1aSopenharmony_ci        dst += dst_stride;
3055cabdff1aSopenharmony_ci    }
3056cabdff1aSopenharmony_ci
    /*
     * One extra row after the loop.  With a stride argument of 1 this
     * presumably fetches src into inp0 and src + 1 into inp1, matching the
     * two loads done per row inside the loop.  The operands of the average
     * are written in the opposite order from the loop body; the same two
     * values are averaged.
     */
3057cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
3058cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3059cabdff1aSopenharmony_ci                                           const20, const6, const3);
3060cabdff1aSopenharmony_ci    res = __msa_ave_u_b(inp1, res);
3061cabdff1aSopenharmony_ci    ST_UB(res, dst);
3062cabdff1aSopenharmony_ci}
3063cabdff1aSopenharmony_ci
/*
 * 16x16 no-rounding hv case "src10": runs the src1 horizontal pass into a
 * stack buffer, then passes that buffer (row pitch 16) to
 * vert_mc_qpel_no_rnd_aver_src0_16x16_msa(), which writes to dst.
 *
 * src, src_stride: source pointer and row pitch in bytes.
 * dst, dst_stride: destination pointer and row pitch in bytes.
 */
3064cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(const uint8_t *src,
3065cabdff1aSopenharmony_ci                                                      int32_t src_stride,
3066cabdff1aSopenharmony_ci                                                      uint8_t *dst,
3067cabdff1aSopenharmony_ci                                                      int32_t dst_stride)
3068cabdff1aSopenharmony_ci{
    /*
     * 272 = 17 rows * 16 bytes: with height 16 the horizontal pass stores
     * 16 rows in its loop plus one trailing row.
     */
3069cabdff1aSopenharmony_ci    uint8_t buff[272];
3070cabdff1aSopenharmony_ci
3071cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride, buff, 16, 16);
3072cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(buff, 16, dst, dst_stride);
3073cabdff1aSopenharmony_ci}
3074cabdff1aSopenharmony_ci
3075cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(const uint8_t *src,
3076cabdff1aSopenharmony_ci                                                    int32_t src_stride,
3077cabdff1aSopenharmony_ci                                                    uint8_t *dst,
3078cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
3079cabdff1aSopenharmony_ci{
3080cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3081cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3082cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3083cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3084cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3085cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3086cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3087cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3088cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3089cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3090cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3091cabdff1aSopenharmony_ci
3092cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3093cabdff1aSopenharmony_ci    src += (2 * src_stride);
3094cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3095cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3096cabdff1aSopenharmony_ci                                                  const6, const3);
3097cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3098cabdff1aSopenharmony_ci
3099cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3100cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
3101cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3102cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3103cabdff1aSopenharmony_ci    src += (2 * src_stride);
3104cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3105cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3106cabdff1aSopenharmony_ci                                                  const6, const3);
3107cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3108cabdff1aSopenharmony_ci
3109cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3110cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
3111cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3112cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3113cabdff1aSopenharmony_ci    src += (2 * src_stride);
3114cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3115cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3116cabdff1aSopenharmony_ci                                                  const6, const3);
3117cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3118cabdff1aSopenharmony_ci
3119cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3120cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
3121cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3122cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3123cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3124cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3125cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3126cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3127cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
3128cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3129cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3130cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3131cabdff1aSopenharmony_ci
3132cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3133cabdff1aSopenharmony_ci    src += (2 * src_stride);
3134cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3135cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3136cabdff1aSopenharmony_ci                                                  const6, const3);
3137cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3138cabdff1aSopenharmony_ci
3139cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3140cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
3141cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3142cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3143cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3144cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
3145cabdff1aSopenharmony_ci                                                       const6, const3);
3146cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3147cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
3148cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3149cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3150cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3151cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3152cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3153cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
3154cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3155cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3156cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3157cabdff1aSopenharmony_ci
3158cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3159cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3160cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3161cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3162cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3163cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
3164cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3165cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3166cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3167cabdff1aSopenharmony_ci
3168cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3169cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3170cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3171cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3172cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3173cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
3174cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3175cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3176cabdff1aSopenharmony_ci}
3177cabdff1aSopenharmony_ci
/*
 * 16x16 no-round HV qpel, "aver_h_src0" variant.
 *
 * Two-pass wrapper: hv_mc_qpel_no_rnd_horiz_src0_16x16_msa() fills a stack
 * scratch buffer from src, then vert_mc_qpel_no_rnd_16x16_msa() consumes
 * that buffer and writes the result to dst.
 */
static void hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(const uint8_t *src,
                                                    int32_t src_stride,
                                                    uint8_t *dst,
                                                    int32_t dst_stride)
{
    /* 272 bytes; presumably 17 rows of 16 bytes -- confirm against the
     * horizontal helper. */
    uint8_t hpass[16 * 17];

    /* First pass: src -> scratch. */
    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, hpass, 16, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_no_rnd_16x16_msa(hpass, 16, dst, dst_stride);
}
3188cabdff1aSopenharmony_ci
3189cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(const uint8_t *src,
3190cabdff1aSopenharmony_ci                                                  int32_t src_stride,
3191cabdff1aSopenharmony_ci                                                  uint8_t *dst,
3192cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
3193cabdff1aSopenharmony_ci{
3194cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3195cabdff1aSopenharmony_ci    v16u8 res0, res1;
3196cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3197cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3198cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3199cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3200cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3201cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3202cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3203cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3204cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3205cabdff1aSopenharmony_ci
3206cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3207cabdff1aSopenharmony_ci    src += (2 * src_stride);
3208cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3209cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3210cabdff1aSopenharmony_ci                                                  const6, const3);
3211cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3212cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
3213cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3214cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3215cabdff1aSopenharmony_ci    src += (2 * src_stride);
3216cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3217cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3218cabdff1aSopenharmony_ci                                                  const6, const3);
3219cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3220cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
3221cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3222cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3223cabdff1aSopenharmony_ci    src += (2 * src_stride);
3224cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3225cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3226cabdff1aSopenharmony_ci                                                  const6, const3);
3227cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3228cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
3229cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3230cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3231cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3232cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3233cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3234cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3235cabdff1aSopenharmony_ci
3236cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3237cabdff1aSopenharmony_ci    src += (2 * src_stride);
3238cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3239cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3240cabdff1aSopenharmony_ci
3241cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3242cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3243cabdff1aSopenharmony_ci                                                  const6, const3);
3244cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3245cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
3246cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3247cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3248cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3249cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
3250cabdff1aSopenharmony_ci                                                       const6, const3);
3251cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
3252cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3253cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3254cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3255cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3256cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3257cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3258cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3259cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3260cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3261cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3262cabdff1aSopenharmony_ci    ST_D4(res1, res0, 0, 1, 0, 1, dst, dst_stride);
3263cabdff1aSopenharmony_ci    dst += (4 * dst_stride);
3264cabdff1aSopenharmony_ci
3265cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3266cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3267cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3268cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3269cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3270cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3271cabdff1aSopenharmony_ci}
3272cabdff1aSopenharmony_ci
/*
 * 16x16 no-round HV qpel, plain variant.
 *
 * Two-pass wrapper: hv_mc_qpel_no_rnd_horiz_16x16_msa() fills a stack
 * scratch buffer from src, then vert_mc_qpel_no_rnd_16x16_msa() consumes
 * that buffer and writes the result to dst.
 */
static void hv_mc_qpel_no_rnd_16x16_msa(const uint8_t *src,
                                        int32_t src_stride,
                                        uint8_t *dst,
                                        int32_t dst_stride)
{
    /* 272 bytes; presumably 17 rows of 16 bytes -- confirm against the
     * horizontal helper. */
    uint8_t hpass[16 * 17];

    /* First pass: src -> scratch. */
    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride, hpass, 16, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_no_rnd_16x16_msa(hpass, 16, dst, dst_stride);
}
3283cabdff1aSopenharmony_ci
3284cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_8x8_msa(const uint8_t *src,
3285cabdff1aSopenharmony_ci                                      int32_t src_stride,
3286cabdff1aSopenharmony_ci                                      uint8_t *dst,
3287cabdff1aSopenharmony_ci                                      int32_t dst_stride)
3288cabdff1aSopenharmony_ci{
3289cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3290cabdff1aSopenharmony_ci    v16u8 res0, res1;
3291cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3292cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3293cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3294cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3295cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3296cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3297cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3298cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3299cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3300cabdff1aSopenharmony_ci
3301cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3302cabdff1aSopenharmony_ci    src += (2 * src_stride);
3303cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3304cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3305cabdff1aSopenharmony_ci                                                    const6, const3);
3306cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3307cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3308cabdff1aSopenharmony_ci    src += (2 * src_stride);
3309cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3310cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3311cabdff1aSopenharmony_ci                                                    const6, const3);
3312cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3313cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3314cabdff1aSopenharmony_ci    src += (2 * src_stride);
3315cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3316cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3317cabdff1aSopenharmony_ci                                                    const6, const3);
3318cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3319cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3320cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3321cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3322cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3323cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3324cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3325cabdff1aSopenharmony_ci    src += (2 * src_stride);
3326cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3327cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3328cabdff1aSopenharmony_ci
3329cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3330cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3331cabdff1aSopenharmony_ci                                                    const6, const3);
3332cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3333cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3334cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3335cabdff1aSopenharmony_ci                                                         mask2, mask3, const20,
3336cabdff1aSopenharmony_ci                                                         const6, const3);
3337cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3338cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3339cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3340cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3341cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3342cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3343cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3344cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3345cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3346cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3347cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3348cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3349cabdff1aSopenharmony_ci
3350cabdff1aSopenharmony_ci
3351cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3352cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3353cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3354cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3355cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3356cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3357cabdff1aSopenharmony_ci}
3358cabdff1aSopenharmony_ci
/*
 * 16x16 no-round HV qpel, "aver_h_src1" variant.
 *
 * Two-pass wrapper: hv_mc_qpel_no_rnd_horiz_src1_16x16_msa() fills a stack
 * scratch buffer from src, then vert_mc_qpel_no_rnd_16x16_msa() consumes
 * that buffer and writes the result to dst.
 */
static void hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(const uint8_t *src,
                                                    int32_t src_stride,
                                                    uint8_t *dst,
                                                    int32_t dst_stride)
{
    /* 272 bytes; presumably 17 rows of 16 bytes -- confirm against the
     * horizontal helper. */
    uint8_t hpass[16 * 17];

    /* First pass: src -> scratch. */
    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride, hpass, 16, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_no_rnd_16x16_msa(hpass, 16, dst, dst_stride);
}
3369cabdff1aSopenharmony_ci
3370cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(const uint8_t *src,
3371cabdff1aSopenharmony_ci                                                  int32_t src_stride,
3372cabdff1aSopenharmony_ci                                                  uint8_t *dst,
3373cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
3374cabdff1aSopenharmony_ci{
3375cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3376cabdff1aSopenharmony_ci    v16u8 res0, res1;
3377cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3378cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3379cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3380cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3381cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3382cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3383cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3384cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3385cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3386cabdff1aSopenharmony_ci
3387cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3388cabdff1aSopenharmony_ci    src += (2 * src_stride);
3389cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3390cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3391cabdff1aSopenharmony_ci                                                  const6, const3);
3392cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3393cabdff1aSopenharmony_ci
3394cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3395cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
3396cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3397cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3398cabdff1aSopenharmony_ci    src += (2 * src_stride);
3399cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3400cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3401cabdff1aSopenharmony_ci                                                  const6, const3);
3402cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3403cabdff1aSopenharmony_ci
3404cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3405cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
3406cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3407cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3408cabdff1aSopenharmony_ci    src += (2 * src_stride);
3409cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3410cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3411cabdff1aSopenharmony_ci                                                  const6, const3);
3412cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3413cabdff1aSopenharmony_ci
3414cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3415cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
3416cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3417cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3418cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3419cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3420cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3421cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3422cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3423cabdff1aSopenharmony_ci    src += (2 * src_stride);
3424cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3425cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3426cabdff1aSopenharmony_ci
3427cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3428cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3429cabdff1aSopenharmony_ci                                                  const6, const3);
3430cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3431cabdff1aSopenharmony_ci
3432cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3433cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
3434cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3435cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3436cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3437cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
3438cabdff1aSopenharmony_ci                                                       const6, const3);
3439cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3440cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
3441cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3442cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3443cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3444cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3445cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3446cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3447cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3448cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3449cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3450cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3451cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3452cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3453cabdff1aSopenharmony_ci
3454cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3455cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3456cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3457cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3458cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3459cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3460cabdff1aSopenharmony_ci}
3461cabdff1aSopenharmony_ci
/*
 * 16x16 no-round HV qpel, "aver_hv_src01" variant.
 *
 * Two-pass wrapper: hv_mc_qpel_no_rnd_horiz_src0_16x16_msa() fills a stack
 * scratch buffer from src, then vert_mc_qpel_no_rnd_aver_src1_16x16_msa()
 * consumes that buffer and writes the result to dst.
 */
static void hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(const uint8_t *src,
                                                      int32_t src_stride,
                                                      uint8_t *dst,
                                                      int32_t dst_stride)
{
    /* 272 bytes; presumably 17 rows of 16 bytes -- confirm against the
     * horizontal helper. */
    uint8_t hpass[16 * 17];

    /* First pass: src -> scratch. */
    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, hpass, 16, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(hpass, 16, dst, dst_stride);
}
3472cabdff1aSopenharmony_ci
3473cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(const uint8_t *src,
3474cabdff1aSopenharmony_ci                                                    int32_t src_stride,
3475cabdff1aSopenharmony_ci                                                    uint8_t *dst,
3476cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
3477cabdff1aSopenharmony_ci{
3478cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3479cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3480cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3481cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3482cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3483cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3484cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3485cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3486cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3487cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3488cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3489cabdff1aSopenharmony_ci
3490cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3491cabdff1aSopenharmony_ci    src += (2 * src_stride);
3492cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3493cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3494cabdff1aSopenharmony_ci                                                  const6, const3);
3495cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3496cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
3497cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3498cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3499cabdff1aSopenharmony_ci    src += (2 * src_stride);
3500cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3501cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3502cabdff1aSopenharmony_ci                                                  const6, const3);
3503cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3504cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
3505cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3506cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3507cabdff1aSopenharmony_ci    src += (2 * src_stride);
3508cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3509cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3510cabdff1aSopenharmony_ci                                                  const6, const3);
3511cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3512cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
3513cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3514cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3515cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3516cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3517cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3518cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3519cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
3520cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3521cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3522cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3523cabdff1aSopenharmony_ci
3524cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3525cabdff1aSopenharmony_ci    src += (2 * src_stride);
3526cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3527cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3528cabdff1aSopenharmony_ci                                                  const6, const3);
3529cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3530cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
3531cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3532cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3533cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3534cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
3535cabdff1aSopenharmony_ci                                                       const6, const3);
3536cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
3537cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3538cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3539cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3540cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3541cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3542cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
3543cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3544cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3545cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3546cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3547cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3548cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3549cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3550cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3551cabdff1aSopenharmony_ci
3552cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
3553cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3554cabdff1aSopenharmony_ci
3555cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3556cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3557cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3558cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3559cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3560cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3561cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3562cabdff1aSopenharmony_ci
3563cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
3564cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3565cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3566cabdff1aSopenharmony_ci}
3567cabdff1aSopenharmony_ci
/*
 * 16x16 no-round "v, src1" case, done as two passes through a stack buffer:
 * hv_mc_qpel_no_rnd_horiz_16x16_msa() fills the buffer (stride 16, height
 * argument 16), then vert_mc_qpel_no_rnd_aver_src1_16x16_msa() reads it
 * back and writes dst.
 */
static void hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(const uint8_t *src,
                                                    int32_t src_stride,
                                                    uint8_t *dst,
                                                    int32_t dst_stride)
{
    /* 272 bytes, i.e. 17 lines of 16 bytes */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;
    const int32_t hpass_height = 16;

    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride,
                                      hpass, hpass_stride, hpass_height);
    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(hpass, hpass_stride,
                                            dst, dst_stride);
}
3578cabdff1aSopenharmony_ci
3579cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(const uint8_t *src,
3580cabdff1aSopenharmony_ci                                                  int32_t src_stride,
3581cabdff1aSopenharmony_ci                                                  uint8_t *dst,
3582cabdff1aSopenharmony_ci                                                  int32_t dst_stride)
3583cabdff1aSopenharmony_ci{
3584cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3585cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3586cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3587cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3588cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3589cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3590cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3591cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3592cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3593cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3594cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3595cabdff1aSopenharmony_ci
3596cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3597cabdff1aSopenharmony_ci    src += (2 * src_stride);
3598cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3599cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3600cabdff1aSopenharmony_ci                                                    const6, const3);
3601cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3602cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3603cabdff1aSopenharmony_ci    src += (2 * src_stride);
3604cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3605cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3606cabdff1aSopenharmony_ci                                                    const6, const3);
3607cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3608cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3609cabdff1aSopenharmony_ci    src += (2 * src_stride);
3610cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3611cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3612cabdff1aSopenharmony_ci                                                    const6, const3);
3613cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3614cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3615cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3616cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3617cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3618cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3619cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
3620cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3621cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3622cabdff1aSopenharmony_ci    src += (2 * src_stride);
3623cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3624cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3625cabdff1aSopenharmony_ci
3626cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3627cabdff1aSopenharmony_ci                                                    mask2, mask3, const20,
3628cabdff1aSopenharmony_ci                                                    const6, const3);
3629cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3630cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3631cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3632cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3633cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3634cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3635cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
3636cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3637cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3638cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3639cabdff1aSopenharmony_ci                                                         mask2, mask3, const20,
3640cabdff1aSopenharmony_ci                                                         const6, const3);
3641cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3642cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3643cabdff1aSopenharmony_ci
3644cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3645cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3646cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3647cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3648cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3649cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
3650cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3651cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3652cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3653cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3654cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3655cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3656cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
3657cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3658cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3659cabdff1aSopenharmony_ci}
3660cabdff1aSopenharmony_ci
/*
 * 16x16 no-round "hv, src11" case, done as two passes through a stack
 * buffer: hv_mc_qpel_no_rnd_horiz_src1_16x16_msa() fills the buffer
 * (stride 16, height argument 16), then
 * vert_mc_qpel_no_rnd_aver_src1_16x16_msa() reads it back and writes dst.
 */
static void hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(const uint8_t *src,
                                                      int32_t src_stride,
                                                      uint8_t *dst,
                                                      int32_t dst_stride)
{
    /* 272 bytes, i.e. 17 lines of 16 bytes */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;
    const int32_t hpass_height = 16;

    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride,
                                           hpass, hpass_stride, hpass_height);
    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(hpass, hpass_stride,
                                            dst, dst_stride);
}
3671cabdff1aSopenharmony_ci
3672cabdff1aSopenharmony_cistatic void hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(const uint8_t *src,
3673cabdff1aSopenharmony_ci                                                    int32_t src_stride,
3674cabdff1aSopenharmony_ci                                                    uint8_t *dst,
3675cabdff1aSopenharmony_ci                                                    int32_t dst_stride)
3676cabdff1aSopenharmony_ci{
3677cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3678cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3679cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3680cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3681cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3682cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3683cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3684cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3685cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3686cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3687cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3688cabdff1aSopenharmony_ci
3689cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3690cabdff1aSopenharmony_ci    src += (2 * src_stride);
3691cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3692cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3693cabdff1aSopenharmony_ci                                                  const6, const3);
3694cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3695cabdff1aSopenharmony_ci
3696cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3697cabdff1aSopenharmony_ci    horiz0 = __msa_ave_u_b(inp0, res0);
3698cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3699cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3700cabdff1aSopenharmony_ci    src += (2 * src_stride);
3701cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3702cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3703cabdff1aSopenharmony_ci                                                  const6, const3);
3704cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3705cabdff1aSopenharmony_ci
3706cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3707cabdff1aSopenharmony_ci    horiz2 = __msa_ave_u_b(inp2, res1);
3708cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3709cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3710cabdff1aSopenharmony_ci    src += (2 * src_stride);
3711cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3712cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3713cabdff1aSopenharmony_ci                                                  const6, const3);
3714cabdff1aSopenharmony_ci
3715cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3716cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3717cabdff1aSopenharmony_ci    horiz4 = __msa_ave_u_b(inp0, res0);
3718cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3719cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3720cabdff1aSopenharmony_ci                                                 horiz1, horiz2, horiz3, horiz4,
3721cabdff1aSopenharmony_ci                                                 horiz1, horiz0, horiz0, horiz1,
3722cabdff1aSopenharmony_ci                                                 horiz2, horiz3, horiz4, horiz5,
3723cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3724cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
3725cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3726cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3727cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3728cabdff1aSopenharmony_ci
3729cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3730cabdff1aSopenharmony_ci    src += (2 * src_stride);
3731cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3732cabdff1aSopenharmony_ci                                                  mask2, mask3, const20,
3733cabdff1aSopenharmony_ci                                                  const6, const3);
3734cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3735cabdff1aSopenharmony_ci
3736cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3737cabdff1aSopenharmony_ci    horiz6 = __msa_ave_u_b(inp2, res1);
3738cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3739cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3740cabdff1aSopenharmony_ci                                                 horiz3, horiz4, horiz5, horiz6,
3741cabdff1aSopenharmony_ci                                                 horiz3, horiz2, horiz1, horiz0,
3742cabdff1aSopenharmony_ci                                                 horiz4, horiz5, horiz6, horiz7,
3743cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3744cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
3745cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3746cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3747cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3748cabdff1aSopenharmony_ci
3749cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3750cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3751cabdff1aSopenharmony_ci                                                       mask2, mask3, const20,
3752cabdff1aSopenharmony_ci                                                       const6, const3);
3753cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3754cabdff1aSopenharmony_ci    horiz8 = __msa_ave_u_b(inp0, res0);
3755cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3756cabdff1aSopenharmony_ci                                                 horiz5, horiz6, horiz7, horiz8,
3757cabdff1aSopenharmony_ci                                                 horiz5, horiz4, horiz3, horiz2,
3758cabdff1aSopenharmony_ci                                                 horiz6, horiz7, horiz8, horiz8,
3759cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3760cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3761cabdff1aSopenharmony_ci                                                 horiz7, horiz8, horiz8, horiz7,
3762cabdff1aSopenharmony_ci                                                 horiz7, horiz6, horiz5, horiz4,
3763cabdff1aSopenharmony_ci                                                 horiz8, horiz8, horiz7, horiz6,
3764cabdff1aSopenharmony_ci                                                 const20, const6, const3);
3765cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
3766cabdff1aSopenharmony_ci    res0 = __msa_ave_u_b(avg0, res0);
3767cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
3768cabdff1aSopenharmony_ci    res1 = __msa_ave_u_b(avg1, res1);
3769cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3770cabdff1aSopenharmony_ci}
3771cabdff1aSopenharmony_ci
3772cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_horiz_src0_16x16_msa(const uint8_t *src,
3773cabdff1aSopenharmony_ci                                                 int32_t src_stride,
3774cabdff1aSopenharmony_ci                                                 uint8_t *dst,
3775cabdff1aSopenharmony_ci                                                 int32_t dst_stride,
3776cabdff1aSopenharmony_ci                                                 int32_t height)
3777cabdff1aSopenharmony_ci{
3778cabdff1aSopenharmony_ci    uint8_t loop_count;
3779cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3780cabdff1aSopenharmony_ci    v16u8 res;
3781cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
3782cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3783cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3784cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
3785cabdff1aSopenharmony_ci
3786cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
3787cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3788cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3789cabdff1aSopenharmony_ci        src += (4 * src_stride);
3790cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3791cabdff1aSopenharmony_ci                                      const20, const6, const3);
3792cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp0, res);
3793cabdff1aSopenharmony_ci        ST_UB(res, dst);
3794cabdff1aSopenharmony_ci        dst += dst_stride;
3795cabdff1aSopenharmony_ci
3796cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3797cabdff1aSopenharmony_ci                                      const20, const6, const3);
3798cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp2, res);
3799cabdff1aSopenharmony_ci        ST_UB(res, dst);
3800cabdff1aSopenharmony_ci        dst += dst_stride;
3801cabdff1aSopenharmony_ci
3802cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
3803cabdff1aSopenharmony_ci                                      const20, const6, const3);
3804cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp4, res);
3805cabdff1aSopenharmony_ci        ST_UB(res, dst);
3806cabdff1aSopenharmony_ci        dst += dst_stride;
3807cabdff1aSopenharmony_ci
3808cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
3809cabdff1aSopenharmony_ci                                      const20, const6, const3);
3810cabdff1aSopenharmony_ci        res = __msa_aver_u_b(inp6, res);
3811cabdff1aSopenharmony_ci        ST_UB(res, dst);
3812cabdff1aSopenharmony_ci        dst += dst_stride;
3813cabdff1aSopenharmony_ci    }
3814cabdff1aSopenharmony_ci
3815cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
3816cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3817cabdff1aSopenharmony_ci    res = __msa_aver_u_b(inp0, res);
3818cabdff1aSopenharmony_ci    ST_UB(res, dst);
3819cabdff1aSopenharmony_ci}
3820cabdff1aSopenharmony_ci
/*
 * 16x16 rounding "hv, src00" case, done as two passes through a stack
 * buffer: hv_mc_qpel_aver_horiz_src0_16x16_msa() fills the buffer
 * (stride 16, height argument 16), then vert_mc_qpel_aver_src0_16x16_msa()
 * reads it back and writes dst.
 */
static void hv_mc_qpel_aver_hv_src00_16x16_msa(const uint8_t *src,
                                               int32_t src_stride,
                                               uint8_t *dst,
                                               int32_t dst_stride)
{
    /* 272 bytes, i.e. 17 lines of 16 bytes */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;
    const int32_t hpass_height = 16;

    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride,
                                         hpass, hpass_stride, hpass_height);
    vert_mc_qpel_aver_src0_16x16_msa(hpass, hpass_stride, dst, dst_stride);
}
3831cabdff1aSopenharmony_ci
3832cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_hv_src00_8x8_msa(const uint8_t *src,
3833cabdff1aSopenharmony_ci                                             int32_t src_stride,
3834cabdff1aSopenharmony_ci                                             uint8_t *dst,
3835cabdff1aSopenharmony_ci                                             int32_t dst_stride)
3836cabdff1aSopenharmony_ci{
3837cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3838cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3839cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3840cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
3841cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3842cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3843cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3844cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
3845cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3846cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3847cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3848cabdff1aSopenharmony_ci
3849cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
3850cabdff1aSopenharmony_ci    src += (4 * src_stride);
3851cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3852cabdff1aSopenharmony_ci                                         const20, const6, const3);
3853cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3854cabdff1aSopenharmony_ci                                         const20, const6, const3);
3855cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3856cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
3857cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3858cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3859cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
3860cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
3861cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3862cabdff1aSopenharmony_ci    src += (2 * src_stride);
3863cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3864cabdff1aSopenharmony_ci                                         const20, const6, const3);
3865cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3866cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
3867cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
3868cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3869cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
3870cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
3871cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
3872cabdff1aSopenharmony_ci                                        const20, const6, const3);
3873cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
3874cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
3875cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
3876cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
3877cabdff1aSopenharmony_ci
3878cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3879cabdff1aSopenharmony_ci    src += (2 * src_stride);
3880cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3881cabdff1aSopenharmony_ci                                         const20, const6, const3);
3882cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3883cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
3884cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
3885cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3886cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
3887cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
3888cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
3889cabdff1aSopenharmony_ci                                        const20, const6, const3);
3890cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
3891cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
3892cabdff1aSopenharmony_ci
3893cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
3894cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
3895cabdff1aSopenharmony_ci                                              const20, const6, const3);
3896cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
3897cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
3898cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
3899cabdff1aSopenharmony_ci
3900cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3901cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
3902cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
3903cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
3904cabdff1aSopenharmony_ci                                        const20, const6, const3);
3905cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
3906cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
3907cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3908cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
3909cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
3910cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
3911cabdff1aSopenharmony_ci                                        const20, const6, const3);
3912cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
3913cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
3914cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3915cabdff1aSopenharmony_ci}
3916cabdff1aSopenharmony_ci
3917cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_horiz_16x16_msa(const uint8_t *src,
3918cabdff1aSopenharmony_ci                                            int32_t src_stride,
3919cabdff1aSopenharmony_ci                                            uint8_t *dst,
3920cabdff1aSopenharmony_ci                                            int32_t dst_stride,
3921cabdff1aSopenharmony_ci                                            int32_t height)
3922cabdff1aSopenharmony_ci{
3923cabdff1aSopenharmony_ci    uint8_t loop_count;
3924cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3925cabdff1aSopenharmony_ci    v16u8 res;
3926cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
3927cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3928cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3929cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
3930cabdff1aSopenharmony_ci
3931cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
3932cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3933cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3934cabdff1aSopenharmony_ci        src += (4 * src_stride);
3935cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3936cabdff1aSopenharmony_ci                                      const20, const6, const3);
3937cabdff1aSopenharmony_ci        ST_UB(res, dst);
3938cabdff1aSopenharmony_ci        dst += dst_stride;
3939cabdff1aSopenharmony_ci
3940cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3941cabdff1aSopenharmony_ci                                      const20, const6, const3);
3942cabdff1aSopenharmony_ci        ST_UB(res, dst);
3943cabdff1aSopenharmony_ci        dst += dst_stride;
3944cabdff1aSopenharmony_ci
3945cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
3946cabdff1aSopenharmony_ci                                      const20, const6, const3);
3947cabdff1aSopenharmony_ci        ST_UB(res, dst);
3948cabdff1aSopenharmony_ci        dst += dst_stride;
3949cabdff1aSopenharmony_ci
3950cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
3951cabdff1aSopenharmony_ci                                      const20, const6, const3);
3952cabdff1aSopenharmony_ci        ST_UB(res, dst);
3953cabdff1aSopenharmony_ci        dst += dst_stride;
3954cabdff1aSopenharmony_ci    }
3955cabdff1aSopenharmony_ci
3956cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
3957cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3958cabdff1aSopenharmony_ci    ST_UB(res, dst);
3959cabdff1aSopenharmony_ci}
3960cabdff1aSopenharmony_ci
/*
 * 16x16 two-pass qpel: hv_mc_qpel_aver_horiz_16x16_msa() fills a scratch
 * buffer, which vert_mc_qpel_aver_src0_16x16_msa() then processes into dst.
 */
static void hv_mc_qpel_aver_v_src0_16x16_msa(const uint8_t *src,
                                             int32_t src_stride,
                                             uint8_t *dst,
                                             int32_t dst_stride)
{
    /*
     * The horizontal pass, called with height 16, writes 16 + 1 rows of
     * 16 bytes each, hence 17 * 16 = 272 bytes of scratch.
     */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;

    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, hpass, hpass_stride, 16);
    vert_mc_qpel_aver_src0_16x16_msa(hpass, hpass_stride, dst, dst_stride);
}
3971cabdff1aSopenharmony_ci
/*
 * 8x8 two-pass qpel filter (horizontal, then vertical) where each vertically
 * filtered row pair is averaged with the horizontally filtered rows at the
 * same position.
 *
 * Nine rows are loaded from src (stepping by src_stride) and filtered
 * horizontally into horiz0..horiz8.  Four vertical filter invocations each
 * produce one vector holding two 8-byte output rows; eight rows are stored
 * to dst (stepping by dst_stride).
 */
3972cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_v_src0_8x8_msa(const uint8_t *src,
3973cabdff1aSopenharmony_ci                                           int32_t src_stride,
3974cabdff1aSopenharmony_ci                                           uint8_t *dst,
3975cabdff1aSopenharmony_ci                                           int32_t dst_stride)
3976cabdff1aSopenharmony_ci{
3977cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
3978cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
3979cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
3980cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
    /* Byte-index tables passed to the horizontal filter macros; every index
     * lies in the range 0..8. */
3981cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
3982cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
3983cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
3984cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Filter coefficients 20, 6 and 3 as byte vectors. */
3985cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
3986cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
3987cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
3988cabdff1aSopenharmony_ci
    /* Horizontal pass, two rows per macro call.  The even-numbered horizN
     * receives the macro result; the odd-numbered one is its doubleword 1
     * replicated (presumably the second row of the pair -- the macro body
     * is defined elsewhere in this file). */
3989cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
3990cabdff1aSopenharmony_ci    src += (2 * src_stride);
3991cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
3992cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
3993cabdff1aSopenharmony_ci                                           const20, const6, const3);
3994cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
3995cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
3996cabdff1aSopenharmony_ci    src += (2 * src_stride);
3997cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
3998cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
3999cabdff1aSopenharmony_ci                                           const20, const6, const3);
4000cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4001cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4002cabdff1aSopenharmony_ci    src += (2 * src_stride);
4003cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4004cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4005cabdff1aSopenharmony_ci                                           const20, const6, const3);
4006cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* First vertical call (stored as the first two dst rows).  horiz0 and
     * horiz1 are repeated in the argument list where no earlier row is
     * available. */
4007cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4008cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4009cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4010cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4011cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Combine the low doublewords of horiz0 and horiz1 into one vector and
     * average it with the vertical result. */
4012cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
4013cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4014cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4015cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4016cabdff1aSopenharmony_ci
4017cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4018cabdff1aSopenharmony_ci    src += (2 * src_stride);
4019cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4020cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4021cabdff1aSopenharmony_ci                                           const20, const6, const3);
4022cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Second vertical call (third and fourth dst rows). */
4023cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4024cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4025cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4026cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4027cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Ninth (last) source row, filtered with the single-row macro. */
4028cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4029cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4030cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
4031cabdff1aSopenharmony_ci                                                const20, const6, const3);
4032cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
4033cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
    /* Third vertical call; horiz8 is repeated at the lower edge. */
4034cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4035cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4036cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4037cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4038cabdff1aSopenharmony_ci                                        const20, const6, const3);
4039cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4040cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4041cabdff1aSopenharmony_ci
4042cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
4043cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Fourth vertical call; horiz8 and horiz7 are repeated at the lower
     * edge. */
4044cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4045cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4046cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4047cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4048cabdff1aSopenharmony_ci                                        const20, const6, const3);
4049cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
4050cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
    /* Store the remaining four rows (two from res0, two from res1). */
4051cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4052cabdff1aSopenharmony_ci}
4053cabdff1aSopenharmony_ci
4054cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_horiz_src1_16x16_msa(const uint8_t *src,
4055cabdff1aSopenharmony_ci                                                 int32_t src_stride,
4056cabdff1aSopenharmony_ci                                                 uint8_t *dst,
4057cabdff1aSopenharmony_ci                                                 int32_t dst_stride,
4058cabdff1aSopenharmony_ci                                                 int32_t height)
4059cabdff1aSopenharmony_ci{
4060cabdff1aSopenharmony_ci    uint8_t loop_count;
4061cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
4062cabdff1aSopenharmony_ci    v16u8 res;
4063cabdff1aSopenharmony_ci    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
4064cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4065cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4066cabdff1aSopenharmony_ci    v8u16 const20 = (v8u16) __msa_ldi_h(20);
4067cabdff1aSopenharmony_ci
4068cabdff1aSopenharmony_ci    for (loop_count = (height >> 2); loop_count--;) {
4069cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
4070cabdff1aSopenharmony_ci        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
4071cabdff1aSopenharmony_ci        src += (4 * src_stride);
4072cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
4073cabdff1aSopenharmony_ci                                      const20, const6, const3);
4074cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp1);
4075cabdff1aSopenharmony_ci        ST_UB(res, dst);
4076cabdff1aSopenharmony_ci        dst += dst_stride;
4077cabdff1aSopenharmony_ci
4078cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
4079cabdff1aSopenharmony_ci                                      const20, const6, const3);
4080cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp3);
4081cabdff1aSopenharmony_ci        ST_UB(res, dst);
4082cabdff1aSopenharmony_ci        dst += dst_stride;
4083cabdff1aSopenharmony_ci
4084cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
4085cabdff1aSopenharmony_ci                                      const20, const6, const3);
4086cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp5);
4087cabdff1aSopenharmony_ci        ST_UB(res, dst);
4088cabdff1aSopenharmony_ci        dst += dst_stride;
4089cabdff1aSopenharmony_ci
4090cabdff1aSopenharmony_ci        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
4091cabdff1aSopenharmony_ci                                      const20, const6, const3);
4092cabdff1aSopenharmony_ci        res = __msa_aver_u_b(res, inp7);
4093cabdff1aSopenharmony_ci        ST_UB(res, dst);
4094cabdff1aSopenharmony_ci        dst += dst_stride;
4095cabdff1aSopenharmony_ci    }
4096cabdff1aSopenharmony_ci
4097cabdff1aSopenharmony_ci    LD_UB2(src, 1, inp0, inp1);
4098cabdff1aSopenharmony_ci    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
4099cabdff1aSopenharmony_ci    res = __msa_aver_u_b(inp1, res);
4100cabdff1aSopenharmony_ci    ST_UB(res, dst);
4101cabdff1aSopenharmony_ci}
4102cabdff1aSopenharmony_ci
/*
 * 16x16 two-pass qpel: hv_mc_qpel_aver_horiz_src1_16x16_msa() fills a
 * scratch buffer, which vert_mc_qpel_aver_src0_16x16_msa() then processes
 * into dst.
 */
static void hv_mc_qpel_aver_hv_src10_16x16_msa(const uint8_t *src,
                                               int32_t src_stride,
                                               uint8_t *dst,
                                               int32_t dst_stride)
{
    /*
     * The horizontal pass, called with height 16, writes 16 + 1 rows of
     * 16 bytes each, hence 17 * 16 = 272 bytes of scratch.
     */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;

    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride,
                                         hpass, hpass_stride, 16);
    vert_mc_qpel_aver_src0_16x16_msa(hpass, hpass_stride, dst, dst_stride);
}
4113cabdff1aSopenharmony_ci
/*
 * 8x8 two-pass qpel filter with averaging in both passes.
 *
 * Horizontal pass: each filtered row is averaged with the source row slid
 * by one byte, giving horiz0..horiz8 (nine rows loaded from src, stepping
 * by src_stride).
 * Vertical pass: four vertical filter invocations each produce one vector
 * holding two 8-byte rows, which is averaged with the horizontal rows at
 * the same position before being stored.  Eight rows are written to dst
 * (stepping by dst_stride).
 */
4114cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_hv_src10_8x8_msa(const uint8_t *src,
4115cabdff1aSopenharmony_ci                                             int32_t src_stride,
4116cabdff1aSopenharmony_ci                                             uint8_t *dst,
4117cabdff1aSopenharmony_ci                                             int32_t dst_stride)
4118cabdff1aSopenharmony_ci{
4119cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4120cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4121cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4122cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
    /* Byte-index tables passed to the horizontal filter macros; every index
     * lies in the range 0..8. */
4123cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4124cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4125cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4126cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Filter coefficients 20, 6 and 3 as byte vectors. */
4127cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4128cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4129cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4130cabdff1aSopenharmony_ci
    /* Rows 0..3: filter two row pairs horizontally. */
4131cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4132cabdff1aSopenharmony_ci    src += (4 * src_stride);
4133cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4134cabdff1aSopenharmony_ci                                         const20, const6, const3);
4135cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4136cabdff1aSopenharmony_ci                                         const20, const6, const3);
    /* Slide each source row by one byte (presumably so that lane i holds
     * source byte i + 1 -- SLDI_B2_UB is defined in the MSA macro header). */
4137cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4138cabdff1aSopenharmony_ci
    /* Put doubleword 0 of inp1 into doubleword 1 of inp0 so both slid rows
     * sit in one vector, then average with the filtered pair. */
4139cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4140cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
4141cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4142cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4143cabdff1aSopenharmony_ci
4144cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4145cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4146cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Rows 4..7: same sequence as above. */
4147cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4148cabdff1aSopenharmony_ci    src += (4 * src_stride);
4149cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4150cabdff1aSopenharmony_ci                                         const20, const6, const3);
4151cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4152cabdff1aSopenharmony_ci                                         const20, const6, const3);
4153cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4154cabdff1aSopenharmony_ci
4155cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4156cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4157cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4158cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4159cabdff1aSopenharmony_ci
4160cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4161cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4162cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* First vertical call (stored as the first two dst rows).  horiz0 and
     * horiz1 are repeated in the argument list where no earlier row is
     * available. */
4163cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4164cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4165cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4166cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4167cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Combine the low doublewords of horiz0 and horiz1 into one vector and
     * average it with the vertical result. */
4168cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
4169cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Second vertical call (third and fourth dst rows). */
4170cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4171cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4172cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4173cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4174cabdff1aSopenharmony_ci                                        const20, const6, const3);
4175cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4176cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4177cabdff1aSopenharmony_ci
    /* Ninth (last) source row: single-row horizontal filter, then the same
     * slide-by-one-byte and average as for the earlier rows. */
4178cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4179cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4180cabdff1aSopenharmony_ci                                              const20, const6, const3);
4181cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
4182cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4183cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4184cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
    /* Third vertical call; horiz8 is repeated at the lower edge. */
4185cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4186cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4187cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4188cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4189cabdff1aSopenharmony_ci                                        const20, const6, const3);
4190cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4191cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4192cabdff1aSopenharmony_ci
4193cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
4194cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Fourth vertical call; horiz8 and horiz7 are repeated at the lower
     * edge. */
4195cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4196cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4197cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4198cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4199cabdff1aSopenharmony_ci                                        const20, const6, const3);
4200cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
4201cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
    /* Store the remaining four rows (two from res0, two from res1). */
4202cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4203cabdff1aSopenharmony_ci}
4204cabdff1aSopenharmony_ci
/*
 * 16x16 two-pass qpel: hv_mc_qpel_aver_horiz_src0_16x16_msa() fills a
 * scratch buffer, which vert_mc_qpel_16x16_msa() then processes into dst.
 */
static void hv_mc_qpel_aver_h_src0_16x16_msa(const uint8_t *src,
                                             int32_t src_stride,
                                             uint8_t *dst,
                                             int32_t dst_stride)
{
    /*
     * 17 * 16 = 272 bytes of scratch, addressed with a 16-byte row stride.
     * Presumably the horizontal helper emits 17 rows for height 16 -- it is
     * defined elsewhere in this file.
     */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;

    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride,
                                         hpass, hpass_stride, 16);
    vert_mc_qpel_16x16_msa(hpass, hpass_stride, dst, dst_stride);
}
4215cabdff1aSopenharmony_ci
/*
 * 8x8 two-pass qpel filter with averaging in the horizontal pass only.
 *
 * Horizontal pass: each filtered row is averaged with the unshifted source
 * row, giving horiz0..horiz8 (nine rows loaded from src, stepping by
 * src_stride).
 * Vertical pass: four vertical filter invocations each produce one vector
 * holding two 8-byte rows, stored as-is.  Eight rows are written to dst
 * (stepping by dst_stride).
 */
4216cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_h_src0_8x8_msa(const uint8_t *src,
4217cabdff1aSopenharmony_ci                                           int32_t src_stride,
4218cabdff1aSopenharmony_ci                                           uint8_t *dst,
4219cabdff1aSopenharmony_ci                                           int32_t dst_stride)
4220cabdff1aSopenharmony_ci{
4221cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4222cabdff1aSopenharmony_ci    v16u8 res0, res1;
4223cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4224cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
    /* Byte-index tables passed to the horizontal filter macros; every index
     * lies in the range 0..8. */
4225cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4226cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4227cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4228cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Filter coefficients 20, 6 and 3 as byte vectors. */
4229cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4230cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4231cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4232cabdff1aSopenharmony_ci
    /* Rows 0 and 1: filter the pair, combine the low doublewords of the two
     * source rows into one vector, and average the two. */
4233cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4234cabdff1aSopenharmony_ci    src += (2 * src_stride);
4235cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4236cabdff1aSopenharmony_ci                                         const20, const6, const3);
4237cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4238cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
4239cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4240cabdff1aSopenharmony_ci
    /* Rows 2 and 3. */
4241cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4242cabdff1aSopenharmony_ci    src += (2 * src_stride);
4243cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4244cabdff1aSopenharmony_ci                                         const20, const6, const3);
4245cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4246cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4247cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Rows 4 and 5. */
4248cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4249cabdff1aSopenharmony_ci    src += (2 * src_stride);
4250cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4251cabdff1aSopenharmony_ci                                         const20, const6, const3);
4252cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4253cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4254cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* First vertical call (stored as the first two dst rows).  horiz0 and
     * horiz1 are repeated in the argument list where no earlier row is
     * available. */
4255cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4256cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4257cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4258cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4259cabdff1aSopenharmony_ci                                        const20, const6, const3);
4260cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4261cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4262cabdff1aSopenharmony_ci
    /* Rows 6 and 7. */
4263cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4264cabdff1aSopenharmony_ci    src += (2 * src_stride);
4265cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4266cabdff1aSopenharmony_ci                                         const20, const6, const3);
4267cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4268cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4269cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Second vertical call (third and fourth dst rows). */
4270cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4271cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4272cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4273cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4274cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Ninth (last) source row: single-row filter averaged with the row
     * itself. */
4275cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4276cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4277cabdff1aSopenharmony_ci                                              const20, const6, const3);
4278cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
    /* Third vertical call; horiz8 is repeated at the lower edge. */
4279cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4280cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4281cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4282cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4283cabdff1aSopenharmony_ci                                        const20, const6, const3);
4284cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4285cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4286cabdff1aSopenharmony_ci
    /* Fourth vertical call; horiz8 and horiz7 are repeated at the lower
     * edge. */
4287cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4288cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4289cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4290cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4291cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Store the remaining four rows (two from res0, two from res1). */
4292cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4293cabdff1aSopenharmony_ci}
4294cabdff1aSopenharmony_ci
/*
 * 16x16 two-pass qpel: hv_mc_qpel_aver_horiz_16x16_msa() fills a scratch
 * buffer, which vert_mc_qpel_16x16_msa() then processes into dst.
 */
static void hv_mc_qpel_16x16_msa(const uint8_t *src,
                                 int32_t src_stride,
                                 uint8_t *dst,
                                 int32_t dst_stride)
{
    /*
     * The horizontal pass, called with height 16, writes 16 + 1 rows of
     * 16 bytes each, hence 17 * 16 = 272 bytes of scratch.
     */
    uint8_t hpass[17 * 16];
    const int32_t hpass_stride = 16;

    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, hpass, hpass_stride, 16);
    vert_mc_qpel_16x16_msa(hpass, hpass_stride, dst, dst_stride);
}
4305cabdff1aSopenharmony_ci
4306cabdff1aSopenharmony_cistatic void hv_mc_qpel_8x8_msa(const uint8_t *src, int32_t src_stride,
4307cabdff1aSopenharmony_ci                               uint8_t *dst, int32_t dst_stride)
4308cabdff1aSopenharmony_ci{
4309cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4310cabdff1aSopenharmony_ci    v16u8 res0, res1;
4311cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4312cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4313cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4314cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4315cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4316cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
4317cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4318cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4319cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4320cabdff1aSopenharmony_ci
4321cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4322cabdff1aSopenharmony_ci    src += (2 * src_stride);
4323cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4324cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4325cabdff1aSopenharmony_ci                                           const20, const6, const3);
4326cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4327cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4328cabdff1aSopenharmony_ci    src += (2 * src_stride);
4329cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4330cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4331cabdff1aSopenharmony_ci                                           const20, const6, const3);
4332cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4333cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4334cabdff1aSopenharmony_ci    src += (2 * src_stride);
4335cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4336cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4337cabdff1aSopenharmony_ci                                           const20, const6, const3);
4338cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4339cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4340cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4341cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4342cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4343cabdff1aSopenharmony_ci                                        const20, const6, const3);
4344cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4345cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4346cabdff1aSopenharmony_ci
4347cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4348cabdff1aSopenharmony_ci    src += (2 * src_stride);
4349cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4350cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4351cabdff1aSopenharmony_ci                                           const20, const6, const3);
4352cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
4353cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4354cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4355cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4356cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4357cabdff1aSopenharmony_ci                                        const20, const6, const3);
4358cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4359cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4360cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
4361cabdff1aSopenharmony_ci                                                const20, const6, const3);
4362cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4363cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4364cabdff1aSopenharmony_ci
4365cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4366cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4367cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4368cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4369cabdff1aSopenharmony_ci                                        const20, const6, const3);
4370cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4371cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4372cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4373cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4374cabdff1aSopenharmony_ci                                        const20, const6, const3);
4375cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4376cabdff1aSopenharmony_ci}
4377cabdff1aSopenharmony_ci
/*
 * 16x16 two-stage filter: hv_mc_qpel_aver_horiz_src1_16x16_msa() fills a
 * local scratch buffer from src, then vert_mc_qpel_16x16_msa() reads that
 * buffer (with stride 16) and writes the result to dst.
 *
 * The scratch buffer is 272 bytes, i.e. 17 * 16.
 * NOTE(review): presumably 17 rows of 16 bytes - confirm against the
 * first-stage helper.
 */
static void hv_mc_qpel_aver_h_src1_16x16_msa(const uint8_t *src,
                                             int32_t src_stride,
                                             uint8_t *dst,
                                             int32_t dst_stride)
{
    uint8_t scratch[17 * 16];

    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, scratch, 16, 16);
    vert_mc_qpel_16x16_msa(scratch, 16, dst, dst_stride);
}
4388cabdff1aSopenharmony_ci
4389cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_h_src1_8x8_msa(const uint8_t *src,
4390cabdff1aSopenharmony_ci                                           int32_t src_stride,
4391cabdff1aSopenharmony_ci                                           uint8_t *dst,
4392cabdff1aSopenharmony_ci                                           int32_t dst_stride)
4393cabdff1aSopenharmony_ci{
4394cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4395cabdff1aSopenharmony_ci    v16u8 res0, res1;
4396cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4397cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4398cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4399cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4400cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4401cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
4402cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4403cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4404cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4405cabdff1aSopenharmony_ci
4406cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4407cabdff1aSopenharmony_ci    src += (4 * src_stride);
4408cabdff1aSopenharmony_ci
4409cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4410cabdff1aSopenharmony_ci                                         const20, const6, const3);
4411cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4412cabdff1aSopenharmony_ci                                         const20, const6, const3);
4413cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4414cabdff1aSopenharmony_ci
4415cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4416cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
4417cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4418cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4419cabdff1aSopenharmony_ci
4420cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4421cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4422cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4423cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4424cabdff1aSopenharmony_ci    src += (4 * src_stride);
4425cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4426cabdff1aSopenharmony_ci                                         const20, const6, const3);
4427cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4428cabdff1aSopenharmony_ci                                         const20, const6, const3);
4429cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4430cabdff1aSopenharmony_ci
4431cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4432cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4433cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4434cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4435cabdff1aSopenharmony_ci
4436cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4437cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4438cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
4439cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4440cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4441cabdff1aSopenharmony_ci                                              const20, const6, const3);
4442cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4443cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
4444cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4445cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4446cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4447cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4448cabdff1aSopenharmony_ci                                        const20, const6, const3);
4449cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4450cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4451cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4452cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4453cabdff1aSopenharmony_ci                                        const20, const6, const3);
4454cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4455cabdff1aSopenharmony_ci    dst += (4 * dst_stride);
4456cabdff1aSopenharmony_ci
4457cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4458cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4459cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4460cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4461cabdff1aSopenharmony_ci                                        const20, const6, const3);
4462cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4463cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4464cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4465cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4466cabdff1aSopenharmony_ci                                        const20, const6, const3);
4467cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4468cabdff1aSopenharmony_ci}
4469cabdff1aSopenharmony_ci
/*
 * 16x16 two-stage filter: hv_mc_qpel_aver_horiz_src0_16x16_msa() fills a
 * local scratch buffer from src, then vert_mc_qpel_aver_src1_16x16_msa()
 * reads that buffer (with stride 16) and writes the result to dst.
 *
 * The scratch buffer is 272 bytes, i.e. 17 * 16.
 * NOTE(review): presumably 17 rows of 16 bytes - confirm against the
 * first-stage helper.
 */
static void hv_mc_qpel_aver_hv_src01_16x16_msa(const uint8_t *src,
                                               int32_t src_stride,
                                               uint8_t *dst,
                                               int32_t dst_stride)
{
    uint8_t scratch[17 * 16];

    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, scratch, 16, 16);
    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
}
4480cabdff1aSopenharmony_ci
4481cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_hv_src01_8x8_msa(const uint8_t *src,
4482cabdff1aSopenharmony_ci                                             int32_t src_stride,
4483cabdff1aSopenharmony_ci                                             uint8_t *dst,
4484cabdff1aSopenharmony_ci                                             int32_t dst_stride)
4485cabdff1aSopenharmony_ci{
4486cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4487cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4488cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4489cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4490cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4491cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4492cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4493cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
4494cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4495cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4496cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4497cabdff1aSopenharmony_ci
4498cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4499cabdff1aSopenharmony_ci    src += (4 * src_stride);
4500cabdff1aSopenharmony_ci
4501cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4502cabdff1aSopenharmony_ci                                         const20, const6, const3);
4503cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4504cabdff1aSopenharmony_ci                                         const20, const6, const3);
4505cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4506cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
4507cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4508cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4509cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4510cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4511cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4512cabdff1aSopenharmony_ci    src += (2 * src_stride);
4513cabdff1aSopenharmony_ci
4514cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4515cabdff1aSopenharmony_ci                                         const20, const6, const3);
4516cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4517cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4518cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4519cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4520cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4521cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4522cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4523cabdff1aSopenharmony_ci                                        const20, const6, const3);
4524cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_insve_d((v2i64) horiz1, 1, (v2i64) horiz2);
4525cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4526cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4527cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4528cabdff1aSopenharmony_ci
4529cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4530cabdff1aSopenharmony_ci    src += (2 * src_stride);
4531cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4532cabdff1aSopenharmony_ci                                         const20, const6, const3);
4533cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4534cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4535cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
4536cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4537cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4538cabdff1aSopenharmony_ci                                              const20, const6, const3);
4539cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
4540cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4541cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4542cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4543cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4544cabdff1aSopenharmony_ci                                        const20, const6, const3);
4545cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_insve_d((v2i64) horiz3, 1, (v2i64) horiz4);
4546cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4547cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4548cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4549cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4550cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4551cabdff1aSopenharmony_ci                                        const20, const6, const3);
4552cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4553cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4554cabdff1aSopenharmony_ci
4555cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_insve_d((v2i64) horiz5, 1, (v2i64) horiz6);
4556cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4557cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4558cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4559cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4560cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4561cabdff1aSopenharmony_ci                                        const20, const6, const3);
4562cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_insve_d((v2i64) horiz7, 1, (v2i64) horiz8);
4563cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4564cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4565cabdff1aSopenharmony_ci}
4566cabdff1aSopenharmony_ci
/*
 * 16x16 two-stage filter: hv_mc_qpel_aver_horiz_16x16_msa() fills a local
 * scratch buffer from src, then vert_mc_qpel_aver_src1_16x16_msa() reads
 * that buffer (with stride 16) and writes the result to dst.
 *
 * The scratch buffer is 272 bytes, i.e. 17 * 16.
 * NOTE(review): presumably 17 rows of 16 bytes - confirm against the
 * first-stage helper.
 */
static void hv_mc_qpel_aver_v_src1_16x16_msa(const uint8_t *src,
                                             int32_t src_stride,
                                             uint8_t *dst,
                                             int32_t dst_stride)
{
    uint8_t scratch[17 * 16];

    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, scratch, 16, 16);
    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
}
4577cabdff1aSopenharmony_ci
4578cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_v_src1_8x8_msa(const uint8_t *src,
4579cabdff1aSopenharmony_ci                                           int32_t src_stride,
4580cabdff1aSopenharmony_ci                                           uint8_t *dst,
4581cabdff1aSopenharmony_ci                                           int32_t dst_stride)
4582cabdff1aSopenharmony_ci{
4583cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4584cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4585cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4586cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4587cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4588cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4589cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4590cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
4591cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4592cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4593cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4594cabdff1aSopenharmony_ci
4595cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4596cabdff1aSopenharmony_ci    src += (2 * src_stride);
4597cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4598cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4599cabdff1aSopenharmony_ci                                           const20, const6, const3);
4600cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4601cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4602cabdff1aSopenharmony_ci    src += (2 * src_stride);
4603cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4604cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4605cabdff1aSopenharmony_ci                                           const20, const6, const3);
4606cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4607cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4608cabdff1aSopenharmony_ci    src += (2 * src_stride);
4609cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4610cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4611cabdff1aSopenharmony_ci                                           const20, const6, const3);
4612cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4613cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4614cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4615cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4616cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4617cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4618cabdff1aSopenharmony_ci                                        const20, const6, const3);
4619cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_insve_d((v2i64) horiz1, 1, (v2i64) horiz2);
4620cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4621cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4622cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4623cabdff1aSopenharmony_ci
4624cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4625cabdff1aSopenharmony_ci    src += (2 * src_stride);
4626cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4627cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4628cabdff1aSopenharmony_ci                                           const20, const6, const3);
4629cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
4630cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4631cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4632cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4633cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4634cabdff1aSopenharmony_ci                                        const20, const6, const3);
4635cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4636cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4637cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
4638cabdff1aSopenharmony_ci                                                const20, const6, const3);
4639cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_insve_d((v2i64) horiz3, 1, (v2i64) horiz4);
4640cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4641cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4642cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4643cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4644cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4645cabdff1aSopenharmony_ci                                        const20, const6, const3);
4646cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4647cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4648cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_insve_d((v2i64) horiz5, 1, (v2i64) horiz6);
4649cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4650cabdff1aSopenharmony_ci
4651cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4652cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4653cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4654cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4655cabdff1aSopenharmony_ci                                        const20, const6, const3);
4656cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_insve_d((v2i64) horiz7, 1, (v2i64) horiz8);
4657cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4658cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4659cabdff1aSopenharmony_ci}
4660cabdff1aSopenharmony_ci
/*
 * 16x16 two-stage filter: hv_mc_qpel_aver_horiz_src1_16x16_msa() fills a
 * local scratch buffer from src, then vert_mc_qpel_aver_src1_16x16_msa()
 * reads that buffer (with stride 16) and writes the result to dst.
 *
 * The scratch buffer is 272 bytes, i.e. 17 * 16.
 * NOTE(review): presumably 17 rows of 16 bytes - confirm against the
 * first-stage helper.
 */
static void hv_mc_qpel_aver_hv_src11_16x16_msa(const uint8_t *src,
                                               int32_t src_stride,
                                               uint8_t *dst,
                                               int32_t dst_stride)
{
    uint8_t scratch[17 * 16];

    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, scratch, 16, 16);
    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
}
4671cabdff1aSopenharmony_ci
4672cabdff1aSopenharmony_cistatic void hv_mc_qpel_aver_hv_src11_8x8_msa(const uint8_t *src,
4673cabdff1aSopenharmony_ci                                             int32_t src_stride,
4674cabdff1aSopenharmony_ci                                             uint8_t *dst, int32_t dst_stride)
4675cabdff1aSopenharmony_ci{
4676cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4677cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4678cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4679cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4680cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4681cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4682cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4683cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
4684cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4685cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4686cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4687cabdff1aSopenharmony_ci
4688cabdff1aSopenharmony_ci    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4689cabdff1aSopenharmony_ci    src += (4 * src_stride);
4690cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4691cabdff1aSopenharmony_ci                                         mask0, mask1, mask2, mask3,
4692cabdff1aSopenharmony_ci                                         const20, const6, const3);
4693cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4694cabdff1aSopenharmony_ci
4695cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4696cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
4697cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4698cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4699cabdff1aSopenharmony_ci                                         const20, const6, const3);
4700cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4701cabdff1aSopenharmony_ci
4702cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4703cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4704cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4705cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4706cabdff1aSopenharmony_ci    src += (2 * src_stride);
4707cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4708cabdff1aSopenharmony_ci                                         const20, const6, const3);
4709cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4710cabdff1aSopenharmony_ci
4711cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4712cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4713cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
4714cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4715cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4716cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4717cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4718cabdff1aSopenharmony_ci                                        const20, const6, const3);
4719cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
4720cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4721cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4722cabdff1aSopenharmony_ci    src += (2 * src_stride);
4723cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4724cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4725cabdff1aSopenharmony_ci
4726cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4727cabdff1aSopenharmony_ci                                         const20, const6, const3);
4728cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4729cabdff1aSopenharmony_ci
4730cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4731cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4732cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
4733cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4734cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4735cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4736cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4737cabdff1aSopenharmony_ci                                        const20, const6, const3);
4738cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
4739cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4740cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4741cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4742cabdff1aSopenharmony_ci                                              const20, const6, const3);
4743cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4744cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
4745cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4746cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4747cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4748cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4749cabdff1aSopenharmony_ci                                        const20, const6, const3);
4750cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4751cabdff1aSopenharmony_ci    dst += 2 * dst_stride;
4752cabdff1aSopenharmony_ci
4753cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
4754cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4755cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4756cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4757cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4758cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4759cabdff1aSopenharmony_ci                                        const20, const6, const3);
4760cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
4761cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4762cabdff1aSopenharmony_ci    ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4763cabdff1aSopenharmony_ci}
4764cabdff1aSopenharmony_ci
/*
 * 16x16 entry point built from two helper calls:
 *   1. hv_mc_qpel_aver_horiz_src0_16x16_msa() reads src and fills a
 *      272-byte stack scratch area;
 *   2. vert_mc_qpel_avg_dst_aver_src0_16x16_msa() reads that scratch area
 *      and writes to dst.
 *
 * The value 16 is passed next to the scratch pointer in both calls,
 * presumably as its row stride; 272 bytes is 17 rows of 16 bytes.
 * NOTE(review): the helper names suggest a horizontal pass followed by a
 * vertical pass that is averaged with dst, but neither helper is defined in
 * this part of the file - confirm against their definitions.
 */
static void hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(const uint8_t *src,
                                                       int32_t src_stride,
                                                       uint8_t *dst,
                                                       int32_t dst_stride)
{
    enum { SCRATCH_STRIDE = 16, SCRATCH_ROWS = 17 };
    uint8_t scratch[SCRATCH_ROWS * SCRATCH_STRIDE];

    /* First pass: src -> scratch. */
    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride,
                                         scratch, SCRATCH_STRIDE, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(scratch, SCRATCH_STRIDE,
                                             dst, dst_stride);
}
4775cabdff1aSopenharmony_ci
/*
 * 8x8 block: horizontal filter averaged with the source, vertical filter
 * averaged with the horizontal rows, final result averaged with dst.
 *
 * Steps as written below:
 *  - nine source rows are read (four LD_UB2 pairs plus one LD_UB);
 *  - every row goes through APPLY_HORIZ_QPEL_FILTER_8BYTE[_1ROW] and the
 *    result is averaged (__msa_aver_u_b) with the unshifted source bytes,
 *    giving horiz0..horiz8;
 *  - every pair of output rows comes from APPLY_VERT_QPEL_FILTER_8BYTE, is
 *    averaged with the horizontal rows of the same index, then averaged with
 *    the bytes currently in dst, and is stored with ST_D2.
 *
 * Register layout: an even-numbered horizN is built from two packed rows
 * (low / high 64 bits); the following odd-numbered horizN is the upper
 * doubleword of that vector replicated into both halves.
 *
 * NOTE(review): the APPLY_*_QPEL_FILTER_* macros and LD_UB2 / ST_D2 are
 * defined outside this part of the file; statements about what they compute
 * are assumptions to be confirmed against their definitions.
 */
4776cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(const uint8_t *src,
4777cabdff1aSopenharmony_ci                                                     int32_t src_stride,
4778cabdff1aSopenharmony_ci                                                     uint8_t *dst,
4779cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
4780cabdff1aSopenharmony_ci{
4781cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4782cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4783cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4784cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4785cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-shuffle index tables handed to the horizontal filter macros;
     * every index is in the range 0..8. */
4786cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4787cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4788cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4789cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Byte-splatted constants 20, 6 and 3, passed as the coefficient
     * arguments of both filter macros. */
4790cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4791cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4792cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4793cabdff1aSopenharmony_ci
    /* Source rows 0-1: horizontal filter. */
4794cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4795cabdff1aSopenharmony_ci    src += (2 * src_stride);
4796cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4797cabdff1aSopenharmony_ci                                         const20, const6, const3);
    /* Rows 2-3 are loaded early, before rows 0-1 are finished. */
4798cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4799cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Pack the low 8 bytes of inp0 and inp1 into one vector and average it
     * with the filter output. */
4800cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4801cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
    /* horiz1 = upper doubleword of horiz0 in both halves. */
4802cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2-3. */
4803cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4804cabdff1aSopenharmony_ci                                         const20, const6, const3);
4805cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4806cabdff1aSopenharmony_ci    src += (2 * src_stride);
4807cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4808cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
4809cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4-5. */
4810cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4811cabdff1aSopenharmony_ci                                         const20, const6, const3);
4812cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4813cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
4814cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1. The current dst rows are read before being
     * overwritten. The argument list repeats horiz0 / horiz1 at the top
     * edge, presumably standing in for rows above the block. */
4815cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4816cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
4817cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4818cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4819cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4820cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4821cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* First average: with the horizontal rows (low halves of horiz0 and
     * horiz1). Second average: with the packed low 8 bytes of the dst rows. */
4822cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4823cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4824cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Store doublewords 0 and 1 of res0 to two consecutive dst rows. */
4825cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4826cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4827cabdff1aSopenharmony_ci
    /* Source rows 6-7. */
4828cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4829cabdff1aSopenharmony_ci    src += (2 * src_stride);
4830cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4831cabdff1aSopenharmony_ci                                         const20, const6, const3);
4832cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4833cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
4834cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Output rows 2-3, averaged with horiz2 / horiz3 and then with dst. */
4835cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4836cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
4837cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4838cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4839cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4840cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4841cabdff1aSopenharmony_ci                                        const20, const6, const3);
4842cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4843cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4844cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4845cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4846cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4847cabdff1aSopenharmony_ci
    /* Source row 8: a single row, averaged directly with the loaded vector
     * (no packing step). */
4848cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4849cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4850cabdff1aSopenharmony_ci                                              const20, const6, const3);
4851cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
    /* Output rows 4-5, averaged with horiz4 / horiz5 and then with dst. */
4852cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4853cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
4854cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4855cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4856cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4857cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4858cabdff1aSopenharmony_ci                                        const20, const6, const3);
4859cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4860cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4861cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4862cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4863cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4864cabdff1aSopenharmony_ci
    /* Output rows 6-7. The argument list walks back from horiz8 (horiz8,
     * horiz8, horiz7, horiz6) at the bottom edge, presumably standing in
     * for rows below the block. */
4865cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4866cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
4867cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4868cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4869cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4870cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4871cabdff1aSopenharmony_ci                                        const20, const6, const3);
4872cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4873cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4874cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4875cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4876cabdff1aSopenharmony_ci}
4877cabdff1aSopenharmony_ci
/*
 * 16x16 entry point built from two helper calls:
 *   1. hv_mc_qpel_aver_horiz_16x16_msa() reads src and fills a 272-byte
 *      stack scratch area;
 *   2. vert_mc_qpel_avg_dst_aver_src0_16x16_msa() reads that scratch area
 *      and writes to dst.
 *
 * The value 16 is passed next to the scratch pointer in both calls,
 * presumably as its row stride; 272 bytes is 17 rows of 16 bytes.
 * NOTE(review): what each helper computes is not visible in this part of
 * the file - confirm against their definitions.
 */
static void hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(const uint8_t *src,
                                                     int32_t src_stride,
                                                     uint8_t *dst,
                                                     int32_t dst_stride)
{
    enum { SCRATCH_STRIDE = 16, SCRATCH_ROWS = 17 };
    uint8_t scratch[SCRATCH_ROWS * SCRATCH_STRIDE];

    /* First pass: src -> scratch. */
    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride,
                                    scratch, SCRATCH_STRIDE, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(scratch, SCRATCH_STRIDE,
                                             dst, dst_stride);
}
4888cabdff1aSopenharmony_ci
/*
 * 8x8 block: plain horizontal filter, vertical filter averaged with the
 * horizontal rows, final result averaged with dst.
 *
 * Steps as written below:
 *  - nine source rows are read (four LD_UB2 pairs plus one LD_UB);
 *  - every row goes through APPLY_HORIZ_QPEL_FILTER_8BYTE[_1ROW]; the macro
 *    result is used directly as horiz0..horiz8 (there is no averaging with
 *    the source in this variant);
 *  - every pair of output rows comes from APPLY_VERT_QPEL_FILTER_8BYTE, is
 *    averaged (__msa_aver_u_b) with the horizontal rows of the same index,
 *    then averaged with the bytes currently in dst, and is stored with
 *    ST_D2.
 *
 * Register layout: each odd-numbered horizN is the upper doubleword of the
 * preceding even-numbered vector replicated into both halves.
 *
 * NOTE(review): the APPLY_*_QPEL_FILTER_* macros and LD_UB2 / ST_D2 are
 * defined outside this part of the file; statements about what they compute
 * are assumptions to be confirmed against their definitions.
 */
4889cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(const uint8_t *src,
4890cabdff1aSopenharmony_ci                                                   int32_t src_stride,
4891cabdff1aSopenharmony_ci                                                   uint8_t *dst,
4892cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
4893cabdff1aSopenharmony_ci{
4894cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
4895cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
4896cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
4897cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
4898cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-shuffle index tables handed to the horizontal filter macros;
     * every index is in the range 0..8. */
4899cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
4900cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
4901cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
4902cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Byte-splatted constants 20, 6 and 3, passed as the coefficient
     * arguments of both filter macros. */
4903cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
4904cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
4905cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
4906cabdff1aSopenharmony_ci
    /* Source rows 0-1: the macro result is kept as-is in horiz0. */
4907cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4908cabdff1aSopenharmony_ci    src += (2 * src_stride);
4909cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4910cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4911cabdff1aSopenharmony_ci                                           const20, const6, const3);
    /* Source rows 2-3. */
4912cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4913cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* horiz1 = upper doubleword of horiz0 in both halves. */
4914cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
4915cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4916cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4917cabdff1aSopenharmony_ci                                           const20, const6, const3);
    /* Source rows 4-5. */
4918cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
4919cabdff1aSopenharmony_ci    src += (2 * src_stride);
4920cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
4921cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4922cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4923cabdff1aSopenharmony_ci                                           const20, const6, const3);
4924cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1. The current dst rows are read before being
     * overwritten. The argument list repeats horiz0 / horiz1 at the top
     * edge, presumably standing in for rows above the block. */
4925cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4926cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
4927cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4928cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
4929cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
4930cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
4931cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* First average: with the horizontal rows (low halves of horiz0 and
     * horiz1). Second average: with the packed low 8 bytes of the dst rows. */
4932cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4933cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4934cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Store doublewords 0 and 1 of res0 to two consecutive dst rows. */
4935cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4936cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4937cabdff1aSopenharmony_ci
    /* Source rows 6-7. */
4938cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
4939cabdff1aSopenharmony_ci    src += (2 * src_stride);
4940cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4941cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
4942cabdff1aSopenharmony_ci                                           const20, const6, const3);
4943cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Output rows 2-3, averaged with horiz2 / horiz3 and then with dst. */
4944cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4945cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
4946cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4947cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
4948cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
4949cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
4950cabdff1aSopenharmony_ci                                        const20, const6, const3);
4951cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4952cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4953cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4954cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4955cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4956cabdff1aSopenharmony_ci
    /* Source row 8: single-row variant of the horizontal filter macro. */
4957cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
4958cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4959cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
4960cabdff1aSopenharmony_ci                                                const20, const6, const3);
    /* Output rows 4-5, averaged with horiz4 / horiz5 and then with dst. */
4961cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4962cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
4963cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4964cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
4965cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
4966cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
4967cabdff1aSopenharmony_ci                                        const20, const6, const3);
4968cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4969cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4970cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
4971cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
4972cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
4973cabdff1aSopenharmony_ci
    /* Output rows 6-7. The argument list walks back from horiz8 (horiz8,
     * horiz8, horiz7, horiz6) at the bottom edge, presumably standing in
     * for rows below the block. */
4974cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
4975cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
4976cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4977cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
4978cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
4979cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
4980cabdff1aSopenharmony_ci                                        const20, const6, const3);
4981cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4982cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
4983cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
4984cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
4985cabdff1aSopenharmony_ci}
4986cabdff1aSopenharmony_ci
/*
 * 16x16 entry point built from two helper calls:
 *   1. hv_mc_qpel_aver_horiz_src1_16x16_msa() reads src and fills a
 *      272-byte stack scratch area;
 *   2. vert_mc_qpel_avg_dst_aver_src0_16x16_msa() reads that scratch area
 *      and writes to dst.
 *
 * The value 16 is passed next to the scratch pointer in both calls,
 * presumably as its row stride; 272 bytes is 17 rows of 16 bytes.
 * NOTE(review): what each helper computes is not visible in this part of
 * the file - confirm against their definitions.
 */
static void hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(const uint8_t *src,
                                                       int32_t src_stride,
                                                       uint8_t *dst,
                                                       int32_t dst_stride)
{
    enum { SCRATCH_STRIDE = 16, SCRATCH_ROWS = 17 };
    uint8_t scratch[SCRATCH_ROWS * SCRATCH_STRIDE];

    /* First pass: src -> scratch. */
    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride,
                                         scratch, SCRATCH_STRIDE, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(scratch, SCRATCH_STRIDE,
                                             dst, dst_stride);
}
4997cabdff1aSopenharmony_ci
/*
 * 8x8 block: horizontal filter averaged with the source shifted by one
 * byte, vertical filter averaged with the horizontal rows, final result
 * averaged with dst.
 *
 * Steps as written below:
 *  - nine source rows are read (four LD_UB2 pairs plus one LD_UB);
 *  - every row goes through APPLY_HORIZ_QPEL_FILTER_8BYTE[_1ROW]; the source
 *    vectors are then slid by one byte (SLDI_B2_UB / __msa_sldi_b with a
 *    count of 1) and averaged (__msa_aver_u_b) with the filter output,
 *    giving horiz0..horiz8;
 *  - every pair of output rows comes from APPLY_VERT_QPEL_FILTER_8BYTE, is
 *    averaged with the horizontal rows of the same index, then averaged with
 *    the bytes currently in dst, and is stored with ST_D2.
 *
 * Register layout: an even-numbered horizN is built from two packed rows
 * (low / high 64 bits); the following odd-numbered horizN is the upper
 * doubleword of that vector replicated into both halves.
 *
 * NOTE(review): the APPLY_*_QPEL_FILTER_* macros and LD_UB2 / SLDI_B2_UB /
 * ST_D2 are defined outside this part of the file; statements about what
 * they compute are assumptions to be confirmed against their definitions.
 */
4998cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(const uint8_t *src,
4999cabdff1aSopenharmony_ci                                                     int32_t src_stride,
5000cabdff1aSopenharmony_ci                                                     uint8_t *dst,
5001cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
5002cabdff1aSopenharmony_ci{
5003cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5004cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5005cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5006cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5007cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-shuffle index tables handed to the horizontal filter macros;
     * every index is in the range 0..8. */
5008cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5009cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5010cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5011cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* Byte-splatted constants 20, 6 and 3, passed as the coefficient
     * arguments of both filter macros. */
5012cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5013cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5014cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5015cabdff1aSopenharmony_ci
    /* Source rows 0-1: horizontal filter on the unshifted vectors. */
5016cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5017cabdff1aSopenharmony_ci    src += (2 * src_stride);
5018cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5019cabdff1aSopenharmony_ci                                         const20, const6, const3);
5020cabdff1aSopenharmony_ci
    /* Rows 2-3 are loaded early, before rows 0-1 are finished. */
5021cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5022cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Slide inp0 and inp1 by one byte (each vector paired with itself), so
     * that - presumably - lane i now holds source byte i + 1. This happens
     * after the filter macro has consumed the unshifted vectors. */
5023cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5024cabdff1aSopenharmony_ci
    /* Pack the low 8 bytes of the shifted rows and average them with the
     * filter output. */
5025cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5026cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
    /* horiz1 = upper doubleword of horiz0 in both halves. */
5027cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2-3. */
5028cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5029cabdff1aSopenharmony_ci                                         const20, const6, const3);
5030cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5031cabdff1aSopenharmony_ci    src += (2 * src_stride);
5032cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5033cabdff1aSopenharmony_ci
5034cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5035cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
5036cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4-5. */
5037cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5038cabdff1aSopenharmony_ci                                         const20, const6, const3);
5039cabdff1aSopenharmony_ci
5040cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5041cabdff1aSopenharmony_ci
5042cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5043cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
5044cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1. The current dst rows are read before being
     * overwritten. The argument list repeats horiz0 / horiz1 at the top
     * edge, presumably standing in for rows above the block. */
5045cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5046cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
5047cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5048cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5049cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5050cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5051cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* First average: with the horizontal rows (low halves of horiz0 and
     * horiz1). Second average: with the packed low 8 bytes of the dst rows. */
5052cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5053cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5054cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
    /* Store doublewords 0 and 1 of res0 to two consecutive dst rows. */
5055cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5056cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5057cabdff1aSopenharmony_ci
    /* Source rows 6-7. */
5058cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5059cabdff1aSopenharmony_ci    src += (2 * src_stride);
5060cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5061cabdff1aSopenharmony_ci                                         const20, const6, const3);
5062cabdff1aSopenharmony_ci
5063cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5064cabdff1aSopenharmony_ci
5065cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5066cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
5067cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
    /* Output rows 2-3, averaged with horiz2 / horiz3 and then with dst. */
5068cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5069cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
5070cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5071cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5072cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5073cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5074cabdff1aSopenharmony_ci                                        const20, const6, const3);
5075cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5076cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5077cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5078cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5079cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5080cabdff1aSopenharmony_ci
    /* Source row 8: single-row filter, then the same one-byte slide applied
     * with the intrinsic directly before averaging. */
5081cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5082cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5083cabdff1aSopenharmony_ci                                              const20, const6, const3);
5084cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5085cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
    /* Output rows 4-5, averaged with horiz4 / horiz5 and then with dst. */
5086cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5087cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
5088cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5089cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
5090cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
5091cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
5092cabdff1aSopenharmony_ci                                        const20, const6, const3);
5093cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5094cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5095cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5096cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5097cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5098cabdff1aSopenharmony_ci
    /* Output rows 6-7. The argument list walks back from horiz8 (horiz8,
     * horiz8, horiz7, horiz6) at the bottom edge, presumably standing in
     * for rows below the block. */
5099cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5100cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
5101cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5102cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
5103cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
5104cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
5105cabdff1aSopenharmony_ci                                        const20, const6, const3);
5106cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5107cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5108cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5109cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5110cabdff1aSopenharmony_ci}
5111cabdff1aSopenharmony_ci
/*
 * 16x16 entry point built from two helper calls:
 *   1. hv_mc_qpel_aver_horiz_src0_16x16_msa() reads src and fills a
 *      272-byte stack scratch area;
 *   2. vert_mc_qpel_avg_dst_16x16_msa() reads that scratch area and writes
 *      to dst.
 *
 * The value 16 is passed next to the scratch pointer in both calls,
 * presumably as its row stride; 272 bytes is 17 rows of 16 bytes.
 * NOTE(review): what each helper computes is not visible in this part of
 * the file - confirm against their definitions.
 */
static void hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(const uint8_t *src,
                                                     int32_t src_stride,
                                                     uint8_t *dst,
                                                     int32_t dst_stride)
{
    enum { SCRATCH_STRIDE = 16, SCRATCH_ROWS = 17 };
    uint8_t scratch[SCRATCH_ROWS * SCRATCH_STRIDE];

    /* First pass: src -> scratch. */
    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride,
                                         scratch, SCRATCH_STRIDE, 16);
    /* Second pass: scratch -> dst. */
    vert_mc_qpel_avg_dst_16x16_msa(scratch, SCRATCH_STRIDE, dst, dst_stride);
}
5122cabdff1aSopenharmony_ci
/*
 * 8x8 block: horizontal filter averaged with the unshifted source row,
 * followed by a vertical filter whose result is averaged with dst.
 *
 * src, src_stride: source pixels; nine rows are loaded (four pairs plus one
 *                  single row).
 * dst, dst_stride: destination; each pair of rows is read, averaged with the
 *                  filter output and written back.
 *
 * Vector layout: every even-numbered horizN is built from a pair of source
 * rows whose low 8 bytes were packed with __msa_ilvr_d (first row in the low
 * half, second row in the high half); the following odd-numbered vector is
 * that high half replicated by __msa_splati_d(..., 1).
 * NOTE(review): this assumes APPLY_HORIZ_QPEL_FILTER_8BYTE returns its two
 * rows in the same low/high order - the filter macros are defined elsewhere
 * in this file.
 */
5123cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(const uint8_t *src,
5124cabdff1aSopenharmony_ci                                                   int32_t src_stride,
5125cabdff1aSopenharmony_ci                                                   uint8_t *dst,
5126cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
5127cabdff1aSopenharmony_ci{
5128cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5129cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5130cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5131cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5132cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-index tables (values 0..8) handed to the horizontal filter macros. */
5133cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5134cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5135cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5136cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* The constants 20, 6 and 3 replicated into every byte lane; passed to
     * both the horizontal and the vertical filter macros. */
5137cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5138cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5139cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5140cabdff1aSopenharmony_ci
    /* Source rows 0-1: horizontal filter, then average with the source. */
5141cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5142cabdff1aSopenharmony_ci    src += (2 * src_stride);
5143cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5144cabdff1aSopenharmony_ci                                         const20, const6, const3);
5145cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5146cabdff1aSopenharmony_ci    src += (2 * src_stride);
    /* Pack the low 8 bytes of both source rows into a single vector. */
5147cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5148cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
5149cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2-3. */
5150cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5151cabdff1aSopenharmony_ci                                         const20, const6, const3);
5152cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5153cabdff1aSopenharmony_ci    src += (2 * src_stride);
5154cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5155cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
5156cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4-5. */
5157cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5158cabdff1aSopenharmony_ci                                         const20, const6, const3);
5159cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5160cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
5161cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1: vertical filter; horiz0/horiz1 are repeated in the
     * argument list where taps would reach above the first row. */
5162cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5163cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5164cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5165cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5166cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5167cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Average with the two existing dst rows, then store via ST_D2 with
     * element indices 0 and 1 (one per row). */
5168cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5169cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5170cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5171cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5172cabdff1aSopenharmony_ci
    /* Source rows 6-7, then output rows 2-3. */
5173cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5174cabdff1aSopenharmony_ci    src += (2 * src_stride);
5175cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5176cabdff1aSopenharmony_ci                                         const20, const6, const3);
5177cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5178cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
5179cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5180cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5181cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5182cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5183cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5184cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5185cabdff1aSopenharmony_ci                                        const20, const6, const3);
5186cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5187cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5188cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5189cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5190cabdff1aSopenharmony_ci
    /* Source row 8 (single row), then output rows 4-5. */
5191cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5192cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5193cabdff1aSopenharmony_ci                                              const20, const6, const3);
5194cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
5195cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5196cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5197cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
5198cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
5199cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
5200cabdff1aSopenharmony_ci                                        const20, const6, const3);
5201cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5202cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5203cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5204cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5205cabdff1aSopenharmony_ci
    /* Output rows 6-7: horiz8/horiz7/horiz6 are repeated in the argument
     * list where taps would reach below the last loaded row. */
5206cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5207cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5208cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
5209cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
5210cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
5211cabdff1aSopenharmony_ci                                        const20, const6, const3);
5212cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5213cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5214cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5215cabdff1aSopenharmony_ci}
5216cabdff1aSopenharmony_ci
/*
 * 16x16 block, two passes through a temporary stack buffer:
 *   1. hv_mc_qpel_aver_horiz_16x16_msa() reads src and writes buff using a
 *      stride of 16;
 *   2. vert_mc_qpel_avg_dst_16x16_msa() reads buff (stride 16) and writes dst.
 * Both helpers are defined elsewhere in this file.
 */
5217cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_16x16_msa(const uint8_t *src, int32_t src_stride,
5218cabdff1aSopenharmony_ci                                         uint8_t *dst, int32_t dst_stride)
5219cabdff1aSopenharmony_ci{
    /* 272 = 17 * 16. NOTE(review): presumably 16 output rows plus one extra
     * row needed by the vertical pass - confirm against the helpers. */
5220cabdff1aSopenharmony_ci    uint8_t buff[272];
5221cabdff1aSopenharmony_ci
5222cabdff1aSopenharmony_ci    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, buff, 16, 16);
5223cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_16x16_msa(buff, 16, dst, dst_stride);
5224cabdff1aSopenharmony_ci
5225cabdff1aSopenharmony_ci}
5226cabdff1aSopenharmony_ci
/*
 * 8x8 block: horizontal filter followed by a vertical filter, with the final
 * result averaged with dst. Unlike the aver_h_* variants in this file, the
 * horizontal filter output is used directly, with no source averaging.
 *
 * src, src_stride: source pixels; nine rows are loaded (four pairs plus one
 *                  single row), all before any dst access.
 * dst, dst_stride: destination; each pair of rows is read, averaged with the
 *                  filter output and written back.
 *
 * Vector layout: each odd-numbered horizN is the high 8 bytes of the
 * preceding even-numbered vector replicated by __msa_splati_d(..., 1).
 * NOTE(review): presumably APPLY_HORIZ_QPEL_FILTER_8BYTE returns two rows
 * packed as low/high halves - the filter macros are defined elsewhere in
 * this file.
 */
5227cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_8x8_msa(const uint8_t *src, int32_t src_stride,
5228cabdff1aSopenharmony_ci                                       uint8_t *dst, int32_t dst_stride)
5229cabdff1aSopenharmony_ci{
5230cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5231cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5232cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5233cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5234cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-index tables (values 0..8) handed to the horizontal filter macros. */
5235cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5236cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5237cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5238cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* The constants 20, 6 and 3 replicated into every byte lane; passed to
     * both the horizontal and the vertical filter macros. */
5239cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5240cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5241cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5242cabdff1aSopenharmony_ci
    /* Horizontal pass over source rows 0-7 (in pairs) and row 8 (alone). */
5243cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5244cabdff1aSopenharmony_ci    src += (2 * src_stride);
5245cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5246cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5247cabdff1aSopenharmony_ci                                           const20, const6, const3);
5248cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5249cabdff1aSopenharmony_ci    src += (2 * src_stride);
5250cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
5251cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5252cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5253cabdff1aSopenharmony_ci                                           const20, const6, const3);
5254cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5255cabdff1aSopenharmony_ci    src += (2 * src_stride);
5256cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
5257cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5258cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5259cabdff1aSopenharmony_ci                                           const20, const6, const3);
5260cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
5261cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5262cabdff1aSopenharmony_ci    src += (2 * src_stride);
5263cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5264cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5265cabdff1aSopenharmony_ci                                           const20, const6, const3);
5266cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5267cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5268cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
5269cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
5270cabdff1aSopenharmony_ci                                                const20, const6, const3);
    /* Output rows 0-1: vertical filter; horiz0/horiz1 are repeated in the
     * argument list where taps would reach above the first row. The result
     * is averaged with the two existing dst rows and stored via ST_D2 with
     * element indices 0 and 1 (one per row). */
5271cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5272cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5273cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5274cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5275cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5276cabdff1aSopenharmony_ci                                        const20, const6, const3);
5277cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5278cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5279cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5280cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5281cabdff1aSopenharmony_ci
    /* Output rows 2-3. */
5282cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5283cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5284cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5285cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5286cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5287cabdff1aSopenharmony_ci                                        const20, const6, const3);
5288cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5289cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5290cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5291cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5292cabdff1aSopenharmony_ci
    /* Output rows 4-5. */
5293cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5294cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5295cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
5296cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
5297cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
5298cabdff1aSopenharmony_ci                                        const20, const6, const3);
5299cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5300cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5301cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5302cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5303cabdff1aSopenharmony_ci
    /* Output rows 6-7: horiz8/horiz7/horiz6 are repeated in the argument
     * list where taps would reach below the last loaded row. */
5304cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5305cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5306cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
5307cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
5308cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
5309cabdff1aSopenharmony_ci                                        const20, const6, const3);
5310cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5311cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5312cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5313cabdff1aSopenharmony_ci}
5314cabdff1aSopenharmony_ci
/*
 * 16x16 block, two passes through a temporary stack buffer:
 *   1. hv_mc_qpel_aver_horiz_src1_16x16_msa() reads src and writes buff
 *      using a stride of 16;
 *   2. vert_mc_qpel_avg_dst_16x16_msa() reads buff (stride 16) and writes dst.
 * Both helpers are defined elsewhere in this file.
 */
5315cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(const uint8_t *src,
5316cabdff1aSopenharmony_ci                                                     int32_t src_stride,
5317cabdff1aSopenharmony_ci                                                     uint8_t *dst,
5318cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
5319cabdff1aSopenharmony_ci{
    /* 272 = 17 * 16. NOTE(review): presumably 16 output rows plus one extra
     * row needed by the vertical pass - confirm against the helpers. */
5320cabdff1aSopenharmony_ci    uint8_t buff[272];
5321cabdff1aSopenharmony_ci
5322cabdff1aSopenharmony_ci    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, buff, 16, 16);
5323cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_16x16_msa(buff, 16, dst, dst_stride);
5324cabdff1aSopenharmony_ci}
5325cabdff1aSopenharmony_ci
/*
 * 8x8 block: horizontal filter averaged with the source row slid by one
 * byte, followed by a vertical filter whose result is averaged with dst.
 *
 * This differs from the aver_h_src0 8x8 variant only in that every source
 * vector goes through SLDI_B2_UB(..., 1, ...) / __msa_sldi_b(..., 1) before
 * it is averaged with the horizontal filter output.
 * NOTE(review): per the _src1 name this presumably selects the pixel one
 * position to the right - confirm against the SLDI_B2_UB definition.
 *
 * src, src_stride: source pixels; nine rows are loaded (four pairs plus one
 *                  single row).
 * dst, dst_stride: destination; each pair of rows is read, averaged with the
 *                  filter output and written back.
 *
 * Vector layout: every even-numbered horizN is built from a pair of source
 * rows whose low 8 bytes were packed with __msa_ilvr_d; the following
 * odd-numbered vector is the high half replicated by __msa_splati_d(..., 1).
 * The filter macros are defined elsewhere in this file.
 */
5326cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(const uint8_t *src,
5327cabdff1aSopenharmony_ci                                                   int32_t src_stride,
5328cabdff1aSopenharmony_ci                                                   uint8_t *dst,
5329cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
5330cabdff1aSopenharmony_ci{
5331cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5332cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5333cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5334cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5335cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-index tables (values 0..8) handed to the horizontal filter macros. */
5336cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5337cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5338cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5339cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* The constants 20, 6 and 3 replicated into every byte lane; passed to
     * both the horizontal and the vertical filter macros. */
5340cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5341cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5342cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5343cabdff1aSopenharmony_ci
    /* Source rows 0-1: filter first, then slide the raw rows by one byte
     * and average them with the filter output. */
5344cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5345cabdff1aSopenharmony_ci    src += (2 * src_stride);
5346cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5347cabdff1aSopenharmony_ci                                         const20, const6, const3);
5348cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5349cabdff1aSopenharmony_ci    src += (2 * src_stride);
5350cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5351cabdff1aSopenharmony_ci
    /* Pack the low 8 bytes of both slid rows into a single vector. */
5352cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5353cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
5354cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2-3. */
5355cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5356cabdff1aSopenharmony_ci                                         const20, const6, const3);
5357cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5358cabdff1aSopenharmony_ci    src += (2 * src_stride);
5359cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5360cabdff1aSopenharmony_ci
5361cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5362cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
5363cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* Source rows 4-5. */
5364cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5365cabdff1aSopenharmony_ci                                         const20, const6, const3);
5366cabdff1aSopenharmony_ci
5367cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5368cabdff1aSopenharmony_ci
5369cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5370cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
5371cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1: vertical filter; horiz0/horiz1 are repeated in the
     * argument list where taps would reach above the first row. */
5372cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5373cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5374cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5375cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5376cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5377cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Average with the two existing dst rows, then store via ST_D2 with
     * element indices 0 and 1 (one per row). */
5378cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5379cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5380cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5381cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5382cabdff1aSopenharmony_ci
    /* Source rows 6-7, then output rows 2-3. */
5383cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5384cabdff1aSopenharmony_ci    src += (2 * src_stride);
5385cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5386cabdff1aSopenharmony_ci                                         const20, const6, const3);
5387cabdff1aSopenharmony_ci
5388cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5389cabdff1aSopenharmony_ci
5390cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5391cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
5392cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5393cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5394cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5395cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5396cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5397cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5398cabdff1aSopenharmony_ci                                        const20, const6, const3);
5399cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5400cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5401cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5402cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5403cabdff1aSopenharmony_ci
    /* Source row 8 (single row, slid by one byte), then output rows 4-5. */
5404cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5405cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5406cabdff1aSopenharmony_ci                                              const20, const6, const3);
5407cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5408cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
5409cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5410cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5411cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
5412cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
5413cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
5414cabdff1aSopenharmony_ci                                        const20, const6, const3);
5415cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5416cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5417cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5418cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5419cabdff1aSopenharmony_ci
    /* Output rows 6-7: horiz8/horiz7/horiz6 are repeated in the argument
     * list where taps would reach below the last loaded row. */
5420cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5421cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5422cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
5423cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
5424cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
5425cabdff1aSopenharmony_ci                                        const20, const6, const3);
5426cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5427cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5428cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5429cabdff1aSopenharmony_ci}
5430cabdff1aSopenharmony_ci
/*
 * 16x16 block, two passes through a temporary stack buffer:
 *   1. hv_mc_qpel_aver_horiz_src0_16x16_msa() reads src and writes buff
 *      using a stride of 16;
 *   2. vert_mc_qpel_avg_dst_aver_src1_16x16_msa() reads buff (stride 16) and
 *      writes dst.
 * Both helpers are defined elsewhere in this file.
 */
5431cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(const uint8_t *src,
5432cabdff1aSopenharmony_ci                                                       int32_t src_stride,
5433cabdff1aSopenharmony_ci                                                       uint8_t *dst,
5434cabdff1aSopenharmony_ci                                                       int32_t dst_stride)
5435cabdff1aSopenharmony_ci{
    /* 272 = 17 * 16. NOTE(review): presumably 16 output rows plus one extra
     * row needed by the vertical pass - confirm against the helpers. */
5436cabdff1aSopenharmony_ci    uint8_t buff[272];
5437cabdff1aSopenharmony_ci
5438cabdff1aSopenharmony_ci    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, buff, 16, 16);
5439cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(buff, 16, dst, dst_stride);
5440cabdff1aSopenharmony_ci}
5441cabdff1aSopenharmony_ci
/*
 * 8x8 block with three averaging steps:
 *   - the horizontal filter output is averaged with the unshifted source row;
 *   - the vertical filter output is averaged with a pair of horizontal-pass
 *     rows (horiz1/horiz2, horiz3/horiz4, horiz5/horiz6, horiz7/horiz8),
 *     i.e. the rows one below the two output rows being produced;
 *   - that result is averaged with the existing dst rows and stored.
 *
 * src, src_stride: source pixels; nine rows are loaded (four pairs plus one
 *                  single row).
 * dst, dst_stride: destination; each pair of rows is read, averaged with the
 *                  filter output and written back.
 *
 * Vector layout: every even-numbered horizN is built from a pair of source
 * rows whose low 8 bytes were packed with __msa_ilvr_d; the following
 * odd-numbered vector is the high half replicated by __msa_splati_d(..., 1).
 * NOTE(review): this assumes APPLY_HORIZ_QPEL_FILTER_8BYTE returns its two
 * rows in the same low/high order - the filter macros are defined elsewhere
 * in this file.
 */
5442cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(const uint8_t *src,
5443cabdff1aSopenharmony_ci                                                     int32_t src_stride,
5444cabdff1aSopenharmony_ci                                                     uint8_t *dst,
5445cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
5446cabdff1aSopenharmony_ci{
5447cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5448cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5449cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5450cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5451cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
    /* Byte-index tables (values 0..8) handed to the horizontal filter macros. */
5452cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5453cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5454cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5455cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
    /* The constants 20, 6 and 3 replicated into every byte lane; passed to
     * both the horizontal and the vertical filter macros. */
5456cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5457cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5458cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5459cabdff1aSopenharmony_ci
    /* Source rows 0-1: horizontal filter, then average with the source. */
5460cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5461cabdff1aSopenharmony_ci    src += (2 * src_stride);
5462cabdff1aSopenharmony_ci
5463cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5464cabdff1aSopenharmony_ci                                         const20, const6, const3);
5465cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5466cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
5467cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
    /* Source rows 2-3. */
5468cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5469cabdff1aSopenharmony_ci    src += (2 * src_stride);
5470cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5471cabdff1aSopenharmony_ci                                         const20, const6, const3);
5472cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5473cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
5474cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
    /* dst rows 0-1 are fetched here; source rows 4-5 follow. */
5475cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5476cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5477cabdff1aSopenharmony_ci    src += (2 * src_stride);
5478cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5479cabdff1aSopenharmony_ci                                         const20, const6, const3);
5480cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5481cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
5482cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
    /* Output rows 0-1: vertical filter; horiz0/horiz1 are repeated in the
     * argument list where taps would reach above the first row. */
5483cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5484cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5485cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5486cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5487cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* First average with horizontal rows 1 and 2, then with dst rows 0-1;
     * ST_D2 is given element indices 0 and 1 (one per row). */
5488cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
5489cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5490cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5491cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5492cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5493cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5494cabdff1aSopenharmony_ci
    /* Source rows 6-7, then output rows 2-3 (averaged with horizontal
     * rows 3 and 4, then with dst). */
5495cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5496cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5497cabdff1aSopenharmony_ci    src += (2 * src_stride);
5498cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5499cabdff1aSopenharmony_ci                                         const20, const6, const3);
5500cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5501cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
5502cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5503cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5504cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5505cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5506cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5507cabdff1aSopenharmony_ci                                        const20, const6, const3);
5508cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
5509cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5510cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5511cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5512cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5513cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5514cabdff1aSopenharmony_ci
    /* Source row 8 (single row). Both remaining vertical filters are
     * evaluated before either result is stored. */
5515cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5516cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5517cabdff1aSopenharmony_ci                                              const20, const6, const3);
5518cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
5519cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5520cabdff1aSopenharmony_ci                                        horiz5, horiz6, horiz7, horiz8,
5521cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz3, horiz2,
5522cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz8,
5523cabdff1aSopenharmony_ci                                        const20, const6, const3);
5524cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5525cabdff1aSopenharmony_ci                                        horiz7, horiz8, horiz8, horiz7,
5526cabdff1aSopenharmony_ci                                        horiz7, horiz6, horiz5, horiz4,
5527cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz6,
5528cabdff1aSopenharmony_ci                                        const20, const6, const3);
    /* Output rows 4-5: average with horizontal rows 5 and 6, then dst. */
5529cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
5530cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5531cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5532cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5533cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5534cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5535cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5536cabdff1aSopenharmony_ci
    /* Output rows 6-7: average with horizontal rows 7 and 8, then dst. */
5537cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
5538cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5539cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5540cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5541cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5542cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5543cabdff1aSopenharmony_ci}
5544cabdff1aSopenharmony_ci
/*
 * 16x16 block, two passes through a temporary stack buffer:
 *   1. hv_mc_qpel_aver_horiz_16x16_msa() reads src and writes buff using a
 *      stride of 16;
 *   2. vert_mc_qpel_avg_dst_aver_src1_16x16_msa() reads buff (stride 16) and
 *      writes dst.
 * Both helpers are defined elsewhere in this file.
 */
5545cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(const uint8_t *src,
5546cabdff1aSopenharmony_ci                                                     int32_t src_stride,
5547cabdff1aSopenharmony_ci                                                     uint8_t *dst,
5548cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
5549cabdff1aSopenharmony_ci{
    /* 272 = 17 * 16. NOTE(review): presumably 16 output rows plus one extra
     * row needed by the vertical pass - confirm against the helpers. */
5550cabdff1aSopenharmony_ci    uint8_t buff[272];
5551cabdff1aSopenharmony_ci
5552cabdff1aSopenharmony_ci    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, buff, 16, 16);
5553cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(buff, 16, dst, dst_stride);
5554cabdff1aSopenharmony_ci}
5555cabdff1aSopenharmony_ci
5556cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(const uint8_t *src,
5557cabdff1aSopenharmony_ci                                                   int32_t src_stride,
5558cabdff1aSopenharmony_ci                                                   uint8_t *dst,
5559cabdff1aSopenharmony_ci                                                   int32_t dst_stride)
5560cabdff1aSopenharmony_ci{
5561cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5562cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5563cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5564cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5565cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
5566cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5567cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5568cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5569cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
5570cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5571cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5572cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5573cabdff1aSopenharmony_ci
5574cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5575cabdff1aSopenharmony_ci    src += (2 * src_stride);
5576cabdff1aSopenharmony_ci    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5577cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5578cabdff1aSopenharmony_ci                                           const20, const6, const3);
5579cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
5580cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5581cabdff1aSopenharmony_ci    src += (2 * src_stride);
5582cabdff1aSopenharmony_ci    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5583cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5584cabdff1aSopenharmony_ci                                           const20, const6, const3);
5585cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
5586cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5587cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5588cabdff1aSopenharmony_ci    src += (2 * src_stride);
5589cabdff1aSopenharmony_ci    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5590cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5591cabdff1aSopenharmony_ci                                           const20, const6, const3);
5592cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
5593cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5594cabdff1aSopenharmony_ci                                        horiz1, horiz2, horiz3, horiz4,
5595cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz0, horiz1,
5596cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz5,
5597cabdff1aSopenharmony_ci                                        const20, const6, const3);
5598cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
5599cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5600cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5601cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5602cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5603cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5604cabdff1aSopenharmony_ci
5605cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5606cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5607cabdff1aSopenharmony_ci    src += (2 * src_stride);
5608cabdff1aSopenharmony_ci    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5609cabdff1aSopenharmony_ci                                           mask0, mask1, mask2, mask3,
5610cabdff1aSopenharmony_ci                                           const20, const6, const3);
5611cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5612cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5613cabdff1aSopenharmony_ci                                        horiz3, horiz4, horiz5, horiz6,
5614cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz1, horiz0,
5615cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz7,
5616cabdff1aSopenharmony_ci                                        const20, const6, const3);
5617cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
5618cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5619cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5620cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5621cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5622cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5623cabdff1aSopenharmony_ci
5624cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5625cabdff1aSopenharmony_ci    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
5626cabdff1aSopenharmony_ci                                                mask0, mask1, mask2, mask3,
5627cabdff1aSopenharmony_ci                                                const20, const6, const3);
5628cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5,
5629cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz5, horiz4,
5630cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz6, horiz7, horiz8,
5631cabdff1aSopenharmony_ci                                        horiz8, const20, const6, const3);
5632cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7,
5633cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz7, horiz6,
5634cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz8, horiz8, horiz7,
5635cabdff1aSopenharmony_ci                                        horiz6, const20, const6, const3);
5636cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
5637cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5638cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5639cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5640cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5641cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5642cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5643cabdff1aSopenharmony_ci
5644cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
5645cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5646cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5647cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5648cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5649cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5650cabdff1aSopenharmony_ci}
5651cabdff1aSopenharmony_ci
/*
 * 16x16 two-pass variant: hv_mc_qpel_aver_horiz_src1_16x16_msa() writes into
 * a local scratch buffer, which vert_mc_qpel_avg_dst_aver_src1_16x16_msa()
 * then consumes to update dst.
 */
static void hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(const uint8_t *src,
                                                       int32_t src_stride,
                                                       uint8_t *dst,
                                                       int32_t dst_stride)
{
    /* 17 * 16 = 272 bytes of scratch, addressed with a stride of 16. */
    enum { TMP_STRIDE = 16, TMP_LINES = 17 };
    uint8_t tmp[TMP_LINES * TMP_STRIDE];

    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, tmp, TMP_STRIDE, 16);
    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(tmp, TMP_STRIDE, dst, dst_stride);
}
5662cabdff1aSopenharmony_ci
5663cabdff1aSopenharmony_cistatic void hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(const uint8_t *src,
5664cabdff1aSopenharmony_ci                                                     int32_t src_stride,
5665cabdff1aSopenharmony_ci                                                     uint8_t *dst,
5666cabdff1aSopenharmony_ci                                                     int32_t dst_stride)
5667cabdff1aSopenharmony_ci{
5668cabdff1aSopenharmony_ci    v16u8 inp0, inp1, inp2, inp3;
5669cabdff1aSopenharmony_ci    v16u8 res0, res1, avg0, avg1;
5670cabdff1aSopenharmony_ci    v16u8 horiz0, horiz1, horiz2, horiz3;
5671cabdff1aSopenharmony_ci    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
5672cabdff1aSopenharmony_ci    v16u8 dst0, dst1;
5673cabdff1aSopenharmony_ci    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
5674cabdff1aSopenharmony_ci    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
5675cabdff1aSopenharmony_ci    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
5676cabdff1aSopenharmony_ci    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
5677cabdff1aSopenharmony_ci    v16u8 const20 = (v16u8) __msa_ldi_b(20);
5678cabdff1aSopenharmony_ci    v16u8 const6 = (v16u8) __msa_ldi_b(6);
5679cabdff1aSopenharmony_ci    v16u8 const3 = (v16u8) __msa_ldi_b(3);
5680cabdff1aSopenharmony_ci
5681cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5682cabdff1aSopenharmony_ci    src += (2 * src_stride);
5683cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5684cabdff1aSopenharmony_ci                                         const20, const6, const3);
5685cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5686cabdff1aSopenharmony_ci    src += (2 * src_stride);
5687cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5688cabdff1aSopenharmony_ci
5689cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5690cabdff1aSopenharmony_ci    horiz0 = __msa_aver_u_b(inp0, res0);
5691cabdff1aSopenharmony_ci    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
5692cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5693cabdff1aSopenharmony_ci                                         const20, const6, const3);
5694cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp0, inp1);
5695cabdff1aSopenharmony_ci    src += (2 * src_stride);
5696cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5697cabdff1aSopenharmony_ci
5698cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5699cabdff1aSopenharmony_ci    horiz2 = __msa_aver_u_b(inp2, res1);
5700cabdff1aSopenharmony_ci    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
5701cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5702cabdff1aSopenharmony_ci                                         const20, const6, const3);
5703cabdff1aSopenharmony_ci    SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5704cabdff1aSopenharmony_ci
5705cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5706cabdff1aSopenharmony_ci    horiz4 = __msa_aver_u_b(inp0, res0);
5707cabdff1aSopenharmony_ci    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
5708cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5709cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
5710cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, horiz1,
5711cabdff1aSopenharmony_ci                                        horiz2, horiz3, horiz4, horiz1, horiz0,
5712cabdff1aSopenharmony_ci                                        horiz0, horiz1, horiz2, horiz3, horiz4,
5713cabdff1aSopenharmony_ci                                        horiz5, const20, const6, const3);
5714cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5715cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5716cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5717cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5718cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5719cabdff1aSopenharmony_ci
5720cabdff1aSopenharmony_ci    LD_UB2(src, src_stride, inp2, inp3);
5721cabdff1aSopenharmony_ci    src += (2 * src_stride);
5722cabdff1aSopenharmony_ci    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5723cabdff1aSopenharmony_ci                                         const20, const6, const3);
5724cabdff1aSopenharmony_ci    SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5725cabdff1aSopenharmony_ci
5726cabdff1aSopenharmony_ci    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5727cabdff1aSopenharmony_ci    horiz6 = __msa_aver_u_b(inp2, res1);
5728cabdff1aSopenharmony_ci    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
5729cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5730cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
5731cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, horiz3,
5732cabdff1aSopenharmony_ci                                        horiz4, horiz5, horiz6, horiz3, horiz2,
5733cabdff1aSopenharmony_ci                                        horiz1, horiz0, horiz4, horiz5, horiz6,
5734cabdff1aSopenharmony_ci                                        horiz7, const20, const6, const3);
5735cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5736cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5737cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5738cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5739cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5740cabdff1aSopenharmony_ci
5741cabdff1aSopenharmony_ci    inp0 = LD_UB(src);
5742cabdff1aSopenharmony_ci    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5743cabdff1aSopenharmony_ci                                              const20, const6, const3);
5744cabdff1aSopenharmony_ci    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5745cabdff1aSopenharmony_ci    horiz8 = __msa_aver_u_b(inp0, res0);
5746cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5747cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
5748cabdff1aSopenharmony_ci    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5,
5749cabdff1aSopenharmony_ci                                        horiz6, horiz7, horiz8, horiz5, horiz4,
5750cabdff1aSopenharmony_ci                                        horiz3, horiz2, horiz6, horiz7, horiz8,
5751cabdff1aSopenharmony_ci                                        horiz8, const20, const6, const3);
5752cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5753cabdff1aSopenharmony_ci    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5754cabdff1aSopenharmony_ci    res0 = __msa_aver_u_b(avg0, res0);
5755cabdff1aSopenharmony_ci    ST_D2(res0, 0, 1, dst, dst_stride);
5756cabdff1aSopenharmony_ci    dst += (2 * dst_stride);
5757cabdff1aSopenharmony_ci
5758cabdff1aSopenharmony_ci    LD_UB2(dst, dst_stride, dst0, dst1);
5759cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
5760cabdff1aSopenharmony_ci    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7,
5761cabdff1aSopenharmony_ci                                        horiz8, horiz8, horiz7, horiz7, horiz6,
5762cabdff1aSopenharmony_ci                                        horiz5, horiz4, horiz8, horiz8, horiz7,
5763cabdff1aSopenharmony_ci                                        horiz6, const20, const6, const3);
5764cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5765cabdff1aSopenharmony_ci    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
5766cabdff1aSopenharmony_ci    res1 = __msa_aver_u_b(avg1, res1);
5767cabdff1aSopenharmony_ci    ST_D2(res1, 0, 1, dst, dst_stride);
5768cabdff1aSopenharmony_ci}
5769cabdff1aSopenharmony_ci
/*
 * Copy eight rows of one 64-bit word each from src to dst, two rows per
 * iteration; both rows of a pair are read before either is written.
 */
static void copy_8x8_msa(const uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride)
{
    uint64_t even_row, odd_row;
    int32_t row;

    for (row = 0; row < 8; row += 2) {
        even_row = LD(src);
        src += src_stride;
        odd_row = LD(src);
        src += src_stride;

        SD(even_row, dst);
        dst += dst_stride;
        SD(odd_row, dst);
        dst += dst_stride;
    }
}
5788cabdff1aSopenharmony_ci
5789cabdff1aSopenharmony_cistatic void copy_16x16_msa(const uint8_t *src, int32_t src_stride,
5790cabdff1aSopenharmony_ci                           uint8_t *dst, int32_t dst_stride)
5791cabdff1aSopenharmony_ci{
5792cabdff1aSopenharmony_ci    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
5793cabdff1aSopenharmony_ci    v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
5794cabdff1aSopenharmony_ci
5795cabdff1aSopenharmony_ci    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
5796cabdff1aSopenharmony_ci    src += (8 * src_stride);
5797cabdff1aSopenharmony_ci    LD_UB8(src, src_stride,
5798cabdff1aSopenharmony_ci           src8, src9, src10, src11, src12, src13, src14, src15);
5799cabdff1aSopenharmony_ci
5800cabdff1aSopenharmony_ci    ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
5801cabdff1aSopenharmony_ci    dst += (8 * dst_stride);
5802cabdff1aSopenharmony_ci    ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15,
5803cabdff1aSopenharmony_ci           dst, dst_stride);
5804cabdff1aSopenharmony_ci}
5805cabdff1aSopenharmony_ci
5806cabdff1aSopenharmony_cistatic void avg_width8_msa(const uint8_t *src, int32_t src_stride,
5807cabdff1aSopenharmony_ci                           uint8_t *dst, int32_t dst_stride,
5808cabdff1aSopenharmony_ci                           int32_t height)
5809cabdff1aSopenharmony_ci{
5810cabdff1aSopenharmony_ci    int32_t cnt;
5811cabdff1aSopenharmony_ci    uint64_t out0, out1, out2, out3;
5812cabdff1aSopenharmony_ci    v16u8 src0, src1, src2, src3;
5813cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3;
5814cabdff1aSopenharmony_ci
5815cabdff1aSopenharmony_ci    for (cnt = (height / 4); cnt--;) {
5816cabdff1aSopenharmony_ci        LD_UB4(src, src_stride, src0, src1, src2, src3);
5817cabdff1aSopenharmony_ci        src += (4 * src_stride);
5818cabdff1aSopenharmony_ci        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
5819cabdff1aSopenharmony_ci
5820cabdff1aSopenharmony_ci        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
5821cabdff1aSopenharmony_ci                    dst0, dst1, dst2, dst3);
5822cabdff1aSopenharmony_ci
5823cabdff1aSopenharmony_ci        out0 = __msa_copy_u_d((v2i64) dst0, 0);
5824cabdff1aSopenharmony_ci        out1 = __msa_copy_u_d((v2i64) dst1, 0);
5825cabdff1aSopenharmony_ci        out2 = __msa_copy_u_d((v2i64) dst2, 0);
5826cabdff1aSopenharmony_ci        out3 = __msa_copy_u_d((v2i64) dst3, 0);
5827cabdff1aSopenharmony_ci        SD4(out0, out1, out2, out3, dst, dst_stride);
5828cabdff1aSopenharmony_ci        dst += (4 * dst_stride);
5829cabdff1aSopenharmony_ci    }
5830cabdff1aSopenharmony_ci}
5831cabdff1aSopenharmony_ci
5832cabdff1aSopenharmony_cistatic void avg_width16_msa(const uint8_t *src, int32_t src_stride,
5833cabdff1aSopenharmony_ci                            uint8_t *dst, int32_t dst_stride,
5834cabdff1aSopenharmony_ci                            int32_t height)
5835cabdff1aSopenharmony_ci{
5836cabdff1aSopenharmony_ci    int32_t cnt;
5837cabdff1aSopenharmony_ci    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
5838cabdff1aSopenharmony_ci    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
5839cabdff1aSopenharmony_ci
5840cabdff1aSopenharmony_ci    for (cnt = (height / 8); cnt--;) {
5841cabdff1aSopenharmony_ci        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
5842cabdff1aSopenharmony_ci        src += (8 * src_stride);
5843cabdff1aSopenharmony_ci        LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
5844cabdff1aSopenharmony_ci
5845cabdff1aSopenharmony_ci        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
5846cabdff1aSopenharmony_ci                    dst0, dst1, dst2, dst3);
5847cabdff1aSopenharmony_ci        AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7,
5848cabdff1aSopenharmony_ci                    dst4, dst5, dst6, dst7);
5849cabdff1aSopenharmony_ci        ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride);
5850cabdff1aSopenharmony_ci        dst += (8 * dst_stride);
5851cabdff1aSopenharmony_ci    }
5852cabdff1aSopenharmony_ci}
5853cabdff1aSopenharmony_ci
/* Exported wrapper: copy_16x16_msa() with one stride for src and dest. */
void ff_copy_16x16_msa(uint8_t *dest,
                       const uint8_t *src,
                       ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    copy_16x16_msa(src, line_size, dest, line_size);
}
5858cabdff1aSopenharmony_ci
/* Exported wrapper: copy_8x8_msa() with one stride for src and dest. */
void ff_copy_8x8_msa(uint8_t *dest,
                     const uint8_t *src,
                     ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    copy_8x8_msa(src, line_size, dest, line_size);
}
5863cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_aver_src0_8width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_aver_src0_8width_msa(uint8_t *dest, const uint8_t *src,
                                           ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_aver_src0_8width_msa(src, stride, dest, stride, HEIGHT);
}
5870cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_aver_src0_16width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_aver_src0_16width_msa(uint8_t *dest, const uint8_t *src,
                                            ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_aver_src0_16width_msa(src, stride, dest, stride, HEIGHT);
}
5877cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_8width_msa() with `stride`
 * used for both source and destination.
 */
void ff_horiz_mc_qpel_8width_msa(uint8_t *dest,
                                 const uint8_t *src,
                                 ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_8width_msa(src, stride, dest, stride, HEIGHT);
}
5883cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_16width_msa() with `stride`
 * used for both source and destination.
 */
void ff_horiz_mc_qpel_16width_msa(uint8_t *dest,
                                  const uint8_t *src,
                                  ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_16width_msa(src, stride, dest, stride, HEIGHT);
}
5889cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_aver_src1_8width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_aver_src1_8width_msa(uint8_t *dest, const uint8_t *src,
                                           ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_aver_src1_8width_msa(src, stride, dest, stride, HEIGHT);
}
5896cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_aver_src1_16width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_aver_src1_16width_msa(uint8_t *dest, const uint8_t *src,
                                            ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_aver_src1_16width_msa(src, stride, dest, stride, HEIGHT);
}
5903cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_aver_src0_8width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_aver_src0_8width_msa(uint8_t *dest,
                                                  const uint8_t *src,
                                                  ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_no_rnd_aver_src0_8width_msa(src, stride, dest, stride,
                                              HEIGHT);
}
5910cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_aver_src0_16width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_aver_src0_16width_msa(uint8_t *dest,
                                                   const uint8_t *src,
                                                   ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_no_rnd_aver_src0_16width_msa(src, stride, dest, stride,
                                               HEIGHT);
}
5917cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_8width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_8width_msa(uint8_t *dest,
                                        const uint8_t *src,
                                        ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_no_rnd_8width_msa(src, stride, dest, stride, HEIGHT);
}
5923cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_16width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_16width_msa(uint8_t *dest,
                                         const uint8_t *src,
                                         ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_no_rnd_16width_msa(src, stride, dest, stride, HEIGHT);
}
5929cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_aver_src1_8width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_aver_src1_8width_msa(uint8_t *dest,
                                                  const uint8_t *src,
                                                  ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_no_rnd_aver_src1_8width_msa(src, stride, dest, stride,
                                              HEIGHT);
}
5936cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_no_rnd_aver_src1_16width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_no_rnd_aver_src1_16width_msa(uint8_t *dest,
                                                   const uint8_t *src,
                                                   ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_no_rnd_aver_src1_16width_msa(src, stride, dest, stride,
                                               HEIGHT);
}
5943cabdff1aSopenharmony_ci
/*
 * Exported wrapper: avg_width8_msa() over 8 rows, with `stride` used for
 * both source and destination.
 */
void ff_avg_width8_msa(uint8_t *dest,
                       const uint8_t *src,
                       ptrdiff_t stride)
{
    enum { HEIGHT = 8 }; /* rows handed to avg_width8_msa() */

    avg_width8_msa(src, stride, dest, stride, HEIGHT);
}
5948cabdff1aSopenharmony_ci
/*
 * Exported wrapper: avg_width16_msa() over 16 rows, with `stride` used for
 * both source and destination.
 */
void ff_avg_width16_msa(uint8_t *dest,
                        const uint8_t *src,
                        ptrdiff_t stride)
{
    enum { HEIGHT = 16 }; /* rows handed to avg_width16_msa() */

    avg_width16_msa(src, stride, dest, stride, HEIGHT);
}
5953cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_aver_src0_8width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_aver_src0_8width_msa(uint8_t *dest,
                                                   const uint8_t *src,
                                                   ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_avg_dst_aver_src0_8width_msa(src, stride, dest, stride,
                                               HEIGHT);
}
5960cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_aver_src0_16width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_aver_src0_16width_msa(uint8_t *dest,
                                                    const uint8_t *src,
                                                    ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_avg_dst_aver_src0_16width_msa(src, stride, dest, stride,
                                                HEIGHT);
}
5967cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_8width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_8width_msa(uint8_t *dest,
                                         const uint8_t *src,
                                         ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_avg_dst_8width_msa(src, stride, dest, stride, HEIGHT);
}
5973cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_16width_msa() with
 * `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_16width_msa(uint8_t *dest,
                                          const uint8_t *src,
                                          ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_avg_dst_16width_msa(src, stride, dest, stride, HEIGHT);
}
5979cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_aver_src1_8width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_aver_src1_8width_msa(uint8_t *dest,
                                                   const uint8_t *src,
                                                   ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 8 };

    horiz_mc_qpel_avg_dst_aver_src1_8width_msa(src, stride, dest, stride,
                                               HEIGHT);
}
5986cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to horiz_mc_qpel_avg_dst_aver_src1_16width_msa()
 * with `stride` used for both source and destination.
 */
void ff_horiz_mc_qpel_avg_dst_aver_src1_16width_msa(uint8_t *dest,
                                                    const uint8_t *src,
                                                    ptrdiff_t stride)
{
    /* Helper's trailing argument; assumed to be the row count. */
    enum { HEIGHT = 16 };

    horiz_mc_qpel_avg_dst_aver_src1_16width_msa(src, stride, dest, stride,
                                                HEIGHT);
}
5993cabdff1aSopenharmony_ci
5994cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to vert_mc_qpel_aver_src0_8x8_msa() with one
 * stride for src and dest.
 */
void ff_vert_mc_qpel_aver_src0_8x8_msa(uint8_t *dest,
                                       const uint8_t *src,
                                       ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    vert_mc_qpel_aver_src0_8x8_msa(src, line_size, dest, line_size);
}
6000cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to vert_mc_qpel_aver_src0_16x16_msa() with one
 * stride for src and dest.
 */
void ff_vert_mc_qpel_aver_src0_16x16_msa(uint8_t *dest,
                                         const uint8_t *src,
                                         ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    vert_mc_qpel_aver_src0_16x16_msa(src, line_size, dest, line_size);
}
6006cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to vert_mc_qpel_8x8_msa() with one stride for
 * src and dest.
 */
void ff_vert_mc_qpel_8x8_msa(uint8_t *dest,
                             const uint8_t *src,
                             ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    vert_mc_qpel_8x8_msa(src, line_size, dest, line_size);
}
6012cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to vert_mc_qpel_16x16_msa() with one stride for
 * src and dest.
 */
void ff_vert_mc_qpel_16x16_msa(uint8_t *dest,
                               const uint8_t *src,
                               ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    vert_mc_qpel_16x16_msa(src, line_size, dest, line_size);
}
6018cabdff1aSopenharmony_ci
/*
 * Exported wrapper: forwards to vert_mc_qpel_aver_src1_8x8_msa() with one
 * stride for src and dest.
 */
void ff_vert_mc_qpel_aver_src1_8x8_msa(uint8_t *dest,
                                       const uint8_t *src,
                                       ptrdiff_t stride)
{
    const ptrdiff_t line_size = stride; /* shared by source and destination */

    vert_mc_qpel_aver_src1_8x8_msa(src, line_size, dest, line_size);
}
6024cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_aver_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6025cabdff1aSopenharmony_civoid ff_vert_mc_qpel_aver_src1_16x16_msa(uint8_t *dest,
6026cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6027cabdff1aSopenharmony_ci{
6028cabdff1aSopenharmony_ci    vert_mc_qpel_aver_src1_16x16_msa(src, stride, dest, stride);
6029cabdff1aSopenharmony_ci}
6030cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_aver_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6031cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_aver_src0_8x8_msa(uint8_t *dest,
6032cabdff1aSopenharmony_ci                                              const uint8_t *src,
6033cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6034cabdff1aSopenharmony_ci{
6035cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src0_8x8_msa(src, stride, dest, stride);
6036cabdff1aSopenharmony_ci}
6037cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_aver_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6038cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_aver_src0_16x16_msa(uint8_t *dest,
6039cabdff1aSopenharmony_ci                                                const uint8_t *src,
6040cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6041cabdff1aSopenharmony_ci{
6042cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(src, stride, dest, stride);
6043cabdff1aSopenharmony_ci}
6044cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6045cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_8x8_msa(uint8_t *dest,
6046cabdff1aSopenharmony_ci                                    const uint8_t *src, ptrdiff_t stride)
6047cabdff1aSopenharmony_ci{
6048cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_8x8_msa(src, stride, dest, stride);
6049cabdff1aSopenharmony_ci}
6050cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6051cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_16x16_msa(uint8_t *dest,
6052cabdff1aSopenharmony_ci                                      const uint8_t *src, ptrdiff_t stride)
6053cabdff1aSopenharmony_ci{
6054cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_16x16_msa(src, stride, dest, stride);
6055cabdff1aSopenharmony_ci}
6056cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_aver_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6057cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_aver_src1_8x8_msa(uint8_t *dest,
6058cabdff1aSopenharmony_ci                                              const uint8_t *src,
6059cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6060cabdff1aSopenharmony_ci{
6061cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src1_8x8_msa(src, stride, dest, stride);
6062cabdff1aSopenharmony_ci}
6063cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_no_rnd_aver_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6064cabdff1aSopenharmony_civoid ff_vert_mc_qpel_no_rnd_aver_src1_16x16_msa(uint8_t *dest,
6065cabdff1aSopenharmony_ci                                                const uint8_t *src,
6066cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6067cabdff1aSopenharmony_ci{
6068cabdff1aSopenharmony_ci    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(src, stride, dest, stride);
6069cabdff1aSopenharmony_ci}
6070cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_aver_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6071cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_aver_src0_8x8_msa(uint8_t *dest,
6072cabdff1aSopenharmony_ci                                               const uint8_t *src,
6073cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6074cabdff1aSopenharmony_ci{
6075cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src0_8x8_msa(src, stride, dest, stride);
6076cabdff1aSopenharmony_ci}
6077cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_aver_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6078cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_aver_src0_16x16_msa(uint8_t *dest,
6079cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6080cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6081cabdff1aSopenharmony_ci{
6082cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(src, stride, dest, stride);
6083cabdff1aSopenharmony_ci}
6084cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6085cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_8x8_msa(uint8_t *dest,
6086cabdff1aSopenharmony_ci                                     const uint8_t *src, ptrdiff_t stride)
6087cabdff1aSopenharmony_ci{
6088cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_8x8_msa(src, stride, dest, stride);
6089cabdff1aSopenharmony_ci}
6090cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6091cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_16x16_msa(uint8_t *dest,
6092cabdff1aSopenharmony_ci                                       const uint8_t *src, ptrdiff_t stride)
6093cabdff1aSopenharmony_ci{
6094cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_16x16_msa(src, stride, dest, stride);
6095cabdff1aSopenharmony_ci}
6096cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_aver_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6097cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_aver_src1_8x8_msa(uint8_t *dest,
6098cabdff1aSopenharmony_ci                                               const uint8_t *src,
6099cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6100cabdff1aSopenharmony_ci{
6101cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src1_8x8_msa(src, stride, dest, stride);
6102cabdff1aSopenharmony_ci}
6103cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around vert_mc_qpel_avg_dst_aver_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6104cabdff1aSopenharmony_civoid ff_vert_mc_qpel_avg_dst_aver_src1_16x16_msa(uint8_t *dest,
6105cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6106cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6107cabdff1aSopenharmony_ci{
6108cabdff1aSopenharmony_ci    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(src, stride, dest, stride);
6109cabdff1aSopenharmony_ci}
6110cabdff1aSopenharmony_ci
6111cabdff1aSopenharmony_ci/* HV cases */
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src00_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6112cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src00_16x16_msa(uint8_t *dest,
6113cabdff1aSopenharmony_ci                                           const uint8_t *src,
6114cabdff1aSopenharmony_ci                                           ptrdiff_t stride)
6115cabdff1aSopenharmony_ci{
6116cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src00_16x16_msa(src, stride, dest, stride);
6117cabdff1aSopenharmony_ci}
6118cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src00_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6119cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src00_8x8_msa(uint8_t *dest,
6120cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6121cabdff1aSopenharmony_ci{
6122cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src00_8x8_msa(src, stride, dest, stride);
6123cabdff1aSopenharmony_ci}
6124cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_v_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6125cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_v_src0_16x16_msa(uint8_t *dest,
6126cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6127cabdff1aSopenharmony_ci{
6128cabdff1aSopenharmony_ci    hv_mc_qpel_aver_v_src0_16x16_msa(src, stride, dest, stride);
6129cabdff1aSopenharmony_ci}
6130cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_v_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6131cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_v_src0_8x8_msa(uint8_t *dest,
6132cabdff1aSopenharmony_ci                                       const uint8_t *src, ptrdiff_t stride)
6133cabdff1aSopenharmony_ci{
6134cabdff1aSopenharmony_ci    hv_mc_qpel_aver_v_src0_8x8_msa(src, stride, dest, stride);
6135cabdff1aSopenharmony_ci}
6136cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src10_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6137cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src10_16x16_msa(uint8_t *dest,
6138cabdff1aSopenharmony_ci                                           const uint8_t *src,
6139cabdff1aSopenharmony_ci                                           ptrdiff_t stride)
6140cabdff1aSopenharmony_ci{
6141cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src10_16x16_msa(src, stride, dest, stride);
6142cabdff1aSopenharmony_ci}
6143cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src10_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6144cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src10_8x8_msa(uint8_t *dest,
6145cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6146cabdff1aSopenharmony_ci{
6147cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src10_8x8_msa(src, stride, dest, stride);
6148cabdff1aSopenharmony_ci}
6149cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_h_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6150cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_h_src0_16x16_msa(uint8_t *dest,
6151cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6152cabdff1aSopenharmony_ci{
6153cabdff1aSopenharmony_ci    hv_mc_qpel_aver_h_src0_16x16_msa(src, stride, dest, stride);
6154cabdff1aSopenharmony_ci}
6155cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_h_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6156cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_h_src0_8x8_msa(uint8_t *dest,
6157cabdff1aSopenharmony_ci                                       const uint8_t *src, ptrdiff_t stride)
6158cabdff1aSopenharmony_ci{
6159cabdff1aSopenharmony_ci    hv_mc_qpel_aver_h_src0_8x8_msa(src, stride, dest, stride);
6160cabdff1aSopenharmony_ci}
6161cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6162cabdff1aSopenharmony_civoid ff_hv_mc_qpel_16x16_msa(uint8_t *dest, const uint8_t *src,
6163cabdff1aSopenharmony_ci                             ptrdiff_t stride)
6164cabdff1aSopenharmony_ci{
6165cabdff1aSopenharmony_ci    hv_mc_qpel_16x16_msa(src, stride, dest, stride);
6166cabdff1aSopenharmony_ci}
6167cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6168cabdff1aSopenharmony_civoid ff_hv_mc_qpel_8x8_msa(uint8_t *dest, const uint8_t *src,
6169cabdff1aSopenharmony_ci                           ptrdiff_t stride)
6170cabdff1aSopenharmony_ci{
6171cabdff1aSopenharmony_ci    hv_mc_qpel_8x8_msa(src, stride, dest, stride);
6172cabdff1aSopenharmony_ci}
6173cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_h_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6174cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_h_src1_16x16_msa(uint8_t *dest,
6175cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6176cabdff1aSopenharmony_ci{
6177cabdff1aSopenharmony_ci    hv_mc_qpel_aver_h_src1_16x16_msa(src, stride, dest, stride);
6178cabdff1aSopenharmony_ci}
6179cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_h_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6180cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_h_src1_8x8_msa(uint8_t *dest,
6181cabdff1aSopenharmony_ci                                       const uint8_t *src, ptrdiff_t stride)
6182cabdff1aSopenharmony_ci{
6183cabdff1aSopenharmony_ci    hv_mc_qpel_aver_h_src1_8x8_msa(src, stride, dest, stride);
6184cabdff1aSopenharmony_ci}
6185cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src01_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6186cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src01_16x16_msa(uint8_t *dest,
6187cabdff1aSopenharmony_ci                                           const uint8_t *src,
6188cabdff1aSopenharmony_ci                                           ptrdiff_t stride)
6189cabdff1aSopenharmony_ci{
6190cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src01_16x16_msa(src, stride, dest, stride);
6191cabdff1aSopenharmony_ci}
6192cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src01_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6193cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src01_8x8_msa(uint8_t *dest,
6194cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6195cabdff1aSopenharmony_ci{
6196cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src01_8x8_msa(src, stride, dest, stride);
6197cabdff1aSopenharmony_ci}
6198cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_v_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6199cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_v_src1_16x16_msa(uint8_t *dest,
6200cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6201cabdff1aSopenharmony_ci{
6202cabdff1aSopenharmony_ci    hv_mc_qpel_aver_v_src1_16x16_msa(src, stride, dest, stride);
6203cabdff1aSopenharmony_ci}
6204cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_v_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6205cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_v_src1_8x8_msa(uint8_t *dest,
6206cabdff1aSopenharmony_ci                                       const uint8_t *src, ptrdiff_t stride)
6207cabdff1aSopenharmony_ci{
6208cabdff1aSopenharmony_ci    hv_mc_qpel_aver_v_src1_8x8_msa(src, stride, dest, stride);
6209cabdff1aSopenharmony_ci}
6210cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src11_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6211cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src11_16x16_msa(uint8_t *dest,
6212cabdff1aSopenharmony_ci                                           const uint8_t *src,
6213cabdff1aSopenharmony_ci                                           ptrdiff_t stride)
6214cabdff1aSopenharmony_ci{
6215cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src11_16x16_msa(src, stride, dest, stride);
6216cabdff1aSopenharmony_ci}
6217cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_aver_hv_src11_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6218cabdff1aSopenharmony_civoid ff_hv_mc_qpel_aver_hv_src11_8x8_msa(uint8_t *dest,
6219cabdff1aSopenharmony_ci                                         const uint8_t *src, ptrdiff_t stride)
6220cabdff1aSopenharmony_ci{
6221cabdff1aSopenharmony_ci    hv_mc_qpel_aver_hv_src11_8x8_msa(src, stride, dest, stride);
6222cabdff1aSopenharmony_ci}
6223cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6224cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(uint8_t *dest,
6225cabdff1aSopenharmony_ci                                                   const uint8_t *src,
6226cabdff1aSopenharmony_ci                                                   ptrdiff_t stride)
6227cabdff1aSopenharmony_ci{
6228cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(src, stride, dest, stride);
6229cabdff1aSopenharmony_ci}
6230cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6231cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(uint8_t *dest,
6232cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6233cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6234cabdff1aSopenharmony_ci{
6235cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(src, stride, dest, stride);
6236cabdff1aSopenharmony_ci}
6237cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6238cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(uint8_t *dest,
6239cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6240cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6241cabdff1aSopenharmony_ci{
6242cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(src, stride, dest, stride);
6243cabdff1aSopenharmony_ci}
6244cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6245cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(uint8_t *dest,
6246cabdff1aSopenharmony_ci                                               const uint8_t *src,
6247cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6248cabdff1aSopenharmony_ci{
6249cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(src, stride, dest, stride);
6250cabdff1aSopenharmony_ci}
6251cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6252cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(uint8_t *dest,
6253cabdff1aSopenharmony_ci                                                   const uint8_t *src,
6254cabdff1aSopenharmony_ci                                                   ptrdiff_t stride)
6255cabdff1aSopenharmony_ci{
6256cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(src, stride, dest, stride);
6257cabdff1aSopenharmony_ci}
6258cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6259cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(uint8_t *dest,
6260cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6261cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6262cabdff1aSopenharmony_ci{
6263cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(src, stride, dest, stride);
6264cabdff1aSopenharmony_ci}
6265cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6266cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(uint8_t *dest,
6267cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6268cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6269cabdff1aSopenharmony_ci{
6270cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(src, stride, dest, stride);
6271cabdff1aSopenharmony_ci}
6272cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6273cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(uint8_t *dest,
6274cabdff1aSopenharmony_ci                                               const uint8_t *src,
6275cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6276cabdff1aSopenharmony_ci{
6277cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(src, stride, dest, stride);
6278cabdff1aSopenharmony_ci}
6279cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6280cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_16x16_msa(uint8_t *dest,
6281cabdff1aSopenharmony_ci                                     const uint8_t *src, ptrdiff_t stride)
6282cabdff1aSopenharmony_ci{
6283cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_16x16_msa(src, stride, dest, stride);
6284cabdff1aSopenharmony_ci}
6285cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6286cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_8x8_msa(uint8_t *dest,
6287cabdff1aSopenharmony_ci                                   const uint8_t *src, ptrdiff_t stride)
6288cabdff1aSopenharmony_ci{
6289cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_8x8_msa(src, stride, dest, stride);
6290cabdff1aSopenharmony_ci}
6291cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6292cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(uint8_t *dest,
6293cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6294cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6295cabdff1aSopenharmony_ci{
6296cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(src, stride, dest, stride);
6297cabdff1aSopenharmony_ci}
6298cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6299cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(uint8_t *dest,
6300cabdff1aSopenharmony_ci                                               const uint8_t *src,
6301cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6302cabdff1aSopenharmony_ci{
6303cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(src, stride, dest, stride);
6304cabdff1aSopenharmony_ci}
6305cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6306cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(uint8_t *dest,
6307cabdff1aSopenharmony_ci                                                   const uint8_t *src,
6308cabdff1aSopenharmony_ci                                                   ptrdiff_t stride)
6309cabdff1aSopenharmony_ci{
6310cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(src, stride, dest, stride);
6311cabdff1aSopenharmony_ci}
6312cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6313cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(uint8_t *dest,
6314cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6315cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6316cabdff1aSopenharmony_ci{
6317cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(src, stride, dest, stride);
6318cabdff1aSopenharmony_ci}
6319cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6320cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(uint8_t *dest,
6321cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6322cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6323cabdff1aSopenharmony_ci{
6324cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(src, stride, dest, stride);
6325cabdff1aSopenharmony_ci}
6326cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6327cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(uint8_t *dest,
6328cabdff1aSopenharmony_ci                                               const uint8_t *src,
6329cabdff1aSopenharmony_ci                                               ptrdiff_t stride)
6330cabdff1aSopenharmony_ci{
6331cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(src, stride, dest, stride);
6332cabdff1aSopenharmony_ci}
6333cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6334cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(uint8_t *dest,
6335cabdff1aSopenharmony_ci                                                   const uint8_t *src,
6336cabdff1aSopenharmony_ci                                                   ptrdiff_t stride)
6337cabdff1aSopenharmony_ci{
6338cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(src, stride, dest, stride);
6339cabdff1aSopenharmony_ci}
6340cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6341cabdff1aSopenharmony_civoid ff_hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(uint8_t *dest,
6342cabdff1aSopenharmony_ci                                                 const uint8_t *src,
6343cabdff1aSopenharmony_ci                                                 ptrdiff_t stride)
6344cabdff1aSopenharmony_ci{
6345cabdff1aSopenharmony_ci    hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(src, stride, dest, stride);
6346cabdff1aSopenharmony_ci}
6347cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6348cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(uint8_t *dest,
6349cabdff1aSopenharmony_ci                                                  const uint8_t *src,
6350cabdff1aSopenharmony_ci                                                  ptrdiff_t stride)
6351cabdff1aSopenharmony_ci{
6352cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(src, stride, dest, stride);
6353cabdff1aSopenharmony_ci}
6354cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6355cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(uint8_t *dest,
6356cabdff1aSopenharmony_ci                                                const uint8_t *src,
6357cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6358cabdff1aSopenharmony_ci{
6359cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(src, stride, dest, stride);
6360cabdff1aSopenharmony_ci}
6361cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6362cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(uint8_t *dest,
6363cabdff1aSopenharmony_ci                                                const uint8_t *src,
6364cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6365cabdff1aSopenharmony_ci{
6366cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(src, stride, dest, stride);
6367cabdff1aSopenharmony_ci}
6368cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6369cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(uint8_t *dest,
6370cabdff1aSopenharmony_ci                                              const uint8_t *src,
6371cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6372cabdff1aSopenharmony_ci{
6373cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(src, stride, dest, stride);
6374cabdff1aSopenharmony_ci}
6375cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6376cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(uint8_t *dest,
6377cabdff1aSopenharmony_ci                                                  const uint8_t *src,
6378cabdff1aSopenharmony_ci                                                  ptrdiff_t stride)
6379cabdff1aSopenharmony_ci{
6380cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(src, stride, dest, stride);
6381cabdff1aSopenharmony_ci}
6382cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6383cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(uint8_t *dest,
6384cabdff1aSopenharmony_ci                                                const uint8_t *src,
6385cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6386cabdff1aSopenharmony_ci{
6387cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(src, stride, dest, stride);
6388cabdff1aSopenharmony_ci}
6389cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6390cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(uint8_t *dest,
6391cabdff1aSopenharmony_ci                                                const uint8_t *src,
6392cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6393cabdff1aSopenharmony_ci{
6394cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(src, stride, dest, stride);
6395cabdff1aSopenharmony_ci}
6396cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6397cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(uint8_t *dest,
6398cabdff1aSopenharmony_ci                                              const uint8_t *src,
6399cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6400cabdff1aSopenharmony_ci{
6401cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(src, stride, dest, stride);
6402cabdff1aSopenharmony_ci}
6403cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6404cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_16x16_msa(uint8_t *dest,
6405cabdff1aSopenharmony_ci                                    const uint8_t *src, ptrdiff_t stride)
6406cabdff1aSopenharmony_ci{
6407cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_16x16_msa(src, stride, dest, stride);
6408cabdff1aSopenharmony_ci}
6409cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6410cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_8x8_msa(uint8_t *dest,
6411cabdff1aSopenharmony_ci                                  const uint8_t *src, ptrdiff_t stride)
6412cabdff1aSopenharmony_ci{
6413cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_8x8_msa(src, stride, dest, stride);
6414cabdff1aSopenharmony_ci}
6415cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6416cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(uint8_t *dest,
6417cabdff1aSopenharmony_ci                                                const uint8_t *src,
6418cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6419cabdff1aSopenharmony_ci{
6420cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(src, stride, dest, stride);
6421cabdff1aSopenharmony_ci}
6422cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6423cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(uint8_t *dest,
6424cabdff1aSopenharmony_ci                                              const uint8_t *src,
6425cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6426cabdff1aSopenharmony_ci{
6427cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(src, stride, dest, stride);
6428cabdff1aSopenharmony_ci}
6429cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6430cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(uint8_t *dest,
6431cabdff1aSopenharmony_ci                                                  const uint8_t *src,
6432cabdff1aSopenharmony_ci                                                  ptrdiff_t stride)
6433cabdff1aSopenharmony_ci{
6434cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(src, stride, dest, stride);
6435cabdff1aSopenharmony_ci}
6436cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6437cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(uint8_t *dest,
6438cabdff1aSopenharmony_ci                                                const uint8_t *src,
6439cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6440cabdff1aSopenharmony_ci{
6441cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(src, stride, dest, stride);
6442cabdff1aSopenharmony_ci}
6443cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6444cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(uint8_t *dest,
6445cabdff1aSopenharmony_ci                                                const uint8_t *src,
6446cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6447cabdff1aSopenharmony_ci{
6448cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(src, stride, dest, stride);
6449cabdff1aSopenharmony_ci}
6450cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6451cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(uint8_t *dest,
6452cabdff1aSopenharmony_ci                                              const uint8_t *src,
6453cabdff1aSopenharmony_ci                                              ptrdiff_t stride)
6454cabdff1aSopenharmony_ci{
6455cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(src, stride, dest, stride);
6456cabdff1aSopenharmony_ci}
6457cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6458cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(uint8_t *dest,
6459cabdff1aSopenharmony_ci                                                  const uint8_t *src,
6460cabdff1aSopenharmony_ci                                                  ptrdiff_t stride)
6461cabdff1aSopenharmony_ci{
6462cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(src, stride, dest, stride);
6463cabdff1aSopenharmony_ci}
6464cabdff1aSopenharmony_ci
/*
 * Non-static wrapper around hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa():
 * reorders (dest, src, stride) into (src, stride, dest, stride), so the one
 * stride value fills both stride arguments of the helper.
 */
6465cabdff1aSopenharmony_civoid ff_hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(uint8_t *dest,
6466cabdff1aSopenharmony_ci                                                const uint8_t *src,
6467cabdff1aSopenharmony_ci                                                ptrdiff_t stride)
6468cabdff1aSopenharmony_ci{
6469cabdff1aSopenharmony_ci    hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(src, stride, dest, stride);
6470cabdff1aSopenharmony_ci}
6471