/*
 * Copyright (c) 2021 Loongson Technology Corporation Limited
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "libavutil/loongarch/loongson_intrinsics.h"
23cabdff1aSopenharmony_ci#include "hpeldsp_lasx.h"
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_cistatic av_always_inline void
26cabdff1aSopenharmony_ciput_pixels8_l2_8_lsx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
27cabdff1aSopenharmony_ci                     int dst_stride, int src_stride1, int src_stride2, int h)
28cabdff1aSopenharmony_ci{
29cabdff1aSopenharmony_ci    int stride1_2, stride1_3, stride1_4;
30cabdff1aSopenharmony_ci    int stride2_2, stride2_3, stride2_4;
31cabdff1aSopenharmony_ci    __asm__ volatile (
32cabdff1aSopenharmony_ci        "slli.d   %[stride1_2],  %[srcStride1],     1             \n\t"
33cabdff1aSopenharmony_ci        "slli.d   %[stride2_2],  %[srcStride2],     1             \n\t"
34cabdff1aSopenharmony_ci        "add.d    %[stride1_3],  %[stride1_2],      %[srcStride1] \n\t"
35cabdff1aSopenharmony_ci        "add.d    %[stride2_3],  %[stride2_2],      %[srcStride2] \n\t"
36cabdff1aSopenharmony_ci        "slli.d   %[stride1_4],  %[stride1_2],      1             \n\t"
37cabdff1aSopenharmony_ci        "slli.d   %[stride2_4],  %[stride2_2],      1             \n\t"
38cabdff1aSopenharmony_ci        "1:                                                       \n\t"
39cabdff1aSopenharmony_ci        "vld      $vr0,          %[src1],           0             \n\t"
40cabdff1aSopenharmony_ci        "vldx     $vr1,          %[src1],           %[srcStride1] \n\t"
41cabdff1aSopenharmony_ci        "vldx     $vr2,          %[src1],           %[stride1_2]  \n\t"
42cabdff1aSopenharmony_ci        "vldx     $vr3,          %[src1],           %[stride1_3]  \n\t"
43cabdff1aSopenharmony_ci        "add.d    %[src1],       %[src1],           %[stride1_4]  \n\t"
44cabdff1aSopenharmony_ci
45cabdff1aSopenharmony_ci        "vld      $vr4,          %[src2],           0             \n\t"
46cabdff1aSopenharmony_ci        "vldx     $vr5,          %[src2],           %[srcStride2] \n\t"
47cabdff1aSopenharmony_ci        "vldx     $vr6,          %[src2],           %[stride2_2]  \n\t"
48cabdff1aSopenharmony_ci        "vldx     $vr7,          %[src2],           %[stride2_3]  \n\t"
49cabdff1aSopenharmony_ci        "add.d    %[src2],       %[src2],           %[stride2_4]  \n\t"
50cabdff1aSopenharmony_ci
51cabdff1aSopenharmony_ci        "addi.d   %[h],          %[h],              -4            \n\t"
52cabdff1aSopenharmony_ci
53cabdff1aSopenharmony_ci        "vavgr.bu $vr0,          $vr4,              $vr0          \n\t"
54cabdff1aSopenharmony_ci        "vavgr.bu $vr1,          $vr5,              $vr1          \n\t"
55cabdff1aSopenharmony_ci        "vavgr.bu $vr2,          $vr6,              $vr2          \n\t"
56cabdff1aSopenharmony_ci        "vavgr.bu $vr3,          $vr7,              $vr3          \n\t"
57cabdff1aSopenharmony_ci        "vstelm.d $vr0,          %[dst],            0,  0         \n\t"
58cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[dstStride]  \n\t"
59cabdff1aSopenharmony_ci        "vstelm.d $vr1,          %[dst],            0,  0         \n\t"
60cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[dstStride]  \n\t"
61cabdff1aSopenharmony_ci        "vstelm.d $vr2,          %[dst],            0,  0         \n\t"
62cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[dstStride]  \n\t"
63cabdff1aSopenharmony_ci        "vstelm.d $vr3,          %[dst],            0,  0         \n\t"
64cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[dstStride]  \n\t"
65cabdff1aSopenharmony_ci        "bnez     %[h],                             1b            \n\t"
66cabdff1aSopenharmony_ci
67cabdff1aSopenharmony_ci        : [dst]"+&r"(dst), [src2]"+&r"(src2), [src1]"+&r"(src1),
68cabdff1aSopenharmony_ci          [h]"+&r"(h), [stride1_2]"=&r"(stride1_2),
69cabdff1aSopenharmony_ci          [stride1_3]"=&r"(stride1_3), [stride1_4]"=&r"(stride1_4),
70cabdff1aSopenharmony_ci          [stride2_2]"=&r"(stride2_2), [stride2_3]"=&r"(stride2_3),
71cabdff1aSopenharmony_ci          [stride2_4]"=&r"(stride2_4)
72cabdff1aSopenharmony_ci        : [dstStride]"r"(dst_stride), [srcStride1]"r"(src_stride1),
73cabdff1aSopenharmony_ci          [srcStride2]"r"(src_stride2)
74cabdff1aSopenharmony_ci        : "memory"
75cabdff1aSopenharmony_ci    );
76cabdff1aSopenharmony_ci}
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_cistatic av_always_inline void
79cabdff1aSopenharmony_ciput_pixels16_l2_8_lsx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
80cabdff1aSopenharmony_ci                      int dst_stride, int src_stride1, int src_stride2, int h)
81cabdff1aSopenharmony_ci{
82cabdff1aSopenharmony_ci    int stride1_2, stride1_3, stride1_4;
83cabdff1aSopenharmony_ci    int stride2_2, stride2_3, stride2_4;
84cabdff1aSopenharmony_ci    int dststride2, dststride3, dststride4;
85cabdff1aSopenharmony_ci    __asm__ volatile (
86cabdff1aSopenharmony_ci        "slli.d   %[stride1_2],  %[srcStride1],     1             \n\t"
87cabdff1aSopenharmony_ci        "slli.d   %[stride2_2],  %[srcStride2],     1             \n\t"
88cabdff1aSopenharmony_ci        "slli.d   %[dststride2], %[dstStride],      1             \n\t"
89cabdff1aSopenharmony_ci        "add.d    %[stride1_3],  %[stride1_2],      %[srcStride1] \n\t"
90cabdff1aSopenharmony_ci        "add.d    %[stride2_3],  %[stride2_2],      %[srcStride2] \n\t"
91cabdff1aSopenharmony_ci        "add.d    %[dststride3], %[dststride2],     %[dstStride]  \n\t"
92cabdff1aSopenharmony_ci        "slli.d   %[stride1_4],  %[stride1_2],      1             \n\t"
93cabdff1aSopenharmony_ci        "slli.d   %[stride2_4],  %[stride2_2],      1             \n\t"
94cabdff1aSopenharmony_ci        "slli.d   %[dststride4], %[dststride2],     1             \n\t"
95cabdff1aSopenharmony_ci        "1:                                                       \n\t"
96cabdff1aSopenharmony_ci        "vld      $vr0,          %[src1],           0             \n\t"
97cabdff1aSopenharmony_ci        "vldx     $vr1,          %[src1],           %[srcStride1] \n\t"
98cabdff1aSopenharmony_ci        "vldx     $vr2,          %[src1],           %[stride1_2]  \n\t"
99cabdff1aSopenharmony_ci        "vldx     $vr3,          %[src1],           %[stride1_3]  \n\t"
100cabdff1aSopenharmony_ci        "add.d    %[src1],       %[src1],           %[stride1_4]  \n\t"
101cabdff1aSopenharmony_ci
102cabdff1aSopenharmony_ci        "vld      $vr4,          %[src2],           0             \n\t"
103cabdff1aSopenharmony_ci        "vldx     $vr5,          %[src2],           %[srcStride2] \n\t"
104cabdff1aSopenharmony_ci        "vldx     $vr6,          %[src2],           %[stride2_2]  \n\t"
105cabdff1aSopenharmony_ci        "vldx     $vr7,          %[src2],           %[stride2_3]  \n\t"
106cabdff1aSopenharmony_ci        "add.d    %[src2],       %[src2],           %[stride2_4]  \n\t"
107cabdff1aSopenharmony_ci
108cabdff1aSopenharmony_ci        "addi.d   %[h],          %[h],              -4            \n\t"
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_ci        "vavgr.bu $vr0,          $vr4,              $vr0          \n\t"
111cabdff1aSopenharmony_ci        "vavgr.bu $vr1,          $vr5,              $vr1          \n\t"
112cabdff1aSopenharmony_ci        "vavgr.bu $vr2,          $vr6,              $vr2          \n\t"
113cabdff1aSopenharmony_ci        "vavgr.bu $vr3,          $vr7,              $vr3          \n\t"
114cabdff1aSopenharmony_ci        "vst      $vr0,          %[dst],            0             \n\t"
115cabdff1aSopenharmony_ci        "vstx     $vr1,          %[dst],            %[dstStride]  \n\t"
116cabdff1aSopenharmony_ci        "vstx     $vr2,          %[dst],            %[dststride2] \n\t"
117cabdff1aSopenharmony_ci        "vstx     $vr3,          %[dst],            %[dststride3] \n\t"
118cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[dststride4] \n\t"
119cabdff1aSopenharmony_ci        "bnez     %[h],                             1b            \n\t"
120cabdff1aSopenharmony_ci
121cabdff1aSopenharmony_ci        : [dst]"+&r"(dst), [src2]"+&r"(src2), [src1]"+&r"(src1),
122cabdff1aSopenharmony_ci          [h]"+&r"(h), [stride1_2]"=&r"(stride1_2),
123cabdff1aSopenharmony_ci          [stride1_3]"=&r"(stride1_3), [stride1_4]"=&r"(stride1_4),
124cabdff1aSopenharmony_ci          [stride2_2]"=&r"(stride2_2), [stride2_3]"=&r"(stride2_3),
125cabdff1aSopenharmony_ci          [stride2_4]"=&r"(stride2_4), [dststride2]"=&r"(dststride2),
126cabdff1aSopenharmony_ci          [dststride3]"=&r"(dststride3), [dststride4]"=&r"(dststride4)
127cabdff1aSopenharmony_ci        : [dstStride]"r"(dst_stride), [srcStride1]"r"(src_stride1),
128cabdff1aSopenharmony_ci          [srcStride2]"r"(src_stride2)
129cabdff1aSopenharmony_ci        : "memory"
130cabdff1aSopenharmony_ci    );
131cabdff1aSopenharmony_ci}
132cabdff1aSopenharmony_ci
133cabdff1aSopenharmony_civoid ff_put_pixels8_8_lasx(uint8_t *block, const uint8_t *pixels,
134cabdff1aSopenharmony_ci                           ptrdiff_t line_size, int h)
135cabdff1aSopenharmony_ci{
136cabdff1aSopenharmony_ci    uint64_t tmp[8];
137cabdff1aSopenharmony_ci    int h_8 = h >> 3;
138cabdff1aSopenharmony_ci    int res = h & 7;
139cabdff1aSopenharmony_ci    ptrdiff_t stride2, stride3, stride4;
140cabdff1aSopenharmony_ci
141cabdff1aSopenharmony_ci    __asm__ volatile (
142cabdff1aSopenharmony_ci        "beqz     %[h_8],                           2f            \n\t"
143cabdff1aSopenharmony_ci        "slli.d   %[stride2],    %[stride],         1             \n\t"
144cabdff1aSopenharmony_ci        "add.d    %[stride3],    %[stride2],        %[stride]     \n\t"
145cabdff1aSopenharmony_ci        "slli.d   %[stride4],    %[stride2],        1             \n\t"
146cabdff1aSopenharmony_ci        "1:                                                       \n\t"
147cabdff1aSopenharmony_ci        "ld.d     %[tmp0],       %[src],            0x0           \n\t"
148cabdff1aSopenharmony_ci        "ldx.d    %[tmp1],       %[src],            %[stride]     \n\t"
149cabdff1aSopenharmony_ci        "ldx.d    %[tmp2],       %[src],            %[stride2]    \n\t"
150cabdff1aSopenharmony_ci        "ldx.d    %[tmp3],       %[src],            %[stride3]    \n\t"
151cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride4]    \n\t"
152cabdff1aSopenharmony_ci        "ld.d     %[tmp4],       %[src],            0x0           \n\t"
153cabdff1aSopenharmony_ci        "ldx.d    %[tmp5],       %[src],            %[stride]     \n\t"
154cabdff1aSopenharmony_ci        "ldx.d    %[tmp6],       %[src],            %[stride2]    \n\t"
155cabdff1aSopenharmony_ci        "ldx.d    %[tmp7],       %[src],            %[stride3]    \n\t"
156cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride4]    \n\t"
157cabdff1aSopenharmony_ci
158cabdff1aSopenharmony_ci        "addi.d   %[h_8],        %[h_8],            -1            \n\t"
159cabdff1aSopenharmony_ci
160cabdff1aSopenharmony_ci        "st.d     %[tmp0],       %[dst],            0x0           \n\t"
161cabdff1aSopenharmony_ci        "stx.d    %[tmp1],       %[dst],            %[stride]     \n\t"
162cabdff1aSopenharmony_ci        "stx.d    %[tmp2],       %[dst],            %[stride2]    \n\t"
163cabdff1aSopenharmony_ci        "stx.d    %[tmp3],       %[dst],            %[stride3]    \n\t"
164cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride4]    \n\t"
165cabdff1aSopenharmony_ci        "st.d     %[tmp4],       %[dst],            0x0           \n\t"
166cabdff1aSopenharmony_ci        "stx.d    %[tmp5],       %[dst],            %[stride]     \n\t"
167cabdff1aSopenharmony_ci        "stx.d    %[tmp6],       %[dst],            %[stride2]    \n\t"
168cabdff1aSopenharmony_ci        "stx.d    %[tmp7],       %[dst],            %[stride3]    \n\t"
169cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride4]    \n\t"
170cabdff1aSopenharmony_ci        "bnez     %[h_8],        1b                               \n\t"
171cabdff1aSopenharmony_ci
172cabdff1aSopenharmony_ci        "2:                                                       \n\t"
173cabdff1aSopenharmony_ci        "beqz     %[res],        4f                               \n\t"
174cabdff1aSopenharmony_ci        "3:                                                       \n\t"
175cabdff1aSopenharmony_ci        "ld.d     %[tmp0],       %[src],            0x0           \n\t"
176cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride]     \n\t"
177cabdff1aSopenharmony_ci        "addi.d   %[res],        %[res],            -1            \n\t"
178cabdff1aSopenharmony_ci        "st.d     %[tmp0],       %[dst],            0x0           \n\t"
179cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride]     \n\t"
180cabdff1aSopenharmony_ci        "bnez     %[res],        3b                               \n\t"
181cabdff1aSopenharmony_ci        "4:                                                       \n\t"
182cabdff1aSopenharmony_ci        : [tmp0]"=&r"(tmp[0]),        [tmp1]"=&r"(tmp[1]),
183cabdff1aSopenharmony_ci          [tmp2]"=&r"(tmp[2]),        [tmp3]"=&r"(tmp[3]),
184cabdff1aSopenharmony_ci          [tmp4]"=&r"(tmp[4]),        [tmp5]"=&r"(tmp[5]),
185cabdff1aSopenharmony_ci          [tmp6]"=&r"(tmp[6]),        [tmp7]"=&r"(tmp[7]),
186cabdff1aSopenharmony_ci          [dst]"+&r"(block),          [src]"+&r"(pixels),
187cabdff1aSopenharmony_ci          [h_8]"+&r"(h_8),            [res]"+&r"(res),
188cabdff1aSopenharmony_ci          [stride2]"=&r"(stride2),    [stride3]"=&r"(stride3),
189cabdff1aSopenharmony_ci          [stride4]"=&r"(stride4)
190cabdff1aSopenharmony_ci        : [stride]"r"(line_size)
191cabdff1aSopenharmony_ci        : "memory"
192cabdff1aSopenharmony_ci    );
193cabdff1aSopenharmony_ci}
194cabdff1aSopenharmony_ci
195cabdff1aSopenharmony_civoid ff_put_pixels16_8_lsx(uint8_t *block, const uint8_t *pixels,
196cabdff1aSopenharmony_ci                           ptrdiff_t line_size, int h)
197cabdff1aSopenharmony_ci{
198cabdff1aSopenharmony_ci    int h_8 = h >> 3;
199cabdff1aSopenharmony_ci    int res = h & 7;
200cabdff1aSopenharmony_ci    ptrdiff_t stride2, stride3, stride4;
201cabdff1aSopenharmony_ci
202cabdff1aSopenharmony_ci    __asm__ volatile (
203cabdff1aSopenharmony_ci        "beqz     %[h_8],                           2f            \n\t"
204cabdff1aSopenharmony_ci        "slli.d   %[stride2],    %[stride],         1             \n\t"
205cabdff1aSopenharmony_ci        "add.d    %[stride3],    %[stride2],        %[stride]     \n\t"
206cabdff1aSopenharmony_ci        "slli.d   %[stride4],    %[stride2],        1             \n\t"
207cabdff1aSopenharmony_ci        "1:                                                       \n\t"
208cabdff1aSopenharmony_ci        "vld      $vr0,          %[src],            0x0           \n\t"
209cabdff1aSopenharmony_ci        "vldx     $vr1,          %[src],            %[stride]     \n\t"
210cabdff1aSopenharmony_ci        "vldx     $vr2,          %[src],            %[stride2]    \n\t"
211cabdff1aSopenharmony_ci        "vldx     $vr3,          %[src],            %[stride3]    \n\t"
212cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride4]    \n\t"
213cabdff1aSopenharmony_ci        "vld      $vr4,          %[src],            0x0           \n\t"
214cabdff1aSopenharmony_ci        "vldx     $vr5,          %[src],            %[stride]     \n\t"
215cabdff1aSopenharmony_ci        "vldx     $vr6,          %[src],            %[stride2]    \n\t"
216cabdff1aSopenharmony_ci        "vldx     $vr7,          %[src],            %[stride3]    \n\t"
217cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride4]    \n\t"
218cabdff1aSopenharmony_ci
219cabdff1aSopenharmony_ci        "addi.d   %[h_8],        %[h_8],            -1            \n\t"
220cabdff1aSopenharmony_ci
221cabdff1aSopenharmony_ci        "vst      $vr0,          %[dst],            0x0           \n\t"
222cabdff1aSopenharmony_ci        "vstx     $vr1,          %[dst],            %[stride]     \n\t"
223cabdff1aSopenharmony_ci        "vstx     $vr2,          %[dst],            %[stride2]    \n\t"
224cabdff1aSopenharmony_ci        "vstx     $vr3,          %[dst],            %[stride3]    \n\t"
225cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride4]    \n\t"
226cabdff1aSopenharmony_ci        "vst      $vr4,          %[dst],            0x0           \n\t"
227cabdff1aSopenharmony_ci        "vstx     $vr5,          %[dst],            %[stride]     \n\t"
228cabdff1aSopenharmony_ci        "vstx     $vr6,          %[dst],            %[stride2]    \n\t"
229cabdff1aSopenharmony_ci        "vstx     $vr7,          %[dst],            %[stride3]    \n\t"
230cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride4]    \n\t"
231cabdff1aSopenharmony_ci        "bnez     %[h_8],        1b                               \n\t"
232cabdff1aSopenharmony_ci
233cabdff1aSopenharmony_ci        "2:                                                       \n\t"
234cabdff1aSopenharmony_ci        "beqz     %[res],        4f                               \n\t"
235cabdff1aSopenharmony_ci        "3:                                                       \n\t"
236cabdff1aSopenharmony_ci        "vld      $vr0,          %[src],            0x0           \n\t"
237cabdff1aSopenharmony_ci        "add.d    %[src],        %[src],            %[stride]     \n\t"
238cabdff1aSopenharmony_ci        "addi.d   %[res],        %[res],            -1            \n\t"
239cabdff1aSopenharmony_ci        "vst      $vr0,          %[dst],            0x0           \n\t"
240cabdff1aSopenharmony_ci        "add.d    %[dst],        %[dst],            %[stride]     \n\t"
241cabdff1aSopenharmony_ci        "bnez     %[res],        3b                               \n\t"
242cabdff1aSopenharmony_ci        "4:                                                       \n\t"
243cabdff1aSopenharmony_ci        : [dst]"+&r"(block),          [src]"+&r"(pixels),
244cabdff1aSopenharmony_ci          [h_8]"+&r"(h_8),            [res]"+&r"(res),
245cabdff1aSopenharmony_ci          [stride2]"=&r"(stride2),    [stride3]"=&r"(stride3),
246cabdff1aSopenharmony_ci          [stride4]"=&r"(stride4)
247cabdff1aSopenharmony_ci        : [stride]"r"(line_size)
248cabdff1aSopenharmony_ci        : "memory"
249cabdff1aSopenharmony_ci    );
250cabdff1aSopenharmony_ci}
251cabdff1aSopenharmony_ci
/*
 * 8-pixel-wide put with horizontal half-pel interpolation: every output
 * byte is the rounded average of a source pixel and its right neighbour.
 * All three strides passed to the helper are line_size.
 */
void ff_put_pixels8_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;

    put_pixels8_l2_8_lsx(block, pixels, right,
                         line_size, line_size, line_size, h);
}
258cabdff1aSopenharmony_ci
/*
 * 8-pixel-wide put with vertical half-pel interpolation: every output byte
 * is the rounded average of a source pixel and the pixel one row below it.
 * All three strides passed to the helper are line_size.
 */
void ff_put_pixels8_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    put_pixels8_l2_8_lsx(block, pixels, below,
                         line_size, line_size, line_size, h);
}
265cabdff1aSopenharmony_ci
/*
 * 16-pixel-wide put with horizontal half-pel interpolation: every output
 * byte is the rounded average of a source pixel and its right neighbour.
 * All three strides passed to the helper are line_size.
 */
void ff_put_pixels16_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;

    put_pixels16_l2_8_lsx(block, pixels, right,
                          line_size, line_size, line_size, h);
}
272cabdff1aSopenharmony_ci
/*
 * 16-pixel-wide put with vertical half-pel interpolation: every output byte
 * is the rounded average of a source pixel and the pixel one row below it.
 * All three strides passed to the helper are line_size.
 */
void ff_put_pixels16_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    put_pixels16_l2_8_lsx(block, pixels, below,
                          line_size, line_size, line_size, h);
}
279cabdff1aSopenharmony_ci
280cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_16x16_lasx(const uint8_t *src,
281cabdff1aSopenharmony_ci                                            int32_t src_stride,
282cabdff1aSopenharmony_ci                                            uint8_t *dst, int32_t dst_stride)
283cabdff1aSopenharmony_ci{
284cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
285cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
286cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
287cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
288cabdff1aSopenharmony_ci    uint8_t *_src = (uint8_t*)src;
289cabdff1aSopenharmony_ci
290cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
291cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
292cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
293cabdff1aSopenharmony_ci    _src += 1;
294cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
295cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
296cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
297cabdff1aSopenharmony_ci    _src += (src_stride_4x -1);
298cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5,
299cabdff1aSopenharmony_ci              src4, 0x20, src7, src6, 0x20, src0, src1, src2, src3);
300cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
301cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
302cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
303cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
304cabdff1aSopenharmony_ci    dst += dst_stride;
305cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
306cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
307cabdff1aSopenharmony_ci    dst += dst_stride;
308cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
309cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
310cabdff1aSopenharmony_ci    dst += dst_stride;
311cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
312cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
313cabdff1aSopenharmony_ci    dst += dst_stride;
314cabdff1aSopenharmony_ci
315cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
316cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
317cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
318cabdff1aSopenharmony_ci    _src += 1;
319cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
320cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
321cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
322cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
323cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
324cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
325cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
326cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
327cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
328cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
329cabdff1aSopenharmony_ci    dst += dst_stride;
330cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
331cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
332cabdff1aSopenharmony_ci    dst += dst_stride;
333cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
334cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
335cabdff1aSopenharmony_ci    dst += dst_stride;
336cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
337cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
338cabdff1aSopenharmony_ci    dst += dst_stride;
339cabdff1aSopenharmony_ci
340cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
341cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
342cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
343cabdff1aSopenharmony_ci    _src += 1;
344cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
345cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
346cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
347cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
348cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
349cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
350cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
351cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
352cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
353cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
354cabdff1aSopenharmony_ci    dst += dst_stride;
355cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
356cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
357cabdff1aSopenharmony_ci    dst += dst_stride;
358cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
359cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
360cabdff1aSopenharmony_ci    dst += dst_stride;
361cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
362cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
363cabdff1aSopenharmony_ci    dst += dst_stride;
364cabdff1aSopenharmony_ci
365cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
366cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
367cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
368cabdff1aSopenharmony_ci    _src += 1;
369cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
370cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
371cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
372cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
373cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
374cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
375cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
376cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
377cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
378cabdff1aSopenharmony_ci    dst += dst_stride;
379cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
380cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
381cabdff1aSopenharmony_ci    dst += dst_stride;
382cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
383cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
384cabdff1aSopenharmony_ci    dst += dst_stride;
385cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
386cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
387cabdff1aSopenharmony_ci}
388cabdff1aSopenharmony_ci
389cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_8x16_lasx(const uint8_t *src,
390cabdff1aSopenharmony_ci                                           int32_t src_stride,
391cabdff1aSopenharmony_ci                                           uint8_t *dst, int32_t dst_stride)
392cabdff1aSopenharmony_ci{
393cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
394cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
395cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
396cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
397cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
398cabdff1aSopenharmony_ci
399cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
400cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
401cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
402cabdff1aSopenharmony_ci    _src += 1;
403cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
404cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
405cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
406cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
407cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
408cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
409cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
410cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
411cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
412cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
413cabdff1aSopenharmony_ci    dst += dst_stride;
414cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
415cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
416cabdff1aSopenharmony_ci    dst += dst_stride;
417cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
418cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
419cabdff1aSopenharmony_ci    dst += dst_stride;
420cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
421cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
422cabdff1aSopenharmony_ci    dst += dst_stride;
423cabdff1aSopenharmony_ci
424cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
425cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
426cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
427cabdff1aSopenharmony_ci    _src += 1;
428cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
429cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
430cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
431cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
432cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
433cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
434cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
435cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
436cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
437cabdff1aSopenharmony_ci    dst += dst_stride;
438cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
439cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
440cabdff1aSopenharmony_ci    dst += dst_stride;
441cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
442cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 1);
443cabdff1aSopenharmony_ci    dst += dst_stride;
444cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 2);
445cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 8, 3);
446cabdff1aSopenharmony_ci}
447cabdff1aSopenharmony_ci
/**
 * Put a 16-pixel-wide block filtered with the horizontal half-pel
 * (x + 1/2) no-rounding kernel.
 *
 * Dispatches on the block height: 16 rows and 8 rows each have a dedicated
 * kernel. Any other height is ignored and nothing is written to block.
 *
 * @param block     destination pixels
 * @param pixels    source pixels
 * @param line_size stride, used for both source and destination
 * @param h         number of rows to produce
 */
void ff_put_no_rnd_pixels16_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h)
{
    switch (h) {
    case 16:
        common_hz_bil_no_rnd_16x16_lasx(pixels, line_size, block, line_size);
        break;
    case 8:
        common_hz_bil_no_rnd_8x16_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: intentionally a no-op. */
        break;
    }
}
457cabdff1aSopenharmony_ci
458cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_16x16_lasx(const uint8_t *src,
459cabdff1aSopenharmony_ci                                            int32_t src_stride,
460cabdff1aSopenharmony_ci                                            uint8_t *dst, int32_t dst_stride)
461cabdff1aSopenharmony_ci{
462cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8;
463cabdff1aSopenharmony_ci    __m256i src9, src10, src11, src12, src13, src14, src15, src16;
464cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
465cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
466cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
467cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
468cabdff1aSopenharmony_ci
469cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
470cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
471cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
472cabdff1aSopenharmony_ci    _src += src_stride_4x;
473cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
474cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
475cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
476cabdff1aSopenharmony_ci    _src += src_stride_4x;
477cabdff1aSopenharmony_ci    src8 = __lasx_xvld(_src, 0);
478cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src9, src10);
479cabdff1aSopenharmony_ci    src11 = __lasx_xvldx(_src, src_stride_3x);
480cabdff1aSopenharmony_ci    _src += src_stride_4x;
481cabdff1aSopenharmony_ci    src12 = __lasx_xvld(_src, 0);
482cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
483cabdff1aSopenharmony_ci              src13, src14);
484cabdff1aSopenharmony_ci    src15 = __lasx_xvldx(_src, src_stride_3x);
485cabdff1aSopenharmony_ci    _src += src_stride_4x;
486cabdff1aSopenharmony_ci    src16 = __lasx_xvld(_src, 0);
487cabdff1aSopenharmony_ci
488cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src2, src1, 0x20, src3, src2,
489cabdff1aSopenharmony_ci              0x20, src4, src3, 0x20, src0, src1, src2, src3);
490cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src5, src4, 0x20, src6, src5, 0x20, src7, src6,
491cabdff1aSopenharmony_ci              0x20, src8, src7, 0x20, src4, src5, src6, src7);
492cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src9, src8, 0x20, src10, src9, 0x20, src11,
493cabdff1aSopenharmony_ci              src10, 0x20, src12, src11, 0x20, src8, src9, src10, src11);
494cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src13, src12, 0x20, src14, src13, 0x20, src15,
495cabdff1aSopenharmony_ci              src14, 0x20, src16, src15, 0x20, src12, src13, src14, src15);
496cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvavg_bu, src0, src1, src2, src3, src4, src5, src6, src7,
497cabdff1aSopenharmony_ci              src0, src2, src4, src6);
498cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvavg_bu, src8, src9, src10, src11, src12, src13, src14,
499cabdff1aSopenharmony_ci              src15, src8, src10, src12, src14);
500cabdff1aSopenharmony_ci
501cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
502cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
503cabdff1aSopenharmony_ci    dst += dst_stride;
504cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
505cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
506cabdff1aSopenharmony_ci    dst += dst_stride;
507cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 0, 0);
508cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 8, 1);
509cabdff1aSopenharmony_ci    dst += dst_stride;
510cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 0, 2);
511cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 8, 3);
512cabdff1aSopenharmony_ci    dst += dst_stride;
513cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 0, 0);
514cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 8, 1);
515cabdff1aSopenharmony_ci    dst += dst_stride;
516cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 0, 2);
517cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 8, 3);
518cabdff1aSopenharmony_ci    dst += dst_stride;
519cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 0, 0);
520cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 8, 1);
521cabdff1aSopenharmony_ci    dst += dst_stride;
522cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 0, 2);
523cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 8, 3);
524cabdff1aSopenharmony_ci    dst += dst_stride;
525cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src8, dst, 0, 0);
526cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src8, dst, 8, 1);
527cabdff1aSopenharmony_ci    dst += dst_stride;
528cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src8, dst, 0, 2);
529cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src8, dst, 8, 3);
530cabdff1aSopenharmony_ci    dst += dst_stride;
531cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src10, dst, 0, 0);
532cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src10, dst, 8, 1);
533cabdff1aSopenharmony_ci    dst += dst_stride;
534cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src10, dst, 0, 2);
535cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src10, dst, 8, 3);
536cabdff1aSopenharmony_ci    dst += dst_stride;
537cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src12, dst, 0, 0);
538cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src12, dst, 8, 1);
539cabdff1aSopenharmony_ci    dst += dst_stride;
540cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src12, dst, 0, 2);
541cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src12, dst, 8, 3);
542cabdff1aSopenharmony_ci    dst += dst_stride;
543cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src14, dst, 0, 0);
544cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src14, dst, 8, 1);
545cabdff1aSopenharmony_ci    dst += dst_stride;
546cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src14, dst, 0, 2);
547cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src14, dst, 8, 3);
548cabdff1aSopenharmony_ci}
549cabdff1aSopenharmony_ci
550cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_8x16_lasx(const uint8_t *src,
551cabdff1aSopenharmony_ci                                           int32_t src_stride,
552cabdff1aSopenharmony_ci                                           uint8_t *dst, int32_t dst_stride)
553cabdff1aSopenharmony_ci{
554cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8;
555cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
556cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
557cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
558cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
559cabdff1aSopenharmony_ci
560cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
561cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
562cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
563cabdff1aSopenharmony_ci    _src += src_stride_4x;
564cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
565cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
566cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
567cabdff1aSopenharmony_ci    _src += src_stride_4x;
568cabdff1aSopenharmony_ci    src8 = __lasx_xvld(_src, 0);
569cabdff1aSopenharmony_ci
570cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src2, src1, 0x20, src3, src2,
571cabdff1aSopenharmony_ci              0x20, src4, src3, 0x20, src0, src1, src2, src3);
572cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src5, src4, 0x20, src6, src5, 0x20, src7, src6,
573cabdff1aSopenharmony_ci              0x20, src8, src7, 0x20, src4, src5, src6, src7);
574cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvavg_bu, src0, src1, src2, src3, src4, src5, src6, src7,
575cabdff1aSopenharmony_ci              src0, src2, src4, src6);
576cabdff1aSopenharmony_ci
577cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
578cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 1);
579cabdff1aSopenharmony_ci    dst += dst_stride;
580cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 2);
581cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 8, 3);
582cabdff1aSopenharmony_ci    dst += dst_stride;
583cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 0, 0);
584cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 8, 1);
585cabdff1aSopenharmony_ci    dst += dst_stride;
586cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 0, 2);
587cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src2, dst, 8, 3);
588cabdff1aSopenharmony_ci    dst += dst_stride;
589cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 0, 0);
590cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 8, 1);
591cabdff1aSopenharmony_ci    dst += dst_stride;
592cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 0, 2);
593cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src4, dst, 8, 3);
594cabdff1aSopenharmony_ci    dst += dst_stride;
595cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 0, 0);
596cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 8, 1);
597cabdff1aSopenharmony_ci    dst += dst_stride;
598cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 0, 2);
599cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src6, dst, 8, 3);
600cabdff1aSopenharmony_ci}
601cabdff1aSopenharmony_ci
/**
 * Put a 16-pixel-wide block filtered with the vertical half-pel
 * (y + 1/2) no-rounding kernel.
 *
 * Dispatches on the block height: 16 rows and 8 rows each have a dedicated
 * kernel. Any other height is ignored and nothing is written to block.
 *
 * @param block     destination pixels
 * @param pixels    source pixels
 * @param line_size stride, used for both source and destination
 * @param h         number of rows to produce
 */
void ff_put_no_rnd_pixels16_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h)
{
    switch (h) {
    case 16:
        common_vt_bil_no_rnd_16x16_lasx(pixels, line_size, block, line_size);
        break;
    case 8:
        common_vt_bil_no_rnd_8x16_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: intentionally a no-op. */
        break;
    }
}
611cabdff1aSopenharmony_ci
612cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_16x16_lasx(const uint8_t *src,
613cabdff1aSopenharmony_ci                                            int32_t src_stride,
614cabdff1aSopenharmony_ci                                            uint8_t *dst, int32_t dst_stride)
615cabdff1aSopenharmony_ci{
616cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
617cabdff1aSopenharmony_ci    __m256i src10, src11, src12, src13, src14, src15, src16, src17;
618cabdff1aSopenharmony_ci    __m256i sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
619cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
620cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
621cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
622cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
623cabdff1aSopenharmony_ci
624cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
625cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
626cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
627cabdff1aSopenharmony_ci    _src += src_stride_4x;
628cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
629cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
630cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
631cabdff1aSopenharmony_ci    _src += (1 - src_stride_4x);
632cabdff1aSopenharmony_ci    src9 = __lasx_xvld(_src, 0);
633cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
634cabdff1aSopenharmony_ci              src10, src11);
635cabdff1aSopenharmony_ci    src12 = __lasx_xvldx(_src, src_stride_3x);
636cabdff1aSopenharmony_ci    _src += src_stride_4x;
637cabdff1aSopenharmony_ci    src13 = __lasx_xvld(_src, 0);
638cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
639cabdff1aSopenharmony_ci              src14, src15);
640cabdff1aSopenharmony_ci    src16 = __lasx_xvldx(_src, src_stride_3x);
641cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
642cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src8, src17);
643cabdff1aSopenharmony_ci
644cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src0, src4, 0x02, src1, src5, 0x02, src2,
645cabdff1aSopenharmony_ci              src6, 0x02, src3, src7, 0x02, src0, src1, src2, src3);
646cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src4, src8, 0x02, src9, src13, 0x02, src10,
647cabdff1aSopenharmony_ci              src14, 0x02, src11, src15, 0x02, src4, src5, src6, src7);
648cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvpermi_q, src12, src16, 0x02, src13, src17, 0x02,
649cabdff1aSopenharmony_ci              src8, src9);
650cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_h, src5, src0, src6, src1, src7, src2, src8, src3,
651cabdff1aSopenharmony_ci              sum0, sum2, sum4, sum6);
652cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvh_h, src5, src0, src6, src1, src7, src2, src8, src3,
653cabdff1aSopenharmony_ci              sum1, sum3, sum5, sum7);
654cabdff1aSopenharmony_ci    src8 = __lasx_xvilvl_h(src9, src4);
655cabdff1aSopenharmony_ci    src9 = __lasx_xvilvh_h(src9, src4);
656cabdff1aSopenharmony_ci
657cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum0, sum0, sum1, sum1, sum2, sum2,
658cabdff1aSopenharmony_ci              sum3, sum3, src0, src1, src2, src3);
659cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum4, sum4, sum5, sum5, sum6, sum6,
660cabdff1aSopenharmony_ci              sum7, sum7, src4, src5, src6, src7);
661cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvhaddw_hu_bu, src8, src8, src9, src9, src8, src9);
662cabdff1aSopenharmony_ci
663cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src0, src2, src1, src3, src2, src4, src3, src5,
664cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
665cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src4, src6, src5, src7, src6, src8, src7, src9,
666cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
667cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum0, 1, sum1, 1, sum2, 1, sum3, 1,
668cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
669cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum4, 1, sum5, 1, sum6, 1, sum7, 1,
670cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
671cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvsrani_b_h, sum1, sum0, 2, sum3, sum2, 2, sum5, sum4, 2,
672cabdff1aSopenharmony_ci              sum7, sum6, 2, sum0, sum1, sum2, sum3);
673cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 0);
674cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 1);
675cabdff1aSopenharmony_ci    dst += dst_stride;
676cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 0);
677cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 1);
678cabdff1aSopenharmony_ci    dst += dst_stride;
679cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 0);
680cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 1);
681cabdff1aSopenharmony_ci    dst += dst_stride;
682cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 0);
683cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 1);
684cabdff1aSopenharmony_ci    dst += dst_stride;
685cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 2);
686cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 3);
687cabdff1aSopenharmony_ci    dst += dst_stride;
688cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 2);
689cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 3);
690cabdff1aSopenharmony_ci    dst += dst_stride;
691cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 2);
692cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 3);
693cabdff1aSopenharmony_ci    dst += dst_stride;
694cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 2);
695cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 3);
696cabdff1aSopenharmony_ci    dst += dst_stride;
697cabdff1aSopenharmony_ci
698cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
699cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
700cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
701cabdff1aSopenharmony_ci    _src += src_stride_4x;
702cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
703cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
704cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
705cabdff1aSopenharmony_ci    _src += (1 - src_stride_4x);
706cabdff1aSopenharmony_ci    src9 = __lasx_xvld(_src, 0);
707cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
708cabdff1aSopenharmony_ci              src10, src11);
709cabdff1aSopenharmony_ci    src12 = __lasx_xvldx(_src, src_stride_3x);
710cabdff1aSopenharmony_ci    _src += src_stride_4x;
711cabdff1aSopenharmony_ci    src13 = __lasx_xvld(_src, 0);
712cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
713cabdff1aSopenharmony_ci              src14, src15);
714cabdff1aSopenharmony_ci    src16 = __lasx_xvldx(_src, src_stride_3x);
715cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
716cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src8, src17);
717cabdff1aSopenharmony_ci
718cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src0, src4, 0x02, src1, src5, 0x02, src2, src6, 0x02,
719cabdff1aSopenharmony_ci              src3, src7, 0x02, src0, src1, src2, src3);
720cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src4, src8, 0x02, src9, src13, 0x02, src10, src14, 0x02,
721cabdff1aSopenharmony_ci              src11, src15, 0x02, src4, src5, src6, src7);
722cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvpermi_q, src12, src16, 0x02, src13, src17, 0x02, src8, src9);
723cabdff1aSopenharmony_ci
724cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_h, src5, src0, src6, src1, src7, src2, src8, src3,
725cabdff1aSopenharmony_ci              sum0, sum2, sum4, sum6);
726cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvh_h, src5, src0, src6, src1, src7, src2, src8, src3,
727cabdff1aSopenharmony_ci              sum1, sum3, sum5, sum7);
728cabdff1aSopenharmony_ci    src8 = __lasx_xvilvl_h(src9, src4);
729cabdff1aSopenharmony_ci    src9 = __lasx_xvilvh_h(src9, src4);
730cabdff1aSopenharmony_ci
731cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum0, sum0, sum1, sum1, sum2, sum2,
732cabdff1aSopenharmony_ci              sum3, sum3, src0, src1, src2, src3);
733cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum4, sum4, sum5, sum5, sum6, sum6,
734cabdff1aSopenharmony_ci              sum7, sum7, src4, src5, src6, src7);
735cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvhaddw_hu_bu, src8, src8, src9, src9, src8, src9);
736cabdff1aSopenharmony_ci
737cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src0, src2, src1, src3, src2, src4, src3, src5,
738cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
739cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src4, src6, src5, src7, src6, src8, src7, src9,
740cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
741cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum0, 1, sum1, 1, sum2, 1, sum3, 1,
742cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
743cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum4, 1, sum5, 1, sum6, 1, sum7, 1,
744cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
745cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvsrani_b_h, sum1, sum0, 2, sum3, sum2, 2, sum5, sum4, 2,
746cabdff1aSopenharmony_ci              sum7, sum6, 2, sum0, sum1, sum2, sum3);
747cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 0);
748cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 1);
749cabdff1aSopenharmony_ci    dst += dst_stride;
750cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 0);
751cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 1);
752cabdff1aSopenharmony_ci    dst += dst_stride;
753cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 0);
754cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 1);
755cabdff1aSopenharmony_ci    dst += dst_stride;
756cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 0);
757cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 1);
758cabdff1aSopenharmony_ci    dst += dst_stride;
759cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 2);
760cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 3);
761cabdff1aSopenharmony_ci    dst += dst_stride;
762cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 2);
763cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 3);
764cabdff1aSopenharmony_ci    dst += dst_stride;
765cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 2);
766cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 3);
767cabdff1aSopenharmony_ci    dst += dst_stride;
768cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 2);
769cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 3);
770cabdff1aSopenharmony_ci}
771cabdff1aSopenharmony_ci
772cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_8x16_lasx(const uint8_t *src,
773cabdff1aSopenharmony_ci                                           int32_t src_stride,
774cabdff1aSopenharmony_ci                                           uint8_t *dst, int32_t dst_stride)
775cabdff1aSopenharmony_ci{
776cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
777cabdff1aSopenharmony_ci    __m256i src10, src11, src12, src13, src14, src15, src16, src17;
778cabdff1aSopenharmony_ci    __m256i sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
779cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
780cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
781cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
782cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
783cabdff1aSopenharmony_ci
784cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
785cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
786cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
787cabdff1aSopenharmony_ci    _src += src_stride_4x;
788cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
789cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
790cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
791cabdff1aSopenharmony_ci    _src += (1 - src_stride_4x);
792cabdff1aSopenharmony_ci    src9 = __lasx_xvld(_src, 0);
793cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
794cabdff1aSopenharmony_ci              src10, src11);
795cabdff1aSopenharmony_ci    src12 = __lasx_xvldx(_src, src_stride_3x);
796cabdff1aSopenharmony_ci    _src += src_stride_4x;
797cabdff1aSopenharmony_ci    src13 = __lasx_xvld(_src, 0);
798cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
799cabdff1aSopenharmony_ci              src14, src15);
800cabdff1aSopenharmony_ci    src16 = __lasx_xvldx(_src, src_stride_3x);
801cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
802cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src8, src17);
803cabdff1aSopenharmony_ci
804cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src0, src4, 0x02, src1, src5, 0x02, src2,
805cabdff1aSopenharmony_ci              src6, 0x02, src3, src7, 0x02, src0, src1, src2, src3);
806cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src4, src8, 0x02, src9, src13, 0x02, src10,
807cabdff1aSopenharmony_ci              src14, 0x02, src11, src15, 0x02, src4, src5, src6, src7);
808cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvpermi_q, src12, src16, 0x02, src13, src17, 0x02, src8, src9);
809cabdff1aSopenharmony_ci
810cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_h, src5, src0, src6, src1, src7, src2, src8, src3,
811cabdff1aSopenharmony_ci              sum0, sum2, sum4, sum6);
812cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvh_h, src5, src0, src6, src1, src7, src2, src8, src3,
813cabdff1aSopenharmony_ci              sum1, sum3, sum5, sum7);
814cabdff1aSopenharmony_ci    src8 = __lasx_xvilvl_h(src9, src4);
815cabdff1aSopenharmony_ci    src9 = __lasx_xvilvh_h(src9, src4);
816cabdff1aSopenharmony_ci
817cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum0, sum0, sum1, sum1, sum2, sum2,
818cabdff1aSopenharmony_ci              sum3, sum3, src0, src1, src2, src3);
819cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum4, sum4, sum5, sum5, sum6, sum6,
820cabdff1aSopenharmony_ci              sum7, sum7, src4, src5, src6, src7);
821cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvhaddw_hu_bu, src8, src8, src9, src9, src8, src9);
822cabdff1aSopenharmony_ci
823cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src0, src2, src1, src3, src2, src4, src3, src5,
824cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
825cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src4, src6, src5, src7, src6, src8, src7, src9,
826cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
827cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum0, 1, sum1, 1, sum2, 1, sum3, 1,
828cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
829cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum4, 1, sum5, 1, sum6, 1, sum7, 1,
830cabdff1aSopenharmony_ci              sum4, sum5, sum6, sum7);
831cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvsrani_b_h, sum1, sum0, 2, sum3, sum2, 2, sum5, sum4, 2,
832cabdff1aSopenharmony_ci              sum7, sum6, 2, sum0, sum1, sum2, sum3);
833cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 0);
834cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 1);
835cabdff1aSopenharmony_ci    dst += dst_stride;
836cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 0);
837cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 1);
838cabdff1aSopenharmony_ci    dst += dst_stride;
839cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 0);
840cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 1);
841cabdff1aSopenharmony_ci    dst += dst_stride;
842cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 0);
843cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 1);
844cabdff1aSopenharmony_ci    dst += dst_stride;
845cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 2);
846cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 8, 3);
847cabdff1aSopenharmony_ci    dst += dst_stride;
848cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 2);
849cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 8, 3);
850cabdff1aSopenharmony_ci    dst += dst_stride;
851cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 0, 2);
852cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum2, dst, 8, 3);
853cabdff1aSopenharmony_ci    dst += dst_stride;
854cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 0, 2);
855cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum3, dst, 8, 3);
856cabdff1aSopenharmony_ci}
857cabdff1aSopenharmony_ci
/**
 * Put a 16-pixel-wide block filtered with the diagonal half-pel
 * (x + 1/2, y + 1/2) no-rounding kernel.
 *
 * Dispatches on the block height: 16 rows and 8 rows each have a dedicated
 * kernel. Any other height is ignored and nothing is written to block.
 *
 * @param block     destination pixels
 * @param pixels    source pixels
 * @param line_size stride, used for both source and destination
 * @param h         number of rows to produce
 */
void ff_put_no_rnd_pixels16_xy2_8_lasx(uint8_t *block,
                                       const uint8_t *pixels,
                                       ptrdiff_t line_size, int h)
{
    switch (h) {
    case 16:
        common_hv_bil_no_rnd_16x16_lasx(pixels, line_size, block, line_size);
        break;
    case 8:
        common_hv_bil_no_rnd_8x16_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: intentionally a no-op. */
        break;
    }
}
868cabdff1aSopenharmony_ci
869cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_8x8_lasx(const uint8_t *src,
870cabdff1aSopenharmony_ci                                          int32_t src_stride,
871cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
872cabdff1aSopenharmony_ci{
873cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
874cabdff1aSopenharmony_ci    __m256i src8, src9, src10, src11, src12, src13, src14, src15;
875cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
876cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
877cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
878cabdff1aSopenharmony_ci    int32_t dst_stride_4x = dst_stride << 2;
879cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
880cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
881cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
882cabdff1aSopenharmony_ci
883cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
884cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
885cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
886cabdff1aSopenharmony_ci    _src += src_stride_4x;
887cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
888cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
889cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
890cabdff1aSopenharmony_ci    _src += (1 - src_stride_4x);
891cabdff1aSopenharmony_ci    src8 = __lasx_xvld(_src, 0);
892cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src9, src10);
893cabdff1aSopenharmony_ci    src11 = __lasx_xvldx(_src, src_stride_3x);
894cabdff1aSopenharmony_ci    _src += src_stride_4x;
895cabdff1aSopenharmony_ci    src12 = __lasx_xvld(_src, 0);
896cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
897cabdff1aSopenharmony_ci              src13, src14);
898cabdff1aSopenharmony_ci    src15 = __lasx_xvldx(_src, src_stride_3x);
899cabdff1aSopenharmony_ci
900cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src1, src0, src3, src2, src5, src4, src7,
901cabdff1aSopenharmony_ci              src6, src0, src1, src2, src3);
902cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src9, src8, src11, src10, src13, src12, src15,
903cabdff1aSopenharmony_ci              src14, src4, src5, src6, src7);
904cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src5, src4,
905cabdff1aSopenharmony_ci              0x20, src7, src6, 0x20, src0, src1, src2, src3);
906cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src2);
907cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src1, src3);
908cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
909cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride, 0, 1);
910cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_2x, 0, 2);
911cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_3x, 0, 3);
912cabdff1aSopenharmony_ci    dst += dst_stride_4x;
913cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
914cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride, 0, 1);
915cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride_2x, 0, 2);
916cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride_3x, 0, 3);
917cabdff1aSopenharmony_ci}
918cabdff1aSopenharmony_ci
919cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_4x8_lasx(const uint8_t *src,
920cabdff1aSopenharmony_ci                                          int32_t src_stride,
921cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
922cabdff1aSopenharmony_ci{
923cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
924cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
925cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
926cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
927cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
928cabdff1aSopenharmony_ci    uint8_t *_src = (uint8_t*)src;
929cabdff1aSopenharmony_ci
930cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
931cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
932cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
933cabdff1aSopenharmony_ci    _src += 1;
934cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
935cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
936cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
937cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src1, src0, src3, src2, src5, src4, src7, src6,
938cabdff1aSopenharmony_ci              src0, src1, src2, src3);
939cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src3, src2, 0x20, src0, src1);
940cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src1);
941cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
942cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride, 0, 1);
943cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_2x, 0, 2);
944cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_3x, 0, 3);
945cabdff1aSopenharmony_ci}
946cabdff1aSopenharmony_ci
/**
 * Put an 8-pixel-wide block interpolated horizontally without rounding.
 *
 * Dispatches on the block height: 8 rows and 4 rows each have a dedicated
 * kernel. Source and destination use the same stride (line_size).
 * Any other value of h leaves the destination untouched.
 */
void ff_put_no_rnd_pixels8_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h)
{
    switch (h) {
    case 8:
        common_hz_bil_no_rnd_8x8_lasx(pixels, line_size, block, line_size);
        break;
    case 4:
        common_hz_bil_no_rnd_4x8_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: nothing is written. */
        break;
    }
}
956cabdff1aSopenharmony_ci
957cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_8x8_lasx(const uint8_t *src, int32_t src_stride,
958cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
959cabdff1aSopenharmony_ci{
960cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8;
961cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
962cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
963cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
964cabdff1aSopenharmony_ci    int32_t dst_stride_4x = dst_stride << 2;
965cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
966cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
967cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
968cabdff1aSopenharmony_ci
969cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
970cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
971cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
972cabdff1aSopenharmony_ci    _src += src_stride_4x;
973cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
974cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
975cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
976cabdff1aSopenharmony_ci    _src += src_stride_4x;
977cabdff1aSopenharmony_ci    src8 = __lasx_xvld(_src, 0);
978cabdff1aSopenharmony_ci
979cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src1, src0, src2, src1, src3, src2, src4, src3,
980cabdff1aSopenharmony_ci              src0, src1, src2, src3);
981cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src5, src4, src6, src5, src7, src6, src8, src7,
982cabdff1aSopenharmony_ci              src4, src5, src6, src7);
983cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src2, src0, 0x20, src3, src1, 0x20, src6, src4,
984cabdff1aSopenharmony_ci              0x20, src7, src5, 0x20, src0, src1, src2, src3);
985cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src1);
986cabdff1aSopenharmony_ci    src1 = __lasx_xvavg_bu(src2, src3);
987cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
988cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride, 0, 1);
989cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_2x, 0, 2);
990cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_3x, 0, 3);
991cabdff1aSopenharmony_ci    dst += dst_stride_4x;
992cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst, 0, 0);
993cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride, 0, 1);
994cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride_2x, 0, 2);
995cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src1, dst + dst_stride_3x, 0, 3);
996cabdff1aSopenharmony_ci}
997cabdff1aSopenharmony_ci
998cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_4x8_lasx(const uint8_t *src, int32_t src_stride,
999cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
1000cabdff1aSopenharmony_ci{
1001cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4;
1002cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
1003cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
1004cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
1005cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
1006cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
1007cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
1008cabdff1aSopenharmony_ci
1009cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
1010cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, _src,
1011cabdff1aSopenharmony_ci              src_stride_3x, _src, src_stride_4x, src1, src2, src3, src4);
1012cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvpickev_d, src1, src0, src2, src1, src3, src2, src4, src3,
1013cabdff1aSopenharmony_ci              src0, src1, src2, src3);
1014cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvpermi_q, src2, src0, 0x20, src3, src1, 0x20, src0, src1);
1015cabdff1aSopenharmony_ci    src0 = __lasx_xvavg_bu(src0, src1);
1016cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst, 0, 0);
1017cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride, 0, 1);
1018cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_2x, 0, 2);
1019cabdff1aSopenharmony_ci    __lasx_xvstelm_d(src0, dst + dst_stride_3x, 0, 3);
1020cabdff1aSopenharmony_ci}
1021cabdff1aSopenharmony_ci
/**
 * Put an 8-pixel-wide block interpolated vertically without rounding.
 *
 * Dispatches on the block height: 8 rows and 4 rows each have a dedicated
 * kernel. Source and destination use the same stride (line_size).
 * Any other value of h leaves the destination untouched.
 */
void ff_put_no_rnd_pixels8_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h)
{
    switch (h) {
    case 8:
        common_vt_bil_no_rnd_8x8_lasx(pixels, line_size, block, line_size);
        break;
    case 4:
        common_vt_bil_no_rnd_4x8_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: nothing is written. */
        break;
    }
}
1031cabdff1aSopenharmony_ci
1032cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_8x8_lasx(const uint8_t *src, int32_t src_stride,
1033cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
1034cabdff1aSopenharmony_ci{
1035cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
1036cabdff1aSopenharmony_ci    __m256i src8, src9, src10, src11, src12, src13, src14, src15, src16, src17;
1037cabdff1aSopenharmony_ci    __m256i sum0, sum1, sum2, sum3;
1038cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
1039cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
1040cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
1041cabdff1aSopenharmony_ci    int32_t dst_stride_4x = dst_stride << 2;
1042cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
1043cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
1044cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
1045cabdff1aSopenharmony_ci
1046cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
1047cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
1048cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
1049cabdff1aSopenharmony_ci    _src += src_stride_4x;
1050cabdff1aSopenharmony_ci    src4 = __lasx_xvld(_src, 0);
1051cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
1052cabdff1aSopenharmony_ci    src7 = __lasx_xvldx(_src, src_stride_3x);
1053cabdff1aSopenharmony_ci    _src += (1 - src_stride_4x);
1054cabdff1aSopenharmony_ci    src9 = __lasx_xvld(_src, 0);
1055cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
1056cabdff1aSopenharmony_ci              src10, src11);
1057cabdff1aSopenharmony_ci    src12 = __lasx_xvldx(_src, src_stride_3x);
1058cabdff1aSopenharmony_ci    _src += src_stride_4x;
1059cabdff1aSopenharmony_ci    src13 = __lasx_xvld(_src, 0);
1060cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
1061cabdff1aSopenharmony_ci              src14, src15);
1062cabdff1aSopenharmony_ci    src16 = __lasx_xvldx(_src, src_stride_3x);
1063cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
1064cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src8, src17);
1065cabdff1aSopenharmony_ci
1066cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_b, src9, src0, src10, src1, src11, src2, src12, src3,
1067cabdff1aSopenharmony_ci              src0, src1, src2, src3);
1068cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_b, src13, src4, src14, src5, src15, src6, src16, src7,
1069cabdff1aSopenharmony_ci              src4, src5, src6, src7);
1070cabdff1aSopenharmony_ci    src8 = __lasx_xvilvl_b(src17, src8);
1071cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src2, src1, 0x20, src3, src2,
1072cabdff1aSopenharmony_ci              0x20, src4, src3, 0x20, src0, src1, src2, src3);
1073cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src5, src4, 0x20, src6, src5, 0x20, src7, src6,
1074cabdff1aSopenharmony_ci              0x20, src8, src7, 0x20, src4, src5, src6, src7);
1075cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, src0, src0, src1, src1, src2, src2,
1076cabdff1aSopenharmony_ci              src3, src3, src0, src1, src2, src3);
1077cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, src4, src4, src5, src5, src6, src6,
1078cabdff1aSopenharmony_ci              src7, src7, src4, src5, src6, src7);
1079cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvadd_h, src0, src1, src2, src3, src4, src5, src6, src7,
1080cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
1081cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvaddi_hu, sum0, 1, sum1, 1, sum2, 1, sum3, 1,
1082cabdff1aSopenharmony_ci              sum0, sum1, sum2, sum3);
1083cabdff1aSopenharmony_ci    DUP2_ARG3(__lasx_xvsrani_b_h, sum1, sum0, 2, sum3, sum2, 2, sum0, sum1);
1084cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 0);
1085cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride, 0, 2);
1086cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride_2x, 0, 1);
1087cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride_3x, 0, 3);
1088cabdff1aSopenharmony_ci    dst += dst_stride_4x;
1089cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst, 0, 0);
1090cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst + dst_stride, 0, 2);
1091cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst + dst_stride_2x, 0, 1);
1092cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum1, dst + dst_stride_3x, 0, 3);
1093cabdff1aSopenharmony_ci}
1094cabdff1aSopenharmony_ci
1095cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_4x8_lasx(const uint8_t *src, int32_t src_stride,
1096cabdff1aSopenharmony_ci                                          uint8_t *dst, int32_t dst_stride)
1097cabdff1aSopenharmony_ci{
1098cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
1099cabdff1aSopenharmony_ci    __m256i src8, src9, sum0, sum1;
1100cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
1101cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
1102cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
1103cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
1104cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
1105cabdff1aSopenharmony_ci    uint8_t *_src = (uint8_t*)src;
1106cabdff1aSopenharmony_ci
1107cabdff1aSopenharmony_ci    src0 = __lasx_xvld(_src, 0);
1108cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
1109cabdff1aSopenharmony_ci    src3 = __lasx_xvldx(_src, src_stride_3x);
1110cabdff1aSopenharmony_ci    _src += 1;
1111cabdff1aSopenharmony_ci    src5 = __lasx_xvld(_src, 0);
1112cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src6, src7);
1113cabdff1aSopenharmony_ci    src8 = __lasx_xvldx(_src, src_stride_3x);
1114cabdff1aSopenharmony_ci    _src += (src_stride_4x - 1);
1115cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src4, src9);
1116cabdff1aSopenharmony_ci
1117cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvilvl_b, src5, src0, src6, src1, src7, src2, src8, src3,
1118cabdff1aSopenharmony_ci              src0, src1, src2, src3);
1119cabdff1aSopenharmony_ci    src4 = __lasx_xvilvl_b(src9, src4);
1120cabdff1aSopenharmony_ci    DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src2, src1, 0x20, src3, src2,
1121cabdff1aSopenharmony_ci              0x20, src4, src3, 0x20, src0, src1, src2, src3);
1122cabdff1aSopenharmony_ci    DUP4_ARG2(__lasx_xvhaddw_hu_bu, src0, src0, src1, src1, src2, src2,
1123cabdff1aSopenharmony_ci              src3, src3, src0, src1, src2, src3);
1124cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvadd_h, src0, src1, src2, src3, sum0, sum1);
1125cabdff1aSopenharmony_ci    sum0 = __lasx_xvaddi_hu(sum0, 1);
1126cabdff1aSopenharmony_ci    sum1 = __lasx_xvaddi_hu(sum1, 1);
1127cabdff1aSopenharmony_ci    sum0 = __lasx_xvsrani_b_h(sum1, sum0, 2);
1128cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst, 0, 0);
1129cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride, 0, 2);
1130cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride_2x, 0, 1);
1131cabdff1aSopenharmony_ci    __lasx_xvstelm_d(sum0, dst + dst_stride_3x, 0, 3);
1132cabdff1aSopenharmony_ci}
1133cabdff1aSopenharmony_ci
/**
 * Put an 8-pixel-wide block interpolated in both x and y without rounding.
 *
 * Dispatches on the block height: 8 rows and 4 rows each have a dedicated
 * kernel. Source and destination use the same stride (line_size).
 * Any other value of h leaves the destination untouched.
 */
void ff_put_no_rnd_pixels8_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h)
{
    switch (h) {
    case 8:
        common_hv_bil_no_rnd_8x8_lasx(pixels, line_size, block, line_size);
        break;
    case 4:
        common_hv_bil_no_rnd_4x8_lasx(pixels, line_size, block, line_size);
        break;
    default:
        /* Unsupported height: nothing is written. */
        break;
    }
}
1143cabdff1aSopenharmony_ci
1144cabdff1aSopenharmony_cistatic void common_hv_bil_16w_lasx(const uint8_t *src, int32_t src_stride,
1145cabdff1aSopenharmony_ci                                   uint8_t *dst, int32_t dst_stride,
1146cabdff1aSopenharmony_ci                                   uint8_t height)
1147cabdff1aSopenharmony_ci{
1148cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
1149cabdff1aSopenharmony_ci    __m256i src10, src11, src12, src13, src14, src15, src16, src17;
1150cabdff1aSopenharmony_ci    __m256i sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1151cabdff1aSopenharmony_ci    uint8_t loop_cnt;
1152cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
1153cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
1154cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
1155cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
1156cabdff1aSopenharmony_ci
1157cabdff1aSopenharmony_ci    for (loop_cnt = (height >> 3); loop_cnt--;) {
1158cabdff1aSopenharmony_ci        src0 = __lasx_xvld(_src, 0);
1159cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src1, src2);
1160cabdff1aSopenharmony_ci        src3 = __lasx_xvldx(_src, src_stride_3x);
1161cabdff1aSopenharmony_ci        _src += src_stride_4x;
1162cabdff1aSopenharmony_ci        src4 = __lasx_xvld(_src, 0);
1163cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src5, src6);
1164cabdff1aSopenharmony_ci        src7 = __lasx_xvldx(_src, src_stride_3x);
1165cabdff1aSopenharmony_ci        _src += (1 - src_stride_4x);
1166cabdff1aSopenharmony_ci        src9 = __lasx_xvld(_src, 0);
1167cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
1168cabdff1aSopenharmony_ci                  src10, src11);
1169cabdff1aSopenharmony_ci        src12 = __lasx_xvldx(_src, src_stride_3x);
1170cabdff1aSopenharmony_ci        _src += src_stride_4x;
1171cabdff1aSopenharmony_ci        src13 = __lasx_xvld(_src, 0);
1172cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x,
1173cabdff1aSopenharmony_ci                  src14, src15);
1174cabdff1aSopenharmony_ci        src16 = __lasx_xvldx(_src, src_stride_3x);
1175cabdff1aSopenharmony_ci        _src += (src_stride_4x - 1);
1176cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src8, src17);
1177cabdff1aSopenharmony_ci
1178cabdff1aSopenharmony_ci        DUP4_ARG3(__lasx_xvpermi_q, src0, src4, 0x02, src1, src5, 0x02, src2,
1179cabdff1aSopenharmony_ci                  src6, 0x02, src3, src7, 0x02, src0, src1, src2, src3);
1180cabdff1aSopenharmony_ci        DUP4_ARG3(__lasx_xvpermi_q, src4, src8, 0x02, src9, src13, 0x02, src10,
1181cabdff1aSopenharmony_ci                  src14, 0x02, src11, src15, 0x02, src4, src5, src6, src7);
1182cabdff1aSopenharmony_ci        DUP2_ARG3(__lasx_xvpermi_q, src12, src16, 0x02, src13, src17, 0x02,
1183cabdff1aSopenharmony_ci                   src8, src9);
1184cabdff1aSopenharmony_ci
1185cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvilvl_h, src5, src0, src6, src1, src7, src2, src8,
1186cabdff1aSopenharmony_ci                  src3, sum0, sum2, sum4, sum6);
1187cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvilvh_h, src5, src0, src6, src1, src7, src2, src8,
1188cabdff1aSopenharmony_ci                  src3, sum1, sum3, sum5, sum7);
1189cabdff1aSopenharmony_ci        src8 = __lasx_xvilvl_h(src9, src4);
1190cabdff1aSopenharmony_ci        src9 = __lasx_xvilvh_h(src9, src4);
1191cabdff1aSopenharmony_ci
1192cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum0, sum0, sum1, sum1, sum2, sum2,
1193cabdff1aSopenharmony_ci                  sum3, sum3, src0, src1, src2, src3);
1194cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvhaddw_hu_bu, sum4, sum4, sum5, sum5, sum6, sum6,
1195cabdff1aSopenharmony_ci                  sum7, sum7, src4, src5, src6, src7);
1196cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvhaddw_hu_bu, src8, src8, src9, src9, src8, src9);
1197cabdff1aSopenharmony_ci
1198cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvadd_h, src0, src2, src1, src3, src2, src4, src3,
1199cabdff1aSopenharmony_ci                  src5, sum0, sum1, sum2, sum3);
1200cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvadd_h, src4, src6, src5, src7, src6, src8, src7,
1201cabdff1aSopenharmony_ci                  src9, sum4, sum5, sum6, sum7);
1202cabdff1aSopenharmony_ci        DUP4_ARG3(__lasx_xvsrarni_b_h, sum1, sum0, 2, sum3, sum2, 2, sum5,
1203cabdff1aSopenharmony_ci                  sum4, 2, sum7, sum6, 2, sum0, sum1, sum2, sum3);
1204cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst, 0, 0);
1205cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst, 8, 1);
1206cabdff1aSopenharmony_ci        dst += dst_stride;
1207cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum1, dst, 0, 0);
1208cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum1, dst, 8, 1);
1209cabdff1aSopenharmony_ci        dst += dst_stride;
1210cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum2, dst, 0, 0);
1211cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum2, dst, 8, 1);
1212cabdff1aSopenharmony_ci        dst += dst_stride;
1213cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum3, dst, 0, 0);
1214cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum3, dst, 8, 1);
1215cabdff1aSopenharmony_ci        dst += dst_stride;
1216cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst, 0, 2);
1217cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst, 8, 3);
1218cabdff1aSopenharmony_ci        dst += dst_stride;
1219cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum1, dst, 0, 2);
1220cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum1, dst, 8, 3);
1221cabdff1aSopenharmony_ci        dst += dst_stride;
1222cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum2, dst, 0, 2);
1223cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum2, dst, 8, 3);
1224cabdff1aSopenharmony_ci        dst += dst_stride;
1225cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum3, dst, 0, 2);
1226cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum3, dst, 8, 3);
1227cabdff1aSopenharmony_ci        dst += dst_stride;
1228cabdff1aSopenharmony_ci    }
1229cabdff1aSopenharmony_ci}
1230cabdff1aSopenharmony_ci
/**
 * Put a 16-pixel-wide block interpolated in both x and y with rounding.
 *
 * Thin wrapper around common_hv_bil_16w_lasx(): source and destination use
 * the same stride. line_size is narrowed to the helper's int32_t stride
 * parameter and h to its uint8_t height parameter.
 */
void ff_put_pixels16_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h)
{
    const int32_t stride = line_size;
    const uint8_t rows   = h;

    common_hv_bil_16w_lasx(pixels, stride, block, stride, rows);
}
1236cabdff1aSopenharmony_ci
1237cabdff1aSopenharmony_cistatic void common_hv_bil_8w_lasx(const uint8_t *src, int32_t src_stride,
1238cabdff1aSopenharmony_ci                                  uint8_t *dst, int32_t dst_stride,
1239cabdff1aSopenharmony_ci                                  uint8_t height)
1240cabdff1aSopenharmony_ci{
1241cabdff1aSopenharmony_ci    __m256i src0, src1, src2, src3, src4, src5, src6, src7;
1242cabdff1aSopenharmony_ci    __m256i src8, src9, sum0, sum1;
1243cabdff1aSopenharmony_ci    uint8_t loop_cnt;
1244cabdff1aSopenharmony_ci    int32_t src_stride_2x = src_stride << 1;
1245cabdff1aSopenharmony_ci    int32_t src_stride_4x = src_stride << 2;
1246cabdff1aSopenharmony_ci    int32_t dst_stride_2x = dst_stride << 1;
1247cabdff1aSopenharmony_ci    int32_t dst_stride_4x = dst_stride << 2;
1248cabdff1aSopenharmony_ci    int32_t dst_stride_3x = dst_stride_2x + dst_stride;
1249cabdff1aSopenharmony_ci    int32_t src_stride_3x = src_stride_2x + src_stride;
1250cabdff1aSopenharmony_ci    uint8_t* _src = (uint8_t*)src;
1251cabdff1aSopenharmony_ci
1252cabdff1aSopenharmony_ci    DUP2_ARG2(__lasx_xvld, _src, 0, _src, 1, src0, src5);
1253cabdff1aSopenharmony_ci    _src += src_stride;
1254cabdff1aSopenharmony_ci
1255cabdff1aSopenharmony_ci    for (loop_cnt = (height >> 2); loop_cnt--;) {
1256cabdff1aSopenharmony_ci        src1 = __lasx_xvld(_src, 0);
1257cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src2, src3);
1258cabdff1aSopenharmony_ci        src4 = __lasx_xvldx(_src, src_stride_3x);
1259cabdff1aSopenharmony_ci        _src += 1;
1260cabdff1aSopenharmony_ci        src6 = __lasx_xvld(_src, 0);
1261cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvldx, _src, src_stride, _src, src_stride_2x, src7, src8);
1262cabdff1aSopenharmony_ci        src9 = __lasx_xvldx(_src, src_stride_3x);
1263cabdff1aSopenharmony_ci        _src += (src_stride_4x - 1);
1264cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvilvl_b, src5, src0, src6, src1, src7, src2, src8, src3,
1265cabdff1aSopenharmony_ci                  src0, src1, src2, src3);
1266cabdff1aSopenharmony_ci        src5 = __lasx_xvilvl_b(src9, src4);
1267cabdff1aSopenharmony_ci        DUP4_ARG3(__lasx_xvpermi_q, src1, src0, 0x20, src2, src1, 0x20, src3, src2,
1268cabdff1aSopenharmony_ci                  0x20, src5, src3, 0x20, src0, src1, src2, src3);
1269cabdff1aSopenharmony_ci        DUP4_ARG2(__lasx_xvhaddw_hu_bu, src0, src0, src1, src1, src2, src2,
1270cabdff1aSopenharmony_ci                  src3, src3, src0, src1, src2, src3);
1271cabdff1aSopenharmony_ci        DUP2_ARG2(__lasx_xvadd_h, src0, src1, src2, src3, sum0, sum1);
1272cabdff1aSopenharmony_ci        sum0 = __lasx_xvsrarni_b_h(sum1, sum0, 2);
1273cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst, 0, 0);
1274cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst + dst_stride, 0, 2);
1275cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst + dst_stride_2x, 0, 1);
1276cabdff1aSopenharmony_ci        __lasx_xvstelm_d(sum0, dst + dst_stride_3x, 0, 3);
1277cabdff1aSopenharmony_ci        dst += dst_stride_4x;
1278cabdff1aSopenharmony_ci        src0 = src4;
1279cabdff1aSopenharmony_ci        src5 = src9;
1280cabdff1aSopenharmony_ci    }
1281cabdff1aSopenharmony_ci}
1282cabdff1aSopenharmony_ci
/**
 * Put an 8-pixel-wide block interpolated in both x and y with rounding.
 *
 * Thin wrapper around common_hv_bil_8w_lasx(): source and destination use
 * the same stride. line_size is narrowed to the helper's int32_t stride
 * parameter and h to its uint8_t height parameter.
 */
void ff_put_pixels8_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h)
{
    const int32_t stride = line_size;
    const uint8_t rows   = h;

    common_hv_bil_8w_lasx(pixels, stride, block, stride, rows);
}
1288