/*
 * Copyright (c) 2013 Seppo Tomperi
 * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "config.h"
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
26cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h"
27cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h"
28cabdff1aSopenharmony_ci#include "libavcodec/hevcdsp.h"
29cabdff1aSopenharmony_ci#include "libavcodec/x86/hevcdsp.h"
30cabdff1aSopenharmony_ci
/*
 * Declare the prototype of a single chroma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>().
 * Only a declaration is emitted, and it carries its own terminating ';',
 * so uses of this macro are written without a trailing semicolon.
 */
#define LFC_FUNC(DIR, DEPTH, OPT)                                   \
void ff_hevc_##DIR##_loop_filter_chroma_##DEPTH##_##OPT(            \
    uint8_t *pix, ptrdiff_t stride,                                 \
    int *tc, uint8_t *no_p, uint8_t *no_q);
33cabdff1aSopenharmony_ci
/*
 * Declare the prototype of a single luma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>().
 * Compared with the chroma prototype it takes an additional `int beta`
 * argument before `tc`. The declaration carries its own terminating ';'.
 */
#define LFL_FUNC(DIR, DEPTH, OPT)                                   \
void ff_hevc_##DIR##_loop_filter_luma_##DEPTH##_##OPT(              \
    uint8_t *pix, ptrdiff_t stride, int beta,                       \
    int *tc, uint8_t *no_p, uint8_t *no_q);
36cabdff1aSopenharmony_ci
/*
 * Declare the h and v chroma loop-filter prototypes for one bit depth and
 * one instruction-set suffix. The `type` argument is accepted but does not
 * appear in the expansion.
 */
#define LFC_FUNCS(type, depth, opt) \
    LFC_FUNC(h, depth, opt) LFC_FUNC(v, depth, opt)
40cabdff1aSopenharmony_ci
/*
 * Declare the h and v luma loop-filter prototypes for one bit depth and
 * one instruction-set suffix. The `type` argument is accepted but does not
 * appear in the expansion.
 */
#define LFL_FUNCS(type, depth, opt) \
    LFL_FUNC(h, depth, opt) LFL_FUNC(v, depth, opt)
44cabdff1aSopenharmony_ci
45cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,   8, sse2)
46cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,  10, sse2)
47cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,  12, sse2)
48cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,   8, avx)
49cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,  10, avx)
50cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t,  12, avx)
51cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,   8, sse2)
52cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  10, sse2)
53cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  12, sse2)
54cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,   8, ssse3)
55cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  10, ssse3)
56cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  12, ssse3)
57cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,   8, avx)
58cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  10, avx)
59cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t,  12, avx)
60cabdff1aSopenharmony_ci
/*
 * Declare the three prototypes ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(), each
 * taking an int16_t coefficient pointer.
 * The last declaration is deliberately left without a ';' so that every use
 * of the macro is followed by a semicolon.
 */
#define IDCT_DC_FUNCS(W, opt)                                \
void ff_hevc_idct_##W##_dc_8_##opt(int16_t *coeffs);         \
void ff_hevc_idct_##W##_dc_10_##opt(int16_t *coeffs);        \
void ff_hevc_idct_##W##_dc_12_##opt(int16_t *coeffs)

/* 4x4 comes with an mmxext suffix, 8x8..32x32 with sse2, 16x16/32x32 also with avx2. */
IDCT_DC_FUNCS(4x4,   mmxext);

IDCT_DC_FUNCS(8x8,   sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);

IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);
72cabdff1aSopenharmony_ci
/*
 * Declare the prototypes ff_hevc_idct_<N>x<N>_{8,10}_<opt>() for
 * N = 4, 8, 16 and 32. Each takes the coefficient pointer and a col_limit
 * value. Every declaration, including the last one, carries its own ';',
 * so the macro is used without a trailing semicolon.
 */
#define IDCT_FUNCS(opt)                                             \
void ff_hevc_idct_4x4_8_##opt(int16_t *coeffs, int col_limit);      \
void ff_hevc_idct_8x8_8_##opt(int16_t *coeffs, int col_limit);      \
void ff_hevc_idct_16x16_8_##opt(int16_t *coeffs, int col_limit);    \
void ff_hevc_idct_32x32_8_##opt(int16_t *coeffs, int col_limit);    \
void ff_hevc_idct_4x4_10_##opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_8x8_10_##opt(int16_t *coeffs, int col_limit);     \
void ff_hevc_idct_16x16_10_##opt(int16_t *coeffs, int col_limit);   \
void ff_hevc_idct_32x32_10_##opt(int16_t *coeffs, int col_limit);

/* Prototypes for the sse2 and avx suffixes. */
IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)
85cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_<name><W>_<bitd>_<opt>(), a W-column wrapper
 * built on the <step>-column routine of the same name/bitd/opt.
 *
 * The <step>-column routine is called once for each column offset
 * 0, step, 2 * step, ... that is below W. For every call the source pointer
 * is advanced by offset * ((bitd + 7) / 8) bytes and the int16_t destination
 * by offset elements; _srcstride, height, mx, my and width are forwarded
 * unchanged.
 */
#define mc_rep_func(name, bitd, step, W, opt)                                                         \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst,                                       \
                                                uint8_t *_src, ptrdiff_t _srcstride, int height,      \
                                                intptr_t mx, intptr_t my, int width)                  \
{                                                                                                     \
    int x;                                                                                            \
    for (x = 0; x < W; x += step)                                                                     \
        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(_dst + x,                                      \
                                                       _src + (x * ((bitd + 7) / 8)),                 \
                                                       _srcstride, height, mx, my, width);            \
}
/*
 * Generate ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>(), a W-column wrapper
 * built on the <step>-column "uni" routine of the same name/bitd/opt.
 *
 * The <step>-column routine is called once for each column offset
 * 0, step, 2 * step, ... that is below W. Source and destination are both
 * byte pointers and are advanced by the same offset * ((bitd + 7) / 8) bytes;
 * the strides and height, mx, my, width are forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt)                                                     \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,              \
                                                    uint8_t *_src, ptrdiff_t _srcstride, int height,  \
                                                    intptr_t mx, intptr_t my, int width)              \
{                                                                                                     \
    int x;                                                                                            \
    for (x = 0; x < W; x += step) {                                                                   \
        const int byte_off = x * ((bitd + 7) / 8);                                                    \
        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(_dst + byte_off, dststride,                \
                                                           _src + byte_off, _srcstride,               \
                                                           height, mx, my, width);                    \
    }                                                                                                 \
}
/*
 * Generate ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>(), a W-column wrapper
 * built on the <step>-column "bi" routine of the same name/bitd/opt.
 *
 * The <step>-column routine is called once for each column offset
 * 0, step, 2 * step, ... that is below W. The byte pointers _src and _dst
 * are advanced by offset * ((bitd + 7) / 8) bytes, the int16_t pointer
 * _src2 by offset elements; everything else is forwarded unchanged.
 */
#define mc_rep_bi_func(name, bitd, step, W, opt)                                                      \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,               \
                                                   uint8_t *_src, ptrdiff_t _srcstride,               \
                                                   int16_t *_src2, int height,                        \
                                                   intptr_t mx, intptr_t my, int width)               \
{                                                                                                     \
    int x;                                                                                            \
    for (x = 0; x < W; x += step) {                                                                   \
        const int byte_off = x * ((bitd + 7) / 8);                                                    \
        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(_dst + byte_off, dststride,                 \
                                                          _src + byte_off, _srcstride,                \
                                                          _src2 + x, height, mx, my, width);          \
    }                                                                                                 \
}
132cabdff1aSopenharmony_ci
/*
 * Generate the plain, "uni" and "bi" W-column wrappers for one
 * name/bitd/step/opt combination in a single invocation.
 */
#define mc_rep_funcs(name, bitd, step, W, opt)  \
    mc_rep_func(name, bitd, step, W, opt)       \
    mc_rep_uni_func(name, bitd, step, W, opt)   \
    mc_rep_bi_func(name, bitd, step, W, opt)
137cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() from two routines of
 * different widths: the <step1>-column routine handles the block starting at
 * column 0, then the <step2>-column routine handles the block starting at
 * column step1 (destination moved by step1 elements, source by
 * step1 * ((bitd + 7) / 8) bytes). W only selects the generated name.
 */
#define mc_rep_func2(name, bitd, step1, step2, W, opt)                                                \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst,                                        \
                                                 uint8_t *src, ptrdiff_t _srcstride, int height,      \
                                                 intptr_t mx, intptr_t my, int width)                 \
{                                                                                                     \
    int16_t *dst_rest = dst + step1;                                                                  \
    uint8_t *src_rest = src + (step1 * ((bitd + 7) / 8));                                             \
                                                                                                      \
    ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride,                             \
                                                    height, mx, my, width);                           \
    ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst_rest, src_rest, _srcstride,                   \
                                                    height, mx, my, width);                           \
}
/*
 * Generate ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() from two "uni"
 * routines: the <step1>-column routine at column 0, then the <step2>-column
 * routine with source and destination both moved by
 * step1 * ((bitd + 7) / 8) bytes. W only selects the generated name.
 */
#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt)                                            \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,               \
                                                     uint8_t *src, ptrdiff_t _srcstride, int height,  \
                                                     intptr_t mx, intptr_t my, int width)             \
{                                                                                                     \
    const int byte_off = step1 * ((bitd + 7) / 8);                                                    \
                                                                                                      \
    ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride,              \
                                                        height, mx, my, width);                       \
    ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + byte_off, dststride,                    \
                                                        src + byte_off, _srcstride,                   \
                                                        height, mx, my, width);                       \
}
/*
 * Generate ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() from two "bi"
 * routines: the <step1>-column routine at column 0, then the <step2>-column
 * routine with src and dst moved by step1 * ((bitd + 7) / 8) bytes and the
 * int16_t src2 moved by step1 elements. W only selects the generated name.
 */
#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt)                                             \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,                \
                                                    uint8_t *src, ptrdiff_t _srcstride,               \
                                                    int16_t *src2, int height,                        \
                                                    intptr_t mx, intptr_t my, int width)              \
{                                                                                                     \
    const int byte_off = step1 * ((bitd + 7) / 8);                                                    \
                                                                                                      \
    ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride,               \
                                                       src2, height, mx, my, width);                  \
    ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + byte_off, dststride,                     \
                                                       src + byte_off, _srcstride,                    \
                                                       src2 + step1, height, mx, my, width);          \
}
167cabdff1aSopenharmony_ci
/*
 * Generate the plain, "uni" and "bi" two-part wrappers (step1 columns
 * followed by step2 columns) for one name/bitd/opt combination.
 */
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt)  \
    mc_rep_func2(name, bitd, step1, step2, W, opt)       \
    mc_rep_uni_func2(name, bitd, step1, step2, W, opt)   \
    mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
172cabdff1aSopenharmony_ci
173cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
174cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_<name><width1>_10_<opt1>() by combining two
 * routines with different suffixes: the <width2>-column <opt1> routine at
 * column 0, then the <width3>-column <opt2> routine with the int16_t
 * destination moved by width2 elements and the byte source moved by width4.
 * width4 is supplied by the caller as a ready-made byte offset.
 */
#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                               \
void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src,                          \
                                                 ptrdiff_t _srcstride, int height,                    \
                                                 intptr_t mx, intptr_t my, int width)                 \
{                                                                                                     \
    int16_t *dst_rest = dst + width2;                                                                 \
    uint8_t *src_rest = src + width4;                                                                 \
                                                                                                      \
    ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride,                                 \
                                                height, mx, my, width);                               \
    ff_hevc_put_hevc_##name##width3##_10_##opt2(dst_rest, src_rest, _srcstride,                       \
                                                height, mx, my, width);                               \
}
183cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_bi_<name><width1>_10_<opt1>() by combining the
 * <width2>-column <opt1> "bi" routine at column 0 with the <width3>-column
 * <opt2> "bi" routine. For the second call the byte pointers dst and src
 * are moved by width4 and the int16_t src2 by width2 elements.
 */
#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                            \
void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride,                \
                                                    uint8_t *src, ptrdiff_t _srcstride,               \
                                                    int16_t *src2, int height,                        \
                                                    intptr_t mx, intptr_t my, int width)              \
{                                                                                                     \
    uint8_t *dst_rest  = dst  + width4;                                                               \
    uint8_t *src_rest  = src  + width4;                                                               \
    int16_t *src2_rest = src2 + width2;                                                               \
                                                                                                      \
    ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride,                   \
                                                   src2, height, mx, my, width);                      \
    ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst_rest, dststride, src_rest, _srcstride,         \
                                                   src2_rest, height, mx, my, width);                 \
}
194cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_uni_<name><width1>_10_<opt1>() by combining the
 * <width2>-column <opt1> "uni" routine at column 0 with the <width3>-column
 * <opt2> "uni" routine, whose dst and src are both moved by width4 bytes.
 */
#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                           \
void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride,               \
                                                     uint8_t *src, ptrdiff_t _srcstride, int height,  \
                                                     intptr_t mx, intptr_t my, int width)             \
{                                                                                                     \
    uint8_t *dst_rest = dst + width4;                                                                 \
    uint8_t *src_rest = src + width4;                                                                 \
                                                                                                      \
    ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride,                  \
                                                    height, mx, my, width);                           \
    ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst_rest, dststride, src_rest, _srcstride,        \
                                                    height, mx, my, width);                           \
}
205cabdff1aSopenharmony_ci
/*
 * Generate the plain, "bi" and "uni" 10-bit mixed-suffix wrappers for one
 * name / width combination in a single invocation.
 */
#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)    \
    mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)         \
    mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)      \
    mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
210cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_<name><width1>_8_<opt1>() by combining the
 * <width2>-column <opt1> routine at column 0 with the <width3>-column
 * <opt2> routine. For the second call both the int16_t destination and the
 * byte source are moved by width2 (elements and bytes respectively).
 */
#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                        \
void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src,                           \
                                                ptrdiff_t _srcstride, int height,                     \
                                                intptr_t mx, intptr_t my, int width)                  \
{                                                                                                     \
    int16_t *dst_rest = dst + width2;                                                                 \
    uint8_t *src_rest = src + width2;                                                                 \
                                                                                                      \
    ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride,                                  \
                                               height, mx, my, width);                                \
    ff_hevc_put_hevc_##name##width3##_8_##opt2(dst_rest, src_rest, _srcstride,                        \
                                               height, mx, my, width);                                \
}
219cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_bi_<name><width1>_8_<opt1>() by combining the
 * <width2>-column <opt1> "bi" routine at column 0 with the <width3>-column
 * <opt2> "bi" routine, for which dst, src and src2 are each moved by width2.
 */
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                     \
void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride,                 \
                                                   uint8_t *src, ptrdiff_t _srcstride,                \
                                                   int16_t *src2, int height,                         \
                                                   intptr_t mx, intptr_t my, int width)               \
{                                                                                                     \
    uint8_t *dst_rest  = dst  + width2;                                                               \
    uint8_t *src_rest  = src  + width2;                                                               \
    int16_t *src2_rest = src2 + width2;                                                               \
                                                                                                      \
    ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                    \
                                                  src2, height, mx, my, width);                       \
    ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst_rest, dststride, src_rest, _srcstride,          \
                                                  src2_rest, height, mx, my, width);                  \
}
230cabdff1aSopenharmony_ci
/*
 * Generate ff_hevc_put_hevc_uni_<name><width1>_8_<opt1>() by combining the
 * <width2>-column <opt1> "uni" routine at column 0 with the <width3>-column
 * <opt2> "uni" routine, whose dst and src are both moved by width2 bytes.
 */
#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                    \
void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride,                \
                                                    uint8_t *src, ptrdiff_t _srcstride, int height,   \
                                                    intptr_t mx, intptr_t my, int width)              \
{                                                                                                     \
    uint8_t *dst_rest = dst + width2;                                                                 \
    uint8_t *src_rest = src + width2;                                                                 \
                                                                                                      \
    ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                   \
                                                   height, mx, my, width);                            \
    ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst_rest, dststride, src_rest, _srcstride,         \
                                                   height, mx, my, width);                            \
}
241cabdff1aSopenharmony_ci
/*
 * Generate the plain, "bi" and "uni" 8-bit mixed-suffix wrappers for one
 * name / width combination in a single invocation.
 */
#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)     \
    mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)          \
    mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)       \
    mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
246cabdff1aSopenharmony_ci
247cabdff1aSopenharmony_ci#if HAVE_AVX2_EXTERNAL
248cabdff1aSopenharmony_ci
249cabdff1aSopenharmony_cimc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
250cabdff1aSopenharmony_cimc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
251cabdff1aSopenharmony_cimc_rep_mixs_8(epel_h ,    48, 32, 16, avx2, sse4)
252cabdff1aSopenharmony_cimc_rep_mixs_8(epel_v ,    48, 32, 16, avx2, sse4)
253cabdff1aSopenharmony_ci
254cabdff1aSopenharmony_cimc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
255cabdff1aSopenharmony_cimc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
256cabdff1aSopenharmony_cimc_rep_mixs_10(epel_hv,   24, 16, 8, avx2, sse4, 32)
257cabdff1aSopenharmony_cimc_rep_mixs_10(epel_h ,   24, 16, 8, avx2, sse4, 32)
258cabdff1aSopenharmony_cimc_rep_mixs_10(epel_v ,   24, 16, 8, avx2, sse4, 32)
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci
261cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_h ,   24, 16, 8, avx2, sse4, 32)
262cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_v ,   24, 16, 8, avx2, sse4, 32)
263cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_hv,   24, 16, 8, avx2, sse4, 32)
264cabdff1aSopenharmony_ci
265cabdff1aSopenharmony_ci
266cabdff1aSopenharmony_cimc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
267cabdff1aSopenharmony_cimc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
268cabdff1aSopenharmony_ci
269cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
270cabdff1aSopenharmony_ci
271cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 16, 32, avx2)
272cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 16, 48, avx2)
273cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 32, 64, avx2)
274cabdff1aSopenharmony_ci
275cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
276cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
277cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
278cabdff1aSopenharmony_ci
279cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 32, 64, avx2)
280cabdff1aSopenharmony_ci
281cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 32, 64, avx2)
282cabdff1aSopenharmony_ci
283cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 16, 32, avx2)
284cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 16, 48, avx2)
285cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 32, 64, avx2)
286cabdff1aSopenharmony_ci
287cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 16, 32, avx2)
288cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 16, 48, avx2)
289cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 32, 64, avx2)
290cabdff1aSopenharmony_ci
291cabdff1aSopenharmony_ci
292cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,  8, 32, 64, avx2)
293cabdff1aSopenharmony_ci
294cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 16, 32, avx2)
295cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 16, 48, avx2)
296cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 32, 64, avx2)
297cabdff1aSopenharmony_ci
298cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 32, 64, avx2)
299cabdff1aSopenharmony_cimc_rep_mixs_8(qpel_h ,  48, 32, 16, avx2, sse4)
300cabdff1aSopenharmony_ci
301cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 32, 64, avx2)
302cabdff1aSopenharmony_cimc_rep_mixs_8(qpel_v,  48, 32, 16, avx2, sse4)
303cabdff1aSopenharmony_ci
304cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 16, 32, avx2)
305cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 16, 48, avx2)
306cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 32, 64, avx2)
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 16, 32, avx2)
309cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 16, 48, avx2)
310cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 32, 64, avx2)
311cabdff1aSopenharmony_ci
312cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
313cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
314cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
315cabdff1aSopenharmony_ci
316cabdff1aSopenharmony_ci#endif //AVX2
317cabdff1aSopenharmony_ci
318cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
319cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
320cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
321cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8,  8, 24, sse4)
322cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  8, 64, sse4)
323cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  8, 48, sse4)
324cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  8, 32, sse4)
325cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  8, 24, sse4)
326cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  8, 16, sse4)
327cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10,  4, 12, sse4)
328cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  8, 64, sse4)
329cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  8, 48, sse4)
330cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  8, 32, sse4)
331cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  8, 24, sse4)
332cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  8, 16, sse4)
333cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12,  4, 12, sse4)
334cabdff1aSopenharmony_ci
335cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 64, sse4)
336cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 48, sse4)
337cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 32, sse4)
338cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8,  8, 24, sse4)
339cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  8, 64, sse4)
340cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  8, 48, sse4)
341cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  8, 32, sse4)
342cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  8, 24, sse4)
343cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  8, 16, sse4)
344cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10,  4, 12, sse4)
345cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  8, 64, sse4)
346cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  8, 48, sse4)
347cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  8, 32, sse4)
348cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  8, 24, sse4)
349cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  8, 16, sse4)
350cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12,  4, 12, sse4)
351cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 64, sse4)
352cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 48, sse4)
353cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 32, sse4)
354cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8,  8, 24, sse4)
355cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  8, 64, sse4)
356cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  8, 48, sse4)
357cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  8, 32, sse4)
358cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  8, 24, sse4)
359cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  8, 16, sse4)
360cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10,  4, 12, sse4)
361cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  8, 64, sse4)
362cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  8, 48, sse4)
363cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  8, 32, sse4)
364cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  8, 24, sse4)
365cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  8, 16, sse4)
366cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12,  4, 12, sse4)
367cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 64, sse4)
368cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 48, sse4)
369cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 32, sse4)
370cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8,  8, 24, sse4)
371cabdff1aSopenharmony_cimc_rep_funcs2(epel_hv,8,  8,  4, 12, sse4)
372cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  8, 64, sse4)
373cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  8, 48, sse4)
374cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  8, 32, sse4)
375cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  8, 24, sse4)
376cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  8, 16, sse4)
377cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10,  4, 12, sse4)
378cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  8, 64, sse4)
379cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  8, 48, sse4)
380cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  8, 32, sse4)
381cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  8, 24, sse4)
382cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  8, 16, sse4)
383cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12,  4, 12, sse4)
384cabdff1aSopenharmony_ci
/*
 * Instantiations of mc_rep_funcs / mc_rep_funcs2 (defined elsewhere) for the
 * qpel_h, qpel_v and qpel_hv kernels at bit depths 8, 10 and 12, all for the
 * sse4 variant.
 * NOTE(review): by analogy with mc_rep_uni_w(bitd, step, W, opt), the numeric
 * arguments are presumably (bit depth, step, width), with mc_rep_funcs2
 * taking two step values for width 12 - confirm against the macro definitions.
 */
385cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 64, sse4)
386cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 48, sse4)
387cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 32, sse4)
388cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8,  8, 24, sse4)
389cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  8, 64, sse4)
390cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  8, 48, sse4)
391cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  8, 32, sse4)
392cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  8, 24, sse4)
393cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  8, 16, sse4)
394cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10,  4, 12, sse4)
395cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  8, 64, sse4)
396cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  8, 48, sse4)
397cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  8, 32, sse4)
398cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  8, 24, sse4)
399cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  8, 16, sse4)
400cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12,  4, 12, sse4)
401cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 64, sse4)
402cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 48, sse4)
403cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 32, sse4)
404cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8,  8, 24, sse4)
405cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  8, 64, sse4)
406cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  8, 48, sse4)
407cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  8, 32, sse4)
408cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  8, 24, sse4)
409cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  8, 16, sse4)
410cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10,  4, 12, sse4)
411cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  8, 64, sse4)
412cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  8, 48, sse4)
413cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  8, 32, sse4)
414cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  8, 24, sse4)
415cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  8, 16, sse4)
416cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12,  4, 12, sse4)
417cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8,  8, 64, sse4)
418cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8,  8, 48, sse4)
419cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8,  8, 32, sse4)
420cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8,  8, 24, sse4)
421cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8,  8, 16, sse4)
422cabdff1aSopenharmony_cimc_rep_funcs2(qpel_hv,8,  8,  4, 12, sse4)
423cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  8, 64, sse4)
424cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  8, 48, sse4)
425cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  8, 32, sse4)
426cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  8, 24, sse4)
427cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  8, 16, sse4)
428cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10,  4, 12, sse4)
429cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  8, 64, sse4)
430cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  8, 48, sse4)
431cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  8, 32, sse4)
432cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  8, 24, sse4)
433cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  8, 16, sse4)
434cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12,  4, 12, sse4)
435cabdff1aSopenharmony_ci
/*
 * mc_rep_uni_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() by repeating the narrower
 * ff_hevc_put_hevc_uni_w<step>_<bitd>_<opt>() across the block width.
 *
 * The generated function walks i = 0, step, 2*step, ... while i < W and, for
 * each column strip, calls the <step>-wide function with:
 *   - src advanced by i int16_t elements;
 *   - dst advanced by i * ((bitd + 7) / 8) bytes, i.e. i samples at one byte
 *     per sample for bitd == 8 and two bytes per sample for bitd 10 / 12;
 *   - dststride, height, denom, _wx and _ox forwarded unchanged.
 *
 * W is expected to be a multiple of step; every instantiation in this file
 * satisfies that.
 */
436cabdff1aSopenharmony_ci#define mc_rep_uni_w(bitd, step, W, opt) \
437cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
438cabdff1aSopenharmony_ci                                               int height, int denom,  int _wx, int _ox)                                \
439cabdff1aSopenharmony_ci{                                                                                                                       \
440cabdff1aSopenharmony_ci    int i;                                                                                                              \
441cabdff1aSopenharmony_ci    int16_t *src;                                                                                                       \
442cabdff1aSopenharmony_ci    uint8_t *dst;                                                                                                       \
443cabdff1aSopenharmony_ci    for (i = 0; i < W; i += step) {                                                                                     \
444cabdff1aSopenharmony_ci        src= _src + i;                                                                                                  \
445cabdff1aSopenharmony_ci        dst= _dst + (i * ((bitd + 7) / 8));                                                                             \
446cabdff1aSopenharmony_ci        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src,                                   \
447cabdff1aSopenharmony_ci                                                     height, denom, _wx, _ox);                                          \
448cabdff1aSopenharmony_ci    }                                                                                                                   \
449cabdff1aSopenharmony_ci}
450cabdff1aSopenharmony_ci
/*
 * Generate the uni_w functions for widths 12, 16, 24, 32, 48 and 64 at bit
 * depths 8, 10 and 12 (sse4). Width 12 is built from two 6-wide calls; all
 * other widths are built from 8-wide calls.
 */
451cabdff1aSopenharmony_cimc_rep_uni_w(8, 6, 12, sse4)
452cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 16, sse4)
453cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 24, sse4)
454cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 32, sse4)
455cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 48, sse4)
456cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 64, sse4)
457cabdff1aSopenharmony_ci
458cabdff1aSopenharmony_cimc_rep_uni_w(10, 6, 12, sse4)
459cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 16, sse4)
460cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 24, sse4)
461cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 32, sse4)
462cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 48, sse4)
463cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 64, sse4)
464cabdff1aSopenharmony_ci
465cabdff1aSopenharmony_cimc_rep_uni_w(12, 6, 12, sse4)
466cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 16, sse4)
467cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 24, sse4)
468cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 32, sse4)
469cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 48, sse4)
470cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 64, sse4)
471cabdff1aSopenharmony_ci
/*
 * mc_rep_bi_w(bitd, step, W, opt)
 *
 * Defines ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() by repeating the narrower
 * ff_hevc_put_hevc_bi_w<step>_<bitd>_<opt>() across the block width.
 *
 * For i = 0, step, 2*step, ... while i < W, the <step>-wide function is
 * called with:
 *   - src and src2 each advanced by i int16_t elements;
 *   - dst advanced by i * ((bitd + 7) / 8) bytes (one byte per sample for
 *     bitd == 8, two bytes per sample for bitd 10 / 12);
 *   - dststride, height, denom, _wx0, _wx1, _ox0 and _ox1 forwarded unchanged.
 *
 * W is expected to be a multiple of step; every instantiation in this file
 * satisfies that.
 */
472cabdff1aSopenharmony_ci#define mc_rep_bi_w(bitd, step, W, opt) \
473cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
474cabdff1aSopenharmony_ci                                              int16_t *_src2, int height,                                               \
475cabdff1aSopenharmony_ci                                              int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)                      \
476cabdff1aSopenharmony_ci{                                                                                                                       \
477cabdff1aSopenharmony_ci    int i;                                                                                                              \
478cabdff1aSopenharmony_ci    int16_t *src;                                                                                                       \
479cabdff1aSopenharmony_ci    int16_t *src2;                                                                                                      \
480cabdff1aSopenharmony_ci    uint8_t *dst;                                                                                                       \
481cabdff1aSopenharmony_ci    for (i = 0; i < W; i += step) {                                                                                     \
482cabdff1aSopenharmony_ci        src  = _src  + i;                                                                                               \
483cabdff1aSopenharmony_ci        src2 = _src2 + i;                                                                                               \
484cabdff1aSopenharmony_ci        dst  = _dst  + (i * ((bitd + 7) / 8));                                                                          \
485cabdff1aSopenharmony_ci        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2,                             \
486cabdff1aSopenharmony_ci                                                     height, denom, _wx0, _wx1, _ox0, _ox1);                             \
487cabdff1aSopenharmony_ci    }                                                                                                                   \
488cabdff1aSopenharmony_ci}
489cabdff1aSopenharmony_ci
/*
 * Generate the bi_w functions for widths 12, 16, 24, 32, 48 and 64 at bit
 * depths 8, 10 and 12 (sse4). Width 12 is built from two 6-wide calls; all
 * other widths are built from 8-wide calls.
 */
490cabdff1aSopenharmony_cimc_rep_bi_w(8, 6, 12, sse4)
491cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 16, sse4)
492cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 24, sse4)
493cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 32, sse4)
494cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 48, sse4)
495cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 64, sse4)
496cabdff1aSopenharmony_ci
497cabdff1aSopenharmony_cimc_rep_bi_w(10, 6, 12, sse4)
498cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 16, sse4)
499cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 24, sse4)
500cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 32, sse4)
501cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 48, sse4)
502cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 64, sse4)
503cabdff1aSopenharmony_ci
504cabdff1aSopenharmony_cimc_rep_bi_w(12, 6, 12, sse4)
505cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 16, sse4)
506cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 24, sse4)
507cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 32, sse4)
508cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 48, sse4)
509cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 64, sse4)
510cabdff1aSopenharmony_ci
/*
 * mc_uni_w_func(name, bitd, W, opt)
 *
 * Defines ff_hevc_put_hevc_uni_w_<name><W>_<bitd>_<opt>() as a two-stage
 * wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
 *      for _src (with _srcstride, height, mx, my, width) into a local scratch
 *      buffer `temp`;
 *   2. ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() consumes `temp` and writes to
 *      _dst using denom, _wx and _ox.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE int16_t
 * elements. No stride for `temp` is passed to either callee, so both must
 * agree on its layout.
 * NOTE(review): the 71 row count is presumably MAX_PB_SIZE plus filter margin
 * rows - confirm against the kernel implementations.
 */
511cabdff1aSopenharmony_ci#define mc_uni_w_func(name, bitd, W, opt) \
512cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,         \
513cabdff1aSopenharmony_ci                                                      uint8_t *_src, ptrdiff_t _srcstride,          \
514cabdff1aSopenharmony_ci                                                      int height, int denom,                        \
515cabdff1aSopenharmony_ci                                                      int _wx, int _ox,                             \
516cabdff1aSopenharmony_ci                                                      intptr_t mx, intptr_t my, int width)          \
517cabdff1aSopenharmony_ci{                                                                                                   \
518cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                            \
519cabdff1aSopenharmony_ci    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);     \
520cabdff1aSopenharmony_ci    ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
521cabdff1aSopenharmony_ci}
522cabdff1aSopenharmony_ci
/*
 * mc_uni_w_funcs(name, bitd, opt)
 *
 * Expands mc_uni_w_func for the eight widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is deliberately not part of this set; callers that need it invoke
 * mc_uni_w_func(name, bitd, 6, opt) separately.
 */
523cabdff1aSopenharmony_ci#define mc_uni_w_funcs(name, bitd, opt)      \
524cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 4, opt)    \
525cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 8, opt)    \
526cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 12, opt)   \
527cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 16, opt)   \
528cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 24, opt)   \
529cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 32, opt)   \
530cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 48, opt)   \
531cabdff1aSopenharmony_ci        mc_uni_w_func(name, bitd, 64, opt)
532cabdff1aSopenharmony_ci
/*
 * Generate the uni_w wrappers for bit depths 8, 10 and 12 (sse4).
 * pel_pixels, epel_h, epel_v and epel_hv get the eight standard widths plus
 * width 6; qpel_h, qpel_v and qpel_hv get the eight standard widths only.
 */
533cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 8, sse4)
534cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 8, 6, sse4)
535cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 8, sse4)
536cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 8, 6, sse4)
537cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 8, sse4)
538cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 8, 6, sse4)
539cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 8, sse4)
540cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 8, 6, sse4)
541cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 8, sse4)
542cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 8, sse4)
543cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 8, sse4)
544cabdff1aSopenharmony_ci
545cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 10, sse4)
546cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 10, 6, sse4)
547cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 10, sse4)
548cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 10, 6, sse4)
549cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 10, sse4)
550cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 10, 6, sse4)
551cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 10, sse4)
552cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 10, 6, sse4)
553cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 10, sse4)
554cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 10, sse4)
555cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 10, sse4)
556cabdff1aSopenharmony_ci
557cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 12, sse4)
558cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 12, 6, sse4)
559cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 12, sse4)
560cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 12, 6, sse4)
561cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 12, sse4)
562cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 12, 6, sse4)
563cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 12, sse4)
564cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 12, 6, sse4)
565cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 12, sse4)
566cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 12, sse4)
567cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 12, sse4)
568cabdff1aSopenharmony_ci
/*
 * mc_bi_w_func(name, bitd, W, opt)
 *
 * Defines ff_hevc_put_hevc_bi_w_<name><W>_<bitd>_<opt>() as a two-stage
 * wrapper:
 *   1. ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() writes its int16_t output
 *      for _src (with _srcstride, height, mx, my, width) into a local scratch
 *      buffer `temp`;
 *   2. ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() combines `temp` with the
 *      caller-supplied _src2 and writes to _dst using denom, _wx0, _wx1,
 *      _ox0 and _ox1.
 *
 * `temp` is declared with LOCAL_ALIGNED_16 and holds 71 * MAX_PB_SIZE int16_t
 * elements. No stride for `temp` is passed to either callee, so both must
 * agree on its layout.
 * NOTE(review): the 71 row count is presumably MAX_PB_SIZE plus filter margin
 * rows - confirm against the kernel implementations.
 */
569cabdff1aSopenharmony_ci#define mc_bi_w_func(name, bitd, W, opt) \
570cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,           \
571cabdff1aSopenharmony_ci                                                     uint8_t *_src, ptrdiff_t _srcstride,            \
572cabdff1aSopenharmony_ci                                                     int16_t *_src2,                                 \
573cabdff1aSopenharmony_ci                                                     int height, int denom,                          \
574cabdff1aSopenharmony_ci                                                     int _wx0, int _wx1, int _ox0, int _ox1,         \
575cabdff1aSopenharmony_ci                                                     intptr_t mx, intptr_t my, int width)            \
576cabdff1aSopenharmony_ci{                                                                                                    \
577cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                             \
578cabdff1aSopenharmony_ci    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);      \
579cabdff1aSopenharmony_ci    ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2,                         \
580cabdff1aSopenharmony_ci                                              height, denom, _wx0, _wx1, _ox0, _ox1);                \
581cabdff1aSopenharmony_ci}
582cabdff1aSopenharmony_ci
/*
 * mc_bi_w_funcs(name, bitd, opt)
 *
 * Expands mc_bi_w_func for the eight widths 4, 8, 12, 16, 24, 32, 48 and 64.
 * Width 6 is deliberately not part of this set; callers that need it invoke
 * mc_bi_w_func(name, bitd, 6, opt) separately.
 */
583cabdff1aSopenharmony_ci#define mc_bi_w_funcs(name, bitd, opt)      \
584cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 4, opt)    \
585cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 8, opt)    \
586cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 12, opt)   \
587cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 16, opt)   \
588cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 24, opt)   \
589cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 32, opt)   \
590cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 48, opt)   \
591cabdff1aSopenharmony_ci        mc_bi_w_func(name, bitd, 64, opt)
592cabdff1aSopenharmony_ci
/*
 * Generate the bi_w wrappers for bit depths 8, 10 and 12 (sse4).
 * pel_pixels, epel_h, epel_v and epel_hv get the eight standard widths plus
 * width 6; qpel_h, qpel_v and qpel_hv get the eight standard widths only.
 */
593cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 8, sse4)
594cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 8, 6, sse4)
595cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 8, sse4)
596cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 8, 6, sse4)
597cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 8, sse4)
598cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 8, 6, sse4)
599cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 8, sse4)
600cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 8, 6, sse4)
601cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 8, sse4)
602cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 8, sse4)
603cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 8, sse4)
604cabdff1aSopenharmony_ci
605cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 10, sse4)
606cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 10, 6, sse4)
607cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 10, sse4)
608cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 10, 6, sse4)
609cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 10, sse4)
610cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 10, 6, sse4)
611cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 10, sse4)
612cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 10, 6, sse4)
613cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 10, sse4)
614cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 10, sse4)
615cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 10, sse4)
616cabdff1aSopenharmony_ci
617cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 12, sse4)
618cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 12, 6, sse4)
619cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 12, sse4)
620cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 12, 6, sse4)
621cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 12, sse4)
622cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 12, 6, sse4)
623cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 12, sse4)
624cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 12, 6, sse4)
625cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 12, sse4)
626cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 12, sse4)
627cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 12, sse4)
628cabdff1aSopenharmony_ci#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
629cabdff1aSopenharmony_ci
/*
 * SAO_BAND_FILTER_FUNCS(bitd, opt)
 *
 * Declares (prototypes only, no definitions) the five functions
 * ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share one
 * signature: destination and source pointers, separate destination and
 * source strides, the offset table, sao_left_class, width and height.
 * The last prototype already ends in ';', so the macro is invoked without a
 * trailing semicolon.
 */
630cabdff1aSopenharmony_ci#define SAO_BAND_FILTER_FUNCS(bitd, opt)                                                                                   \
631cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,  \
632cabdff1aSopenharmony_ci                                            int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
633cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
634cabdff1aSopenharmony_ci                                            int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
635cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
636cabdff1aSopenharmony_ci                                            int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
637cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
638cabdff1aSopenharmony_ci                                            int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
639cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
640cabdff1aSopenharmony_ci                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);
641cabdff1aSopenharmony_ci
/* Declare the SAO band filter prototypes for 8/10/12-bit in sse2, avx and avx2. */
642cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8,  sse2)
643cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10, sse2)
644cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12, sse2)
645cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8,   avx)
646cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10,  avx)
647cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12,  avx)
648cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8,  avx2)
649cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10, avx2)
650cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12, avx2)
651cabdff1aSopenharmony_ci
/*
 * SAO_BAND_INIT(bitd, opt)
 *
 * Fills c->sao_band_filter[0..4] with the <bitd>/<opt> band filters for
 * widths 8, 16, 32, 48 and 64 respectively. Expects a variable named `c` to
 * be in scope at the point of use. Wrapped in do { } while (0) so the
 * invocation behaves as a single statement and takes a trailing ';'.
 */
652cabdff1aSopenharmony_ci#define SAO_BAND_INIT(bitd, opt) do {                                       \
653cabdff1aSopenharmony_ci    c->sao_band_filter[0]      = ff_hevc_sao_band_filter_8_##bitd##_##opt;  \
654cabdff1aSopenharmony_ci    c->sao_band_filter[1]      = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
655cabdff1aSopenharmony_ci    c->sao_band_filter[2]      = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
656cabdff1aSopenharmony_ci    c->sao_band_filter[3]      = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
657cabdff1aSopenharmony_ci    c->sao_band_filter[4]      = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
658cabdff1aSopenharmony_ci} while (0)
659cabdff1aSopenharmony_ci
/*
 * SAO_EDGE_FILTER_FUNCS(bitd, opt)
 *
 * Declares (prototypes only, no definitions) the five functions
 * ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>(). All five share one
 * signature: destination and source pointers, a single stride (stride_dst),
 * the offset table, eo, width and height.
 * NOTE(review): the final prototype line ends with a continuation backslash,
 * so the line that follows the macro is spliced into its body; that line must
 * stay empty, or the trailing backslash should be dropped.
 */
660cabdff1aSopenharmony_ci#define SAO_EDGE_FILTER_FUNCS(bitd, opt)                                                                                    \
661cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,  \
662cabdff1aSopenharmony_ci                                              int eo, int width, int height);                                               \
663cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
664cabdff1aSopenharmony_ci                                               int eo, int width, int height);                                              \
665cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
666cabdff1aSopenharmony_ci                                               int eo, int width, int height);                                              \
667cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
668cabdff1aSopenharmony_ci                                               int eo, int width, int height);                                              \
669cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
670cabdff1aSopenharmony_ci                                               int eo, int width, int height);                                              \
671cabdff1aSopenharmony_ci
/*
 * Declare the SAO edge filter prototypes: 8-bit for ssse3 and avx2,
 * 10-bit and 12-bit for sse2 and avx2.
 */
672cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(8, ssse3)
673cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(8, avx2)
674cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(10, sse2)
675cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(10, avx2)
676cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(12, sse2)
677cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(12, avx2)
678cabdff1aSopenharmony_ci
/*
 * SAO_EDGE_INIT(bitd, opt)
 *
 * Fills c->sao_edge_filter[0..4] with the <bitd>/<opt> edge filters for
 * widths 8, 16, 32, 48 and 64 respectively. Expects a variable named `c` to
 * be in scope at the point of use. Wrapped in do { } while (0) so the
 * invocation behaves as a single statement and takes a trailing ';'.
 */
679cabdff1aSopenharmony_ci#define SAO_EDGE_INIT(bitd, opt) do {                                       \
680cabdff1aSopenharmony_ci    c->sao_edge_filter[0]      = ff_hevc_sao_edge_filter_8_##bitd##_##opt;  \
681cabdff1aSopenharmony_ci    c->sao_edge_filter[1]      = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
682cabdff1aSopenharmony_ci    c->sao_edge_filter[2]      = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
683cabdff1aSopenharmony_ci    c->sao_edge_filter[3]      = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
684cabdff1aSopenharmony_ci    c->sao_edge_filter[4]      = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
685cabdff1aSopenharmony_ci} while (0)
686cabdff1aSopenharmony_ci
/*
 * EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Expands to nine PEL_LINK invocations (PEL_LINK is defined elsewhere), one
 * per block-width slot. The second PEL_LINK argument is the slot index and
 * the suffix appended to fname is the matching width:
 *   1->4, 2->6, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64.
 * pointer, my, mx, bitd and opt are forwarded unchanged. The last PEL_LINK
 * has no trailing ';', so the invocation site supplies it.
 */
687cabdff1aSopenharmony_ci#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt )           \
688cabdff1aSopenharmony_ci        PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
689cabdff1aSopenharmony_ci        PEL_LINK(pointer, 2, my , mx , fname##6 ,  bitd, opt ); \
690cabdff1aSopenharmony_ci        PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
691cabdff1aSopenharmony_ci        PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
692cabdff1aSopenharmony_ci        PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
693cabdff1aSopenharmony_ci        PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
694cabdff1aSopenharmony_ci        PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
695cabdff1aSopenharmony_ci        PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
696cabdff1aSopenharmony_ci        PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
/*
 * QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
 *
 * Same slot-to-width mapping as EPEL_LINKS, except that slot 2 (width 6) is
 * not linked: eight PEL_LINK invocations for slots 1 and 3..9, i.e. widths
 * 4, 8, 12, 16, 24, 32, 48 and 64. The last PEL_LINK has no trailing ';', so
 * the invocation site supplies it.
 */
697cabdff1aSopenharmony_ci#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)           \
698cabdff1aSopenharmony_ci        PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
699cabdff1aSopenharmony_ci        PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
700cabdff1aSopenharmony_ci        PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
701cabdff1aSopenharmony_ci        PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
702cabdff1aSopenharmony_ci        PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
703cabdff1aSopenharmony_ci        PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
704cabdff1aSopenharmony_ci        PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
705cabdff1aSopenharmony_ci        PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
706cabdff1aSopenharmony_ci
707cabdff1aSopenharmony_civoid ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
708cabdff1aSopenharmony_ci{
709cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
710cabdff1aSopenharmony_ci
711cabdff1aSopenharmony_ci    if (bit_depth == 8) {
712cabdff1aSopenharmony_ci        if (EXTERNAL_MMXEXT(cpu_flags)) {
713cabdff1aSopenharmony_ci            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
714cabdff1aSopenharmony_ci
715cabdff1aSopenharmony_ci            c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
716cabdff1aSopenharmony_ci        }
717cabdff1aSopenharmony_ci        if (EXTERNAL_SSE2(cpu_flags)) {
718cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
719cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
720cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
721cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
722cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
723cabdff1aSopenharmony_ci
724cabdff1aSopenharmony_ci                c->idct[2] = ff_hevc_idct_16x16_8_sse2;
725cabdff1aSopenharmony_ci                c->idct[3] = ff_hevc_idct_32x32_8_sse2;
726cabdff1aSopenharmony_ci            }
727cabdff1aSopenharmony_ci            SAO_BAND_INIT(8, sse2);
728cabdff1aSopenharmony_ci
729cabdff1aSopenharmony_ci            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
730cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
731cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
732cabdff1aSopenharmony_ci
733cabdff1aSopenharmony_ci            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
734cabdff1aSopenharmony_ci            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
735cabdff1aSopenharmony_ci
736cabdff1aSopenharmony_ci            c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
737cabdff1aSopenharmony_ci            c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
738cabdff1aSopenharmony_ci            c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
739cabdff1aSopenharmony_ci        }
740cabdff1aSopenharmony_ci        if (EXTERNAL_SSSE3(cpu_flags)) {
741cabdff1aSopenharmony_ci            if(ARCH_X86_64) {
742cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
743cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
744cabdff1aSopenharmony_ci            }
745cabdff1aSopenharmony_ci            SAO_EDGE_INIT(8, ssse3);
746cabdff1aSopenharmony_ci        }
747cabdff1aSopenharmony_ci        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
748cabdff1aSopenharmony_ci
749cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
750cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
751cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
752cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);
753cabdff1aSopenharmony_ci
754cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
755cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
756cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
757cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
758cabdff1aSopenharmony_ci        }
759cabdff1aSopenharmony_ci        if (EXTERNAL_AVX(cpu_flags)) {
760cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
761cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
762cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
763cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
764cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
765cabdff1aSopenharmony_ci
766cabdff1aSopenharmony_ci                c->idct[2] = ff_hevc_idct_16x16_8_avx;
767cabdff1aSopenharmony_ci                c->idct[3] = ff_hevc_idct_32x32_8_avx;
768cabdff1aSopenharmony_ci            }
769cabdff1aSopenharmony_ci            SAO_BAND_INIT(8, avx);
770cabdff1aSopenharmony_ci
771cabdff1aSopenharmony_ci            c->idct[0] = ff_hevc_idct_4x4_8_avx;
772cabdff1aSopenharmony_ci            c->idct[1] = ff_hevc_idct_8x8_8_avx;
773cabdff1aSopenharmony_ci
774cabdff1aSopenharmony_ci            c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
775cabdff1aSopenharmony_ci            c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
776cabdff1aSopenharmony_ci            c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
777cabdff1aSopenharmony_ci        }
778cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2(cpu_flags)) {
779cabdff1aSopenharmony_ci            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
780cabdff1aSopenharmony_ci            c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
781cabdff1aSopenharmony_ci        }
782cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
783cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
784cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
785cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
786cabdff1aSopenharmony_ci                c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
787cabdff1aSopenharmony_ci                c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
788cabdff1aSopenharmony_ci                c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
789cabdff1aSopenharmony_ci
790cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
791cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
792cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
793cabdff1aSopenharmony_ci
794cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
795cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
796cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
797cabdff1aSopenharmony_ci
798cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
799cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
800cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
801cabdff1aSopenharmony_ci
802cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
803cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
804cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
805cabdff1aSopenharmony_ci
806cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
807cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
808cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
809cabdff1aSopenharmony_ci
810cabdff1aSopenharmony_ci                c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
811cabdff1aSopenharmony_ci                c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
812cabdff1aSopenharmony_ci                c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
813cabdff1aSopenharmony_ci
814cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
815cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
816cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
817cabdff1aSopenharmony_ci
818cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
819cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
820cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
821cabdff1aSopenharmony_ci
822cabdff1aSopenharmony_ci                c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
823cabdff1aSopenharmony_ci                c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
824cabdff1aSopenharmony_ci                c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
825cabdff1aSopenharmony_ci
826cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
827cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
828cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
829cabdff1aSopenharmony_ci
830cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
831cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
832cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
833cabdff1aSopenharmony_ci
834cabdff1aSopenharmony_ci                c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
835cabdff1aSopenharmony_ci                c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
836cabdff1aSopenharmony_ci                c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
837cabdff1aSopenharmony_ci
838cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
839cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
840cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
841cabdff1aSopenharmony_ci
842cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
843cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
844cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
845cabdff1aSopenharmony_ci
846cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
847cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
848cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
849cabdff1aSopenharmony_ci
850cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
851cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
852cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
853cabdff1aSopenharmony_ci
854cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
855cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
856cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
857cabdff1aSopenharmony_ci
858cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
859cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
860cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
861cabdff1aSopenharmony_ci
862cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
863cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
864cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
865cabdff1aSopenharmony_ci
866cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
867cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
868cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
869cabdff1aSopenharmony_ci            }
870cabdff1aSopenharmony_ci            SAO_BAND_INIT(8, avx2);
871cabdff1aSopenharmony_ci
872cabdff1aSopenharmony_ci            c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
873cabdff1aSopenharmony_ci            c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
874cabdff1aSopenharmony_ci            c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
875cabdff1aSopenharmony_ci
876cabdff1aSopenharmony_ci            c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
877cabdff1aSopenharmony_ci        }
878cabdff1aSopenharmony_ci        if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
879cabdff1aSopenharmony_ci            c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl;
880cabdff1aSopenharmony_ci            c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl;
881cabdff1aSopenharmony_ci            c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl;
882cabdff1aSopenharmony_ci            c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl;
883cabdff1aSopenharmony_ci            c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl;
884cabdff1aSopenharmony_ci            c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl;
885cabdff1aSopenharmony_ci        }
886cabdff1aSopenharmony_ci    } else if (bit_depth == 10) {
887cabdff1aSopenharmony_ci        if (EXTERNAL_MMXEXT(cpu_flags)) {
888cabdff1aSopenharmony_ci            c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
889cabdff1aSopenharmony_ci            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
890cabdff1aSopenharmony_ci        }
891cabdff1aSopenharmony_ci        if (EXTERNAL_SSE2(cpu_flags)) {
892cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
893cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
894cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
895cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
896cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
897cabdff1aSopenharmony_ci
898cabdff1aSopenharmony_ci                c->idct[2] = ff_hevc_idct_16x16_10_sse2;
899cabdff1aSopenharmony_ci                c->idct[3] = ff_hevc_idct_32x32_10_sse2;
900cabdff1aSopenharmony_ci            }
901cabdff1aSopenharmony_ci            SAO_BAND_INIT(10, sse2);
902cabdff1aSopenharmony_ci            SAO_EDGE_INIT(10, sse2);
903cabdff1aSopenharmony_ci
904cabdff1aSopenharmony_ci            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
905cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
906cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
907cabdff1aSopenharmony_ci
908cabdff1aSopenharmony_ci            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
909cabdff1aSopenharmony_ci            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
910cabdff1aSopenharmony_ci
911cabdff1aSopenharmony_ci            c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
912cabdff1aSopenharmony_ci            c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
913cabdff1aSopenharmony_ci            c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
914cabdff1aSopenharmony_ci        }
915cabdff1aSopenharmony_ci        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
916cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
917cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
918cabdff1aSopenharmony_ci        }
919cabdff1aSopenharmony_ci        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
920cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
921cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
922cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
923cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);
924cabdff1aSopenharmony_ci
925cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
926cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
927cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
928cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
929cabdff1aSopenharmony_ci        }
930cabdff1aSopenharmony_ci        if (EXTERNAL_AVX(cpu_flags)) {
931cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
932cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
933cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
934cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
935cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
936cabdff1aSopenharmony_ci
937cabdff1aSopenharmony_ci                c->idct[2] = ff_hevc_idct_16x16_10_avx;
938cabdff1aSopenharmony_ci                c->idct[3] = ff_hevc_idct_32x32_10_avx;
939cabdff1aSopenharmony_ci            }
940cabdff1aSopenharmony_ci
941cabdff1aSopenharmony_ci            c->idct[0] = ff_hevc_idct_4x4_10_avx;
942cabdff1aSopenharmony_ci            c->idct[1] = ff_hevc_idct_8x8_10_avx;
943cabdff1aSopenharmony_ci
944cabdff1aSopenharmony_ci            SAO_BAND_INIT(10, avx);
945cabdff1aSopenharmony_ci        }
946cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2(cpu_flags)) {
947cabdff1aSopenharmony_ci            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
948cabdff1aSopenharmony_ci        }
949cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
950cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
951cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
952cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
953cabdff1aSopenharmony_ci                c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
954cabdff1aSopenharmony_ci                c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
955cabdff1aSopenharmony_ci                c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
956cabdff1aSopenharmony_ci                c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
957cabdff1aSopenharmony_ci                c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
958cabdff1aSopenharmony_ci
959cabdff1aSopenharmony_ci                c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
960cabdff1aSopenharmony_ci                c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
961cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
962cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
963cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
964cabdff1aSopenharmony_ci
965cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
966cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
967cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
968cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
969cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
970cabdff1aSopenharmony_ci
971cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
972cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
973cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
974cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
975cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
976cabdff1aSopenharmony_ci
977cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
978cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
979cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
980cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
981cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
982cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
983cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
984cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
985cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
986cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
987cabdff1aSopenharmony_ci
988cabdff1aSopenharmony_ci                c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
989cabdff1aSopenharmony_ci                c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
990cabdff1aSopenharmony_ci                c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
991cabdff1aSopenharmony_ci                c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
992cabdff1aSopenharmony_ci                c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
993cabdff1aSopenharmony_ci
994cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
995cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
996cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
997cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
998cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
999cabdff1aSopenharmony_ci
1000cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
1001cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
1002cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
1003cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
1004cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1005cabdff1aSopenharmony_ci
1006cabdff1aSopenharmony_ci                c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1007cabdff1aSopenharmony_ci                c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1008cabdff1aSopenharmony_ci                c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1009cabdff1aSopenharmony_ci                c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1010cabdff1aSopenharmony_ci                c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1011cabdff1aSopenharmony_ci
1012cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1013cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1014cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1015cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1016cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1017cabdff1aSopenharmony_ci
1018cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1019cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1020cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1021cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1022cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1023cabdff1aSopenharmony_ci
1024cabdff1aSopenharmony_ci                c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1025cabdff1aSopenharmony_ci                c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1026cabdff1aSopenharmony_ci                c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1027cabdff1aSopenharmony_ci                c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1028cabdff1aSopenharmony_ci                c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1029cabdff1aSopenharmony_ci
1030cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1031cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1032cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1033cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1034cabdff1aSopenharmony_ci                c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1035cabdff1aSopenharmony_ci
1036cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1037cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1038cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1039cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1040cabdff1aSopenharmony_ci                c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1041cabdff1aSopenharmony_ci
1042cabdff1aSopenharmony_ci                c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1043cabdff1aSopenharmony_ci                c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1044cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1045cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1046cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1047cabdff1aSopenharmony_ci
1048cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1049cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1050cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1051cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1052cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1053cabdff1aSopenharmony_ci
1054cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1055cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1056cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1057cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1058cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1059cabdff1aSopenharmony_ci
1060cabdff1aSopenharmony_ci                c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1061cabdff1aSopenharmony_ci                c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1062cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1063cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1064cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1065cabdff1aSopenharmony_ci
1066cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1067cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1068cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1069cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1070cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1071cabdff1aSopenharmony_ci
1072cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1073cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1074cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1075cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1076cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1077cabdff1aSopenharmony_ci
1078cabdff1aSopenharmony_ci                c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1079cabdff1aSopenharmony_ci                c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1080cabdff1aSopenharmony_ci                c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1081cabdff1aSopenharmony_ci                c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1082cabdff1aSopenharmony_ci                c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1083cabdff1aSopenharmony_ci
1084cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1085cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1086cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1087cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1088cabdff1aSopenharmony_ci                c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1089cabdff1aSopenharmony_ci
1090cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1091cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1092cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1093cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1094cabdff1aSopenharmony_ci                c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1095cabdff1aSopenharmony_ci            }
1096cabdff1aSopenharmony_ci            SAO_BAND_INIT(10, avx2);
1097cabdff1aSopenharmony_ci            SAO_EDGE_INIT(10, avx2);
1098cabdff1aSopenharmony_ci
1099cabdff1aSopenharmony_ci            c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1100cabdff1aSopenharmony_ci            c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1101cabdff1aSopenharmony_ci        }
1102cabdff1aSopenharmony_ci    } else if (bit_depth == 12) {
1103cabdff1aSopenharmony_ci        if (EXTERNAL_MMXEXT(cpu_flags)) {
1104cabdff1aSopenharmony_ci            c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1105cabdff1aSopenharmony_ci        }
1106cabdff1aSopenharmony_ci        if (EXTERNAL_SSE2(cpu_flags)) {
1107cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1108cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1109cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
1110cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1111cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1112cabdff1aSopenharmony_ci            }
1113cabdff1aSopenharmony_ci            SAO_BAND_INIT(12, sse2);
1114cabdff1aSopenharmony_ci            SAO_EDGE_INIT(12, sse2);
1115cabdff1aSopenharmony_ci
1116cabdff1aSopenharmony_ci            c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1117cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1118cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1119cabdff1aSopenharmony_ci        }
1120cabdff1aSopenharmony_ci        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1121cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1122cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1123cabdff1aSopenharmony_ci        }
1124cabdff1aSopenharmony_ci        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1125cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1126cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
1127cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
1128cabdff1aSopenharmony_ci            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);
1129cabdff1aSopenharmony_ci
1130cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1131cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
1132cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
1133cabdff1aSopenharmony_ci            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
1134cabdff1aSopenharmony_ci        }
1135cabdff1aSopenharmony_ci        if (EXTERNAL_AVX(cpu_flags)) {
1136cabdff1aSopenharmony_ci            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1137cabdff1aSopenharmony_ci            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1138cabdff1aSopenharmony_ci            if (ARCH_X86_64) {
1139cabdff1aSopenharmony_ci                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1140cabdff1aSopenharmony_ci                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1141cabdff1aSopenharmony_ci            }
1142cabdff1aSopenharmony_ci            SAO_BAND_INIT(12, avx);
1143cabdff1aSopenharmony_ci        }
1144cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2(cpu_flags)) {
1145cabdff1aSopenharmony_ci            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1146cabdff1aSopenharmony_ci        }
1147cabdff1aSopenharmony_ci        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
1148cabdff1aSopenharmony_ci            c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1149cabdff1aSopenharmony_ci            c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1150cabdff1aSopenharmony_ci
1151cabdff1aSopenharmony_ci            SAO_BAND_INIT(12, avx2);
1152cabdff1aSopenharmony_ci            SAO_EDGE_INIT(12, avx2);
1153cabdff1aSopenharmony_ci        }
1154cabdff1aSopenharmony_ci    }
1155cabdff1aSopenharmony_ci}
1156