;******************************************************************************
;* SIMD optimized SAO functions for HEVC 8bit decoding
;*
;* Copyright (c) 2013 Pierre-Edouard LEPERE
;* Copyright (c) 2014 James Almer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32

; pshufb control mask, stored twice (2 x 16 bytes). HEVC_SAO_EDGE_FILTER uses
; it to permute the byte-packed offsets into a lookup table that
; HEVC_SAO_EDGE_FILTER_COMPUTE indexes per pixel. Only the first five lanes of
; each 16-byte half select a source byte (bytes 1, 2, 0, 3, 4); the -1 lanes
; have bit 7 set, for which pshufb writes zero.
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
; Neighbour positions for the four edge-offset classes: four signed bytes per
; class, laid out as (a_x, a_y, b_x, b_y). HEVC_SAO_EDGE_FILTER_INIT reads them
; at [pb_eo + eo*4 + 0..3] and scales the y components by the source stride.
pb_eo:                   db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
; Constants defined outside this file; loaded into m7 / m6 by the edge filter.
; NOTE(review): presumably every byte is 1 and 2 respectively - confirm against
; their definition.
cextern pb_1
cextern pb_2

SECTION .text

;******************************************************************************
;SAO Band Filter
;******************************************************************************

; HEVC_SAO_BAND_FILTER_INIT
; One-time setup for the band filter. Expanded directly after cglobal in
; HEVC_SAO_BAND_FILTER and relies on that macro's named arguments (offsetq,
; leftq).
;
; On exit:
;   m0-m3    band indices (left + 0..3) & 31, one index broadcast to every
;            word lane of its register
;   m4-m7    the 16-bit values at [offsetq + 2], +4, +6 and +8, each broadcast
;            to every word lane
;   m14      all-zero register used for byte <-> word (un)packing
;   heightd  loaded from the eighth argument (r7m)
;
; x86-32 has only m0-m7, so the four band indices and the first three offsets
; are spilled to [rsp + mmsize*0 .. mmsize*6] (the fourth offset stays in m7),
; m0 is cleared, and m14/m13/m9/m8 are defined as aliases of m0/m1/m2/m3 so the
; loop body can be written once for both architectures.
%macro HEVC_SAO_BAND_FILTER_INIT 0
    ; band index k = (left + k) & 31 for k = 0..3; leftq is consumed here
    and            leftq, 31
    movd             xm0, leftd
    add            leftq, 1
    and            leftq, 31
    movd             xm1, leftd
    add            leftq, 1
    and            leftq, 31
    movd             xm2, leftd
    add            leftq, 1
    and            leftq, 31
    movd             xm3, leftd

    ; broadcast each band index to all word lanes
    SPLATW            m0, xm0
    SPLATW            m1, xm1
    SPLATW            m2, xm2
    SPLATW            m3, xm3
    ; broadcast the four offsets; entry 0 of the offset array is skipped
%if mmsize > 16
    SPLATW            m4, [offsetq + 2]
    SPLATW            m5, [offsetq + 4]
    SPLATW            m6, [offsetq + 6]
    SPLATW            m7, [offsetq + 8]
%else
    movq              m7, [offsetq + 2]          ; four words in one load
    SPLATW            m4, m7, 0
    SPLATW            m5, m7, 1
    SPLATW            m6, m7, 2
    SPLATW            m7, m7, 3                  ; last: overwrites the source
%endif

%if ARCH_X86_64
    pxor             m14, m14

%else ; ARCH_X86_32
    ; spill everything except the fourth offset (m7) to the stack area
    ; reserved by cglobal
    mova  [rsp+mmsize*0], m0
    mova  [rsp+mmsize*1], m1
    mova  [rsp+mmsize*2], m2
    mova  [rsp+mmsize*3], m3
    mova  [rsp+mmsize*4], m4
    mova  [rsp+mmsize*5], m5
    mova  [rsp+mmsize*6], m6
    pxor              m0, m0                     ; becomes the zero register m14
    ; Frozen copy of mmsize: the 48-pixel variant switches to INIT_XMM inside
    ; the loop, and HEVC_SAO_BAND_FILTER_COMPUTE must keep addressing the spill
    ; slots with the stride used above.
    %assign MMSIZE mmsize
    %define m14 m0
    %define m13 m1
    %define  m9 m2
    %define  m8 m3
%endif ; ARCH
    ; the register that held "left" is reused as the row counter
DEFINE_ARGS dst, src, dststride, srcstride, offset, height
    mov          heightd, r7m
%endmacro

; HEVC_SAO_BAND_FILTER_COMPUTE tmp, pix
;   %1  scratch register, clobbered
;   %2  in:  pixels widened to 16-bit words
;       out: each pixel plus the offset of the band it falls in; pixels that
;            match none of the four bands are left unchanged
;
; Expects the state prepared by HEVC_SAO_BAND_FILTER_INIT (band indices and
; offsets in m0-m7 on x86-64; in the stack spill slots plus m7 on x86-32).
; Clobbers m10-m12 on x86-64 and m4-m6 on x86-32.
;
; The band of a pixel is pixel >> 3. The four band indices are distinct, so at
; most one compare mask is set per lane and OR-ing the masked offsets selects
; the matching offset (or zero).
%macro HEVC_SAO_BAND_FILTER_COMPUTE 2
    psraw             %1, %2, 3                  ; band = pixel >> 3
%if ARCH_X86_64
    pcmpeqw          m10, %1, m0
    pcmpeqw          m11, %1, m1
    pcmpeqw          m12, %1, m2
    pcmpeqw           %1, m3
    pand             m10, m4                     ; mask & offset, per band
    pand             m11, m5
    pand             m12, m6
    pand              %1, m7
    por              m10, m11
    por              m12, %1
    por              m10, m12                    ; selected offset or 0
    paddw             %2, m10
%else ; ARCH_X86_32
    ; same computation with band indices / offsets read from the spill slots
    pcmpeqw           m4, %1, [rsp+MMSIZE*0]
    pcmpeqw           m5, %1, [rsp+MMSIZE*1]
    pcmpeqw           m6, %1, [rsp+MMSIZE*2]
    pcmpeqw           %1, [rsp+MMSIZE*3]
    pand              m4, [rsp+MMSIZE*4]
    pand              m5, [rsp+MMSIZE*5]
    pand              m6, [rsp+MMSIZE*6]
    pand              %1, m7                     ; fourth offset was never spilled
    por               m4, m5
    por               m6, %1
    por               m4, m6
    paddw             %2, m4
%endif ; ARCH
%endmacro

;void ff_hevc_sao_band_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
;                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);
;
; HEVC_SAO_BAND_FILTER width, nregs
;   %1  block width in pixels; it is part of the function name and selects the
;       8-pixel (movq) path and the 48-pixel tail
;   %2  number of full-register (mmsize bytes) chunks processed per row
;
; The width is fixed per instantiation; the loop below never reads the run-time
; "width" argument. On x86-32, 7*mmsize bytes of stack are reserved for the
; spill slots used by HEVC_SAO_BAND_FILTER_INIT / _COMPUTE.
; NOTE(review): full-register loads and stores use mova, so src and dst rows
; are presumably aligned to the register size - confirm against the callers.
%macro HEVC_SAO_BAND_FILTER 2
cglobal hevc_sao_band_filter_%1_8, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
    HEVC_SAO_BAND_FILTER_INIT

align 16
.loop:
%if %1 == 8
    ; 8 pixels: one 64-bit load, widened to words against the zero register
    movq              m8, [srcq]
    punpcklbw         m8, m14
    HEVC_SAO_BAND_FILTER_COMPUTE m9, m8
    packuswb          m8, m14                    ; saturate back to bytes
    movq          [dstq], m8
%endif ; %1 == 8

%assign i 0
%rep %2
    ; one full register: filter the low and high halves as words, then repack
    mova             m13, [srcq + i]
    punpcklbw         m8, m13, m14
    HEVC_SAO_BAND_FILTER_COMPUTE m9,  m8
    punpckhbw        m13, m14
    HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
    packuswb          m8, m13
    mova      [dstq + i], m8
%assign i i+mmsize
%endrep

%if %1 == 48
    ; 48-pixel rows: the chunks above cover the first i bytes; the remaining
    ; 16 are processed with XMM registers
INIT_XMM cpuname

    mova             m13, [srcq + i]
    punpcklbw         m8, m13, m14
    HEVC_SAO_BAND_FILTER_COMPUTE m9,  m8
    punpckhbw        m13, m14
    HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
    packuswb          m8, m13
    mova      [dstq + i], m8
%if cpuflag(avx2)
INIT_YMM cpuname
%endif
%endif ; %1 == 48

    add             dstq, dststrideq             ; dst += dststride
    add             srcq, srcstrideq             ; src += srcstride
    dec          heightd                         ; height--
    jnz               .loop                      ; until all rows are done
    REP_RET
%endmacro


; Emits the band filter for every supported width using 16-byte registers.
; The second argument is the number of full 16-byte chunks per row: 0 for
; width 8 (handled by the movq path) and 2 for width 48 (the last 16 bytes are
; handled by the dedicated 48-pixel tail).
%macro HEVC_SAO_BAND_FILTER_FUNCS 0
HEVC_SAO_BAND_FILTER  8, 0
HEVC_SAO_BAND_FILTER 16, 1
HEVC_SAO_BAND_FILTER 32, 2
HEVC_SAO_BAND_FILTER 48, 2
HEVC_SAO_BAND_FILTER 64, 4
%endmacro

; Band filter instantiations: SSE2 and AVX use XMM registers for all widths.
INIT_XMM sse2
HEVC_SAO_BAND_FILTER_FUNCS
INIT_XMM avx
HEVC_SAO_BAND_FILTER_FUNCS

; AVX2: widths 8 and 16 fit in an XMM register; widths of 32 and above use YMM,
; so the chunk count is counted in 32-byte registers (48 = one YMM chunk plus
; the 16-byte XMM tail).
%if HAVE_AVX2_EXTERNAL
INIT_XMM avx2
HEVC_SAO_BAND_FILTER  8, 0
HEVC_SAO_BAND_FILTER 16, 1
INIT_YMM avx2
HEVC_SAO_BAND_FILTER 32, 1
HEVC_SAO_BAND_FILTER 48, 1
HEVC_SAO_BAND_FILTER 64, 2
%endif

;******************************************************************************
;SAO Edge Filter
;******************************************************************************

%define MAX_PB_SIZE  64
%define PADDING_SIZE 64 ; AV_INPUT_BUFFER_PADDING_SIZE
; Row pitch, in bytes, of the src buffer walked by the edge filter: src is
; advanced by this constant rather than by a stride argument. Parenthesised so
; that it expands as a single term in whatever expression it appears in.
%define EDGE_SRCSTRIDE (2 * MAX_PB_SIZE + PADDING_SIZE)

; HEVC_SAO_EDGE_FILTER_INIT
; Turns the edge-offset class "eo" into the byte offsets of the two neighbour
; pixels relative to the current pixel:
;   a_strideq = pb_eo[eo*4 + 0] + pb_eo[eo*4 + 1] * EDGE_SRCSTRIDE
;   b_strideq = pb_eo[eo*4 + 2] + pb_eo[eo*4 + 3] * EDGE_SRCSTRIDE
;
; Uses eoq, tmpq and tmp2q as scratch. The caller (HEVC_SAO_EDGE_FILTER) maps
; those names onto registers whose final value is only loaded after this macro
; has run.
%macro HEVC_SAO_EDGE_FILTER_INIT 0
    ; fetch eo (fifth argument) as a pointer-sized index
%if WIN64
    movsxd           eoq, dword eom
%elif ARCH_X86_64
    movsxd           eoq, eod
%else
    mov              eoq, r4m
%endif
    lea            tmp2q, [pb_eo]                ; table base, so it can be indexed
    movsx      a_strideq, byte [tmp2q+eoq*4+1]   ; a_y
    movsx      b_strideq, byte [tmp2q+eoq*4+3]   ; b_y
    imul       a_strideq, EDGE_SRCSTRIDE
    imul       b_strideq, EDGE_SRCSTRIDE
    movsx           tmpq, byte [tmp2q+eoq*4]     ; a_x
    add        a_strideq, tmpq
    movsx           tmpq, byte [tmp2q+eoq*4+2]   ; b_x
    add        b_strideq, tmpq
%endmacro

; HEVC_SAO_EDGE_FILTER_COMPUTE width
;   %1  block width; widths above 8 process both register halves, width 8 only
;       the low half
;
; In:   m0  per-lane byte lookup table of offsets (index 0..4)
;       m1  current pixels c
;       m2  first neighbours a
;       m3  second neighbours b
;       m6  loaded from pb_2, m7 loaded from pb_1 by the caller
; Out:  m3  filtered pixels packed to unsigned bytes (for width 8 the result is
;           duplicated in both halves)
; Clobbers m2, m4, m5. m0, m1, m6 and m7 are preserved.
;
; Per byte the table index is m6 + sign(c - a) + sign(c - b), with each sign
; derived from unsigned minima and equality masks (a mask is -1 when true).
; NOTE(review): the index lands in 0..4 only if m6 holds 2 in every byte, and
; the sum below equals pixel + offset only if m7 holds 1 in every byte -
; both presumed from the names pb_2 / pb_1.
%macro HEVC_SAO_EDGE_FILTER_COMPUTE 1
    pminub            m4, m1, m2
    pminub            m5, m1, m3
    pcmpeqb           m2, m4                     ; -1 where a <= c
    pcmpeqb           m3, m5                     ; -1 where b <= c
    pcmpeqb           m4, m1                     ; -1 where c <= a
    pcmpeqb           m5, m1                     ; -1 where c <= b
    psubb             m4, m2                     ; sign(c - a)
    psubb             m5, m3                     ; sign(c - b)
    paddb             m4, m6
    paddb             m4, m5                     ; table index

    pshufb            m2, m0, m4                 ; m2 = offset for each pixel
    ; Interleave (m7, pixel) with (offset, m7) so that pmaddubsw, which treats
    ; its first operand as unsigned and its second as signed bytes, yields
    ; m7*offset + pixel*m7 as a signed word per pixel.
%if %1 > 8
    punpckhbw         m5, m7, m1
    punpckhbw         m4, m2, m7
    punpcklbw         m3, m7, m1
    punpcklbw         m2, m7
    pmaddubsw         m5, m4
    pmaddubsw         m3, m2
    packuswb          m3, m5                     ; saturate to 0..255
%else
    punpcklbw         m3, m7, m1
    punpcklbw         m2, m7
    pmaddubsw         m3, m2
    packuswb          m3, m3                     ; saturate to 0..255
%endif
%endmacro

;void ff_hevc_sao_edge_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
;                                             int eo, int width, int height);
;
; HEVC_SAO_EDGE_FILTER width, nregs[, store]
;   %1  block width in pixels; it is part of the function name and selects the
;       8-pixel (movq) path and the 48-pixel tail
;   %2  number of full-register (mmsize bytes) chunks processed per row
;   %3  suffix of the store instruction for those chunks: "a" gives mova,
;       "u" gives movu; may be omitted when %2 is 0
;
; The width is fixed per instantiation; the run-time "width" argument is never
; read. src is advanced by the constant EDGE_SRCSTRIDE, dst by dststride.
%macro HEVC_SAO_EDGE_FILTER 2-3
%if ARCH_X86_64
cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
    ; heightq doubles as the table pointer during INIT, so the real height is
    ; only loaded afterwards
%define tmp2q heightq
    HEVC_SAO_EDGE_FILTER_INIT
    mov          heightd, r6m

%else ; ARCH_X86_32
    ; Only six GPRs: eo, the two temporaries and the offset pointer borrow
    ; registers whose final values (src, dststride, height) are loaded once
    ; the borrowed use is over.
cglobal hevc_sao_edge_filter_%1_8, 1, 6, 8, dst, src, dststride, a_stride, b_stride, height
%define eoq   srcq
%define tmpq  heightq
%define tmp2q dststrideq
%define offsetq heightq
    HEVC_SAO_EDGE_FILTER_INIT
    mov             srcq, srcm
    mov          offsetq, r3m
    mov       dststrideq, dststridem
%endif ; ARCH

    ; Build the offset lookup table in m0: load 16 bytes of int16 offsets,
    ; saturate them to signed bytes and permute them with pb_edge_shuffle so
    ; that table entries 0..4 hold offsets 1, 2, 0, 3, 4.
%if mmsize > 16
    vbroadcasti128    m0, [offsetq]
%else
    movu              m0, [offsetq]
%endif
    mova              m1, [pb_edge_shuffle]
    packsswb          m0, m0
    mova              m7, [pb_1]
    pshufb            m0, m1
    mova              m6, [pb_2]
%if ARCH_X86_32
    mov          heightd, r6m                    ; offsetq (same register) is dead now
%endif

align 16
.loop:

%if %1 == 8
    movq              m1, [srcq]
    movq              m2, [srcq + a_strideq]
    movq              m3, [srcq + b_strideq]
    HEVC_SAO_EDGE_FILTER_COMPUTE %1
    movq          [dstq], m3
%endif

%assign i 0
%rep %2
    ; neighbour rows are loaded with movu; the current row uses mova
    mova              m1, [srcq + i]
    movu              m2, [srcq + a_strideq + i]
    movu              m3, [srcq + b_strideq + i]
    HEVC_SAO_EDGE_FILTER_COMPUTE %1
    mov%3     [dstq + i], m3
%assign i i+mmsize
%endrep

%if %1 == 48
    ; 48-pixel rows: the chunks above cover the first i bytes; the remaining
    ; 16 are processed with XMM registers
INIT_XMM cpuname

    mova              m1, [srcq + i]
    movu              m2, [srcq + a_strideq + i]
    movu              m3, [srcq + b_strideq + i]
    HEVC_SAO_EDGE_FILTER_COMPUTE %1
    mova      [dstq + i], m3
%if cpuflag(avx2)
INIT_YMM cpuname
%endif
%endif

    add             dstq, dststrideq
    add             srcq, EDGE_SRCSTRIDE
    dec          heightd
    jg .loop                                     ; while rows remain (height > 0)
    RET
%endmacro

; Edge filter instantiations. SSSE3 is the baseline (the filter uses pshufb
; and pmaddubsw) and works on 16-byte registers: 0 chunks for width 8 (movq
; path), 2 chunks plus the 16-byte tail for width 48.
INIT_XMM ssse3
HEVC_SAO_EDGE_FILTER  8, 0
HEVC_SAO_EDGE_FILTER 16, 1, a
HEVC_SAO_EDGE_FILTER 32, 2, a
HEVC_SAO_EDGE_FILTER 48, 2, a
HEVC_SAO_EDGE_FILTER 64, 4, a

; AVX2 versions use 32-byte registers. The 48-pixel variant stores its YMM
; chunk with movu.
; NOTE(review): presumably because 48-wide rows of dst are not guaranteed to be
; 32-byte aligned - confirm against the callers.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
HEVC_SAO_EDGE_FILTER 32, 1, a
HEVC_SAO_EDGE_FILTER 48, 1, u
HEVC_SAO_EDGE_FILTER 64, 2, a
%endif