/*
 * SIMD-optimized motion estimation
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/me_cmp.h"
#include "libavcodec/mpegvideo.h"
33cabdff1aSopenharmony_ciint ff_sum_abs_dctelem_sse2(int16_t *block);
34cabdff1aSopenharmony_ciint ff_sum_abs_dctelem_ssse3(int16_t *block);
35cabdff1aSopenharmony_ciint ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
36cabdff1aSopenharmony_ci                ptrdiff_t stride, int h);
37cabdff1aSopenharmony_ciint ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
38cabdff1aSopenharmony_ci                 ptrdiff_t stride, int h);
39cabdff1aSopenharmony_ciint ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
40cabdff1aSopenharmony_ci                  ptrdiff_t stride, int h);
41cabdff1aSopenharmony_ciint ff_hf_noise8_mmx(uint8_t *pix1, ptrdiff_t stride, int h);
42cabdff1aSopenharmony_ciint ff_hf_noise16_mmx(uint8_t *pix1, ptrdiff_t stride, int h);
43cabdff1aSopenharmony_ciint ff_sad8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
44cabdff1aSopenharmony_ci                   ptrdiff_t stride, int h);
45cabdff1aSopenharmony_ciint ff_sad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
46cabdff1aSopenharmony_ci                    ptrdiff_t stride, int h);
47cabdff1aSopenharmony_ciint ff_sad16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
48cabdff1aSopenharmony_ci                  ptrdiff_t stride, int h);
49cabdff1aSopenharmony_ciint ff_sad8_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
50cabdff1aSopenharmony_ci                      ptrdiff_t stride, int h);
51cabdff1aSopenharmony_ciint ff_sad16_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
52cabdff1aSopenharmony_ci                       ptrdiff_t stride, int h);
53cabdff1aSopenharmony_ciint ff_sad16_x2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
54cabdff1aSopenharmony_ci                     ptrdiff_t stride, int h);
55cabdff1aSopenharmony_ciint ff_sad8_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
56cabdff1aSopenharmony_ci                      ptrdiff_t stride, int h);
57cabdff1aSopenharmony_ciint ff_sad16_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
58cabdff1aSopenharmony_ci                       ptrdiff_t stride, int h);
59cabdff1aSopenharmony_ciint ff_sad16_y2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
60cabdff1aSopenharmony_ci                     ptrdiff_t stride, int h);
61cabdff1aSopenharmony_ciint ff_sad8_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
62cabdff1aSopenharmony_ci                              ptrdiff_t stride, int h);
63cabdff1aSopenharmony_ciint ff_sad16_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
64cabdff1aSopenharmony_ci                               ptrdiff_t stride, int h);
65cabdff1aSopenharmony_ciint ff_sad16_approx_xy2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
66cabdff1aSopenharmony_ci                             ptrdiff_t stride, int h);
67cabdff1aSopenharmony_ciint ff_vsad_intra8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
68cabdff1aSopenharmony_ci                          ptrdiff_t stride, int h);
69cabdff1aSopenharmony_ciint ff_vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
70cabdff1aSopenharmony_ci                           ptrdiff_t stride, int h);
71cabdff1aSopenharmony_ciint ff_vsad_intra16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
72cabdff1aSopenharmony_ci                         ptrdiff_t stride, int h);
73cabdff1aSopenharmony_ciint ff_vsad8_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
74cabdff1aSopenharmony_ci                    ptrdiff_t stride, int h);
75cabdff1aSopenharmony_ciint ff_vsad16_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
76cabdff1aSopenharmony_ci                     ptrdiff_t stride, int h);
77cabdff1aSopenharmony_ciint ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
78cabdff1aSopenharmony_ci                   ptrdiff_t stride, int h);
79cabdff1aSopenharmony_ci
80cabdff1aSopenharmony_ci#define hadamard_func(cpu)                                                    \
81cabdff1aSopenharmony_ci    int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,           \
82cabdff1aSopenharmony_ci                                  uint8_t *src2, ptrdiff_t stride, int h);    \
83cabdff1aSopenharmony_ci    int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,         \
84cabdff1aSopenharmony_ci                                    uint8_t *src2, ptrdiff_t stride, int h);
85cabdff1aSopenharmony_ci
86cabdff1aSopenharmony_cihadamard_func(mmxext)
87cabdff1aSopenharmony_cihadamard_func(sse2)
88cabdff1aSopenharmony_cihadamard_func(ssse3)
89cabdff1aSopenharmony_ci
#if HAVE_X86ASM
91cabdff1aSopenharmony_cistatic int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
92cabdff1aSopenharmony_ci                      ptrdiff_t stride, int h)
93cabdff1aSopenharmony_ci{
94cabdff1aSopenharmony_ci    int score1, score2;
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_ci    if (c)
97cabdff1aSopenharmony_ci        score1 = c->mecc.sse[0](c, pix1, pix2, stride, h);
98cabdff1aSopenharmony_ci    else
99cabdff1aSopenharmony_ci        score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
100cabdff1aSopenharmony_ci    score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)
101cabdff1aSopenharmony_ci           - ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h);
102cabdff1aSopenharmony_ci
103cabdff1aSopenharmony_ci    if (c)
104cabdff1aSopenharmony_ci        return score1 + FFABS(score2) * c->avctx->nsse_weight;
105cabdff1aSopenharmony_ci    else
106cabdff1aSopenharmony_ci        return score1 + FFABS(score2) * 8;
107cabdff1aSopenharmony_ci}
108cabdff1aSopenharmony_ci
109cabdff1aSopenharmony_cistatic int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
110cabdff1aSopenharmony_ci                     ptrdiff_t stride, int h)
111cabdff1aSopenharmony_ci{
112cabdff1aSopenharmony_ci    int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h);
113cabdff1aSopenharmony_ci    int score2 = ff_hf_noise8_mmx(pix1, stride, h) -
114cabdff1aSopenharmony_ci                 ff_hf_noise8_mmx(pix2, stride, h);
115cabdff1aSopenharmony_ci
116cabdff1aSopenharmony_ci    if (c)
117cabdff1aSopenharmony_ci        return score1 + FFABS(score2) * c->avctx->nsse_weight;
118cabdff1aSopenharmony_ci    else
119cabdff1aSopenharmony_ci        return score1 + FFABS(score2) * 8;
120cabdff1aSopenharmony_ci}
121cabdff1aSopenharmony_ci
#endif /* HAVE_X86ASM */

#if HAVE_INLINE_ASM

126cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
127cabdff1aSopenharmony_ci    0x0000000000000000ULL,
128cabdff1aSopenharmony_ci    0x0001000100010001ULL,
129cabdff1aSopenharmony_ci    0x0002000200020002ULL,
130cabdff1aSopenharmony_ci};
131cabdff1aSopenharmony_ci
132cabdff1aSopenharmony_cistatic inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
133cabdff1aSopenharmony_ci                              ptrdiff_t stride, int h)
134cabdff1aSopenharmony_ci{
135cabdff1aSopenharmony_ci    x86_reg len = -stride * h;
136cabdff1aSopenharmony_ci    __asm__ volatile (
137cabdff1aSopenharmony_ci        "movq  (%1, %%"FF_REG_a"), %%mm0\n\t"
138cabdff1aSopenharmony_ci        "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
139cabdff1aSopenharmony_ci        "movq %%mm0, %%mm1              \n\t"
140cabdff1aSopenharmony_ci        "movq %%mm2, %%mm3              \n\t"
141cabdff1aSopenharmony_ci        "punpcklbw %%mm7, %%mm0         \n\t"
142cabdff1aSopenharmony_ci        "punpckhbw %%mm7, %%mm1         \n\t"
143cabdff1aSopenharmony_ci        "punpcklbw %%mm7, %%mm2         \n\t"
144cabdff1aSopenharmony_ci        "punpckhbw %%mm7, %%mm3         \n\t"
145cabdff1aSopenharmony_ci        "paddw %%mm2, %%mm0             \n\t"
146cabdff1aSopenharmony_ci        "paddw %%mm3, %%mm1             \n\t"
147cabdff1aSopenharmony_ci        ".p2align 4                     \n\t"
148cabdff1aSopenharmony_ci        "1:                             \n\t"
149cabdff1aSopenharmony_ci        "movq  (%2, %%"FF_REG_a"), %%mm2\n\t"
150cabdff1aSopenharmony_ci        "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
151cabdff1aSopenharmony_ci        "movq %%mm2, %%mm3              \n\t"
152cabdff1aSopenharmony_ci        "movq %%mm4, %%mm5              \n\t"
153cabdff1aSopenharmony_ci        "punpcklbw %%mm7, %%mm2         \n\t"
154cabdff1aSopenharmony_ci        "punpckhbw %%mm7, %%mm3         \n\t"
155cabdff1aSopenharmony_ci        "punpcklbw %%mm7, %%mm4         \n\t"
156cabdff1aSopenharmony_ci        "punpckhbw %%mm7, %%mm5         \n\t"
157cabdff1aSopenharmony_ci        "paddw %%mm4, %%mm2             \n\t"
158cabdff1aSopenharmony_ci        "paddw %%mm5, %%mm3             \n\t"
159cabdff1aSopenharmony_ci        "movq %5, %%mm5                 \n\t"
160cabdff1aSopenharmony_ci        "paddw %%mm2, %%mm0             \n\t"
161cabdff1aSopenharmony_ci        "paddw %%mm3, %%mm1             \n\t"
162cabdff1aSopenharmony_ci        "paddw %%mm5, %%mm0             \n\t"
163cabdff1aSopenharmony_ci        "paddw %%mm5, %%mm1             \n\t"
164cabdff1aSopenharmony_ci        "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
165cabdff1aSopenharmony_ci        "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
166cabdff1aSopenharmony_ci        "psrlw $2, %%mm0                \n\t"
167cabdff1aSopenharmony_ci        "psrlw $2, %%mm1                \n\t"
168cabdff1aSopenharmony_ci        "packuswb %%mm1, %%mm0          \n\t"
169cabdff1aSopenharmony_ci        "psubusb %%mm0, %%mm4           \n\t"
170cabdff1aSopenharmony_ci        "psubusb %%mm5, %%mm0           \n\t"
171cabdff1aSopenharmony_ci        "por %%mm4, %%mm0               \n\t"
172cabdff1aSopenharmony_ci        "movq %%mm0, %%mm4              \n\t"
173cabdff1aSopenharmony_ci        "punpcklbw %%mm7, %%mm0         \n\t"
174cabdff1aSopenharmony_ci        "punpckhbw %%mm7, %%mm4         \n\t"
175cabdff1aSopenharmony_ci        "paddw %%mm0, %%mm6             \n\t"
176cabdff1aSopenharmony_ci        "paddw %%mm4, %%mm6             \n\t"
177cabdff1aSopenharmony_ci        "movq  %%mm2, %%mm0             \n\t"
178cabdff1aSopenharmony_ci        "movq  %%mm3, %%mm1             \n\t"
179cabdff1aSopenharmony_ci        "add %4, %%"FF_REG_a"           \n\t"
180cabdff1aSopenharmony_ci        " js 1b                         \n\t"
181cabdff1aSopenharmony_ci        : "+a" (len)
182cabdff1aSopenharmony_ci        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
183cabdff1aSopenharmony_ci          "r" (stride), "m" (round_tab[2]));
184cabdff1aSopenharmony_ci}
185cabdff1aSopenharmony_ci
/**
 * Fold the four 16-bit words held in %mm6 into one value.
 *
 * The additions are 16-bit packed adds and the result is masked to 16 bits,
 * so the return value is the sum of the four words modulo 65536.
 * Overwrites %mm0 and %mm6.
 *
 * @return (w0 + w1 + w2 + w3) & 0xFFFF, where w0..w3 are the words of %mm6
 */
static inline int sum_mmx(void)
{
    int ret;
    __asm__ volatile (
        /* add the upper two words onto the lower two */
        "movq %%mm6, %%mm0              \n\t"
        "psrlq $32, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        /* add word 1 onto word 0 */
        "movq %%mm6, %%mm0              \n\t"
        "psrlq $16, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "movd %%mm6, %0                 \n\t"
        : "=r" (ret));
    /* only word 0 holds the complete sum; discard word 1 */
    return ret & 0xFFFF;
}
201cabdff1aSopenharmony_ci#define PIX_SADXY(suf)                                                  \
202cabdff1aSopenharmony_cistatic int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
203cabdff1aSopenharmony_ci                            uint8_t *blk1, ptrdiff_t stride, int h)     \
204cabdff1aSopenharmony_ci{                                                                       \
205cabdff1aSopenharmony_ci    av_assert2(h == 8);                                                     \
206cabdff1aSopenharmony_ci    __asm__ volatile (                                                  \
207cabdff1aSopenharmony_ci        "pxor %%mm7, %%mm7     \n\t"                                    \
208cabdff1aSopenharmony_ci        "pxor %%mm6, %%mm6     \n\t"                                    \
209cabdff1aSopenharmony_ci        ::);                                                            \
210cabdff1aSopenharmony_ci                                                                        \
211cabdff1aSopenharmony_ci    sad8_4_ ## suf(blk1, blk2, stride, 8);                              \
212cabdff1aSopenharmony_ci                                                                        \
213cabdff1aSopenharmony_ci    return sum_ ## suf();                                               \
214cabdff1aSopenharmony_ci}                                                                       \
215cabdff1aSopenharmony_ci                                                                        \
216cabdff1aSopenharmony_cistatic int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,          \
217cabdff1aSopenharmony_ci                             uint8_t *blk1, ptrdiff_t stride, int h)    \
218cabdff1aSopenharmony_ci{                                                                       \
219cabdff1aSopenharmony_ci    __asm__ volatile (                                                  \
220cabdff1aSopenharmony_ci        "pxor %%mm7, %%mm7     \n\t"                                    \
221cabdff1aSopenharmony_ci        "pxor %%mm6, %%mm6     \n\t"                                    \
222cabdff1aSopenharmony_ci        ::);                                                            \
223cabdff1aSopenharmony_ci                                                                        \
224cabdff1aSopenharmony_ci    sad8_4_ ## suf(blk1,     blk2,     stride, h);                      \
225cabdff1aSopenharmony_ci    sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h);                      \
226cabdff1aSopenharmony_ci                                                                        \
227cabdff1aSopenharmony_ci    return sum_ ## suf();                                               \
228cabdff1aSopenharmony_ci}                                                                       \
229cabdff1aSopenharmony_ci
230cabdff1aSopenharmony_ciPIX_SADXY(mmx)
231cabdff1aSopenharmony_ci
#endif /* HAVE_INLINE_ASM */

234cabdff1aSopenharmony_ciav_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
235cabdff1aSopenharmony_ci{
236cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
237cabdff1aSopenharmony_ci
238cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
239cabdff1aSopenharmony_ci    if (INLINE_MMX(cpu_flags)) {
240cabdff1aSopenharmony_ci        c->pix_abs[0][3] = sad16_xy2_mmx;
241cabdff1aSopenharmony_ci        c->pix_abs[1][3] = sad8_xy2_mmx;
242cabdff1aSopenharmony_ci    }
243cabdff1aSopenharmony_ci
244cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
245cabdff1aSopenharmony_ci
246cabdff1aSopenharmony_ci    if (EXTERNAL_MMX(cpu_flags)) {
247cabdff1aSopenharmony_ci        c->sse[1]            = ff_sse8_mmx;
248cabdff1aSopenharmony_ci#if HAVE_X86ASM
249cabdff1aSopenharmony_ci        c->nsse[0]           = nsse16_mmx;
250cabdff1aSopenharmony_ci        c->nsse[1]           = nsse8_mmx;
251cabdff1aSopenharmony_ci#endif
252cabdff1aSopenharmony_ci    }
253cabdff1aSopenharmony_ci
254cabdff1aSopenharmony_ci    if (EXTERNAL_MMXEXT(cpu_flags)) {
255cabdff1aSopenharmony_ci#if !HAVE_ALIGNED_STACK
256cabdff1aSopenharmony_ci        c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
257cabdff1aSopenharmony_ci        c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
258cabdff1aSopenharmony_ci#endif
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci        c->sad[0] = ff_sad16_mmxext;
261cabdff1aSopenharmony_ci        c->sad[1] = ff_sad8_mmxext;
262cabdff1aSopenharmony_ci
263cabdff1aSopenharmony_ci        c->pix_abs[0][0] = ff_sad16_mmxext;
264cabdff1aSopenharmony_ci        c->pix_abs[0][1] = ff_sad16_x2_mmxext;
265cabdff1aSopenharmony_ci        c->pix_abs[0][2] = ff_sad16_y2_mmxext;
266cabdff1aSopenharmony_ci        c->pix_abs[1][0] = ff_sad8_mmxext;
267cabdff1aSopenharmony_ci        c->pix_abs[1][1] = ff_sad8_x2_mmxext;
268cabdff1aSopenharmony_ci        c->pix_abs[1][2] = ff_sad8_y2_mmxext;
269cabdff1aSopenharmony_ci
270cabdff1aSopenharmony_ci        c->vsad[4] = ff_vsad_intra16_mmxext;
271cabdff1aSopenharmony_ci        c->vsad[5] = ff_vsad_intra8_mmxext;
272cabdff1aSopenharmony_ci
273cabdff1aSopenharmony_ci        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
274cabdff1aSopenharmony_ci            c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
275cabdff1aSopenharmony_ci            c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;
276cabdff1aSopenharmony_ci
277cabdff1aSopenharmony_ci            c->vsad[0] = ff_vsad16_approx_mmxext;
278cabdff1aSopenharmony_ci            c->vsad[1] = ff_vsad8_approx_mmxext;
279cabdff1aSopenharmony_ci        }
280cabdff1aSopenharmony_ci    }
281cabdff1aSopenharmony_ci
282cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2(cpu_flags)) {
283cabdff1aSopenharmony_ci        c->sse[0] = ff_sse16_sse2;
284cabdff1aSopenharmony_ci        c->sum_abs_dctelem   = ff_sum_abs_dctelem_sse2;
285cabdff1aSopenharmony_ci
286cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK
287cabdff1aSopenharmony_ci        c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
288cabdff1aSopenharmony_ci        c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
289cabdff1aSopenharmony_ci#endif
290cabdff1aSopenharmony_ci        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
291cabdff1aSopenharmony_ci            c->sad[0]        = ff_sad16_sse2;
292cabdff1aSopenharmony_ci            c->pix_abs[0][0] = ff_sad16_sse2;
293cabdff1aSopenharmony_ci            c->pix_abs[0][1] = ff_sad16_x2_sse2;
294cabdff1aSopenharmony_ci            c->pix_abs[0][2] = ff_sad16_y2_sse2;
295cabdff1aSopenharmony_ci
296cabdff1aSopenharmony_ci            c->vsad[4]       = ff_vsad_intra16_sse2;
297cabdff1aSopenharmony_ci            if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
298cabdff1aSopenharmony_ci                c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
299cabdff1aSopenharmony_ci                c->vsad[0]       = ff_vsad16_approx_sse2;
300cabdff1aSopenharmony_ci            }
301cabdff1aSopenharmony_ci        }
302cabdff1aSopenharmony_ci    }
303cabdff1aSopenharmony_ci
304cabdff1aSopenharmony_ci    if (EXTERNAL_SSSE3(cpu_flags)) {
305cabdff1aSopenharmony_ci        c->sum_abs_dctelem   = ff_sum_abs_dctelem_ssse3;
306cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK
307cabdff1aSopenharmony_ci        c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
308cabdff1aSopenharmony_ci        c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
309cabdff1aSopenharmony_ci#endif
310cabdff1aSopenharmony_ci    }
311cabdff1aSopenharmony_ci}
312