1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * SIMD-optimized motion estimation 3cabdff1aSopenharmony_ci * Copyright (c) 2000, 2001 Fabrice Bellard 4cabdff1aSopenharmony_ci * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * This file is part of FFmpeg. 9cabdff1aSopenharmony_ci * 10cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 11cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 12cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 13cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 14cabdff1aSopenharmony_ci * 15cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 16cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 17cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18cabdff1aSopenharmony_ci * Lesser General Public License for more details. 19cabdff1aSopenharmony_ci * 20cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 21cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 22cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23cabdff1aSopenharmony_ci */ 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 26cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 27cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 28cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 29cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 30cabdff1aSopenharmony_ci#include "libavcodec/me_cmp.h" 31cabdff1aSopenharmony_ci#include "libavcodec/mpegvideo.h" 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ciint ff_sum_abs_dctelem_sse2(int16_t *block); 34cabdff1aSopenharmony_ciint ff_sum_abs_dctelem_ssse3(int16_t *block); 35cabdff1aSopenharmony_ciint ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 36cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 37cabdff1aSopenharmony_ciint ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 38cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 39cabdff1aSopenharmony_ciint ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 40cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 41cabdff1aSopenharmony_ciint ff_hf_noise8_mmx(uint8_t *pix1, ptrdiff_t stride, int h); 42cabdff1aSopenharmony_ciint ff_hf_noise16_mmx(uint8_t *pix1, ptrdiff_t stride, int h); 43cabdff1aSopenharmony_ciint ff_sad8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 44cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 45cabdff1aSopenharmony_ciint ff_sad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 46cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 47cabdff1aSopenharmony_ciint ff_sad16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 48cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 49cabdff1aSopenharmony_ciint ff_sad8_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 50cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 51cabdff1aSopenharmony_ciint ff_sad16_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 52cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 53cabdff1aSopenharmony_ciint ff_sad16_x2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 54cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 55cabdff1aSopenharmony_ciint ff_sad8_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 56cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 57cabdff1aSopenharmony_ciint ff_sad16_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 58cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 59cabdff1aSopenharmony_ciint ff_sad16_y2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 60cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 61cabdff1aSopenharmony_ciint ff_sad8_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 62cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 63cabdff1aSopenharmony_ciint ff_sad16_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 64cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 65cabdff1aSopenharmony_ciint ff_sad16_approx_xy2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 66cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 67cabdff1aSopenharmony_ciint ff_vsad_intra8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 68cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 69cabdff1aSopenharmony_ciint ff_vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 70cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 71cabdff1aSopenharmony_ciint ff_vsad_intra16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 72cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 73cabdff1aSopenharmony_ciint ff_vsad8_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 74cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 75cabdff1aSopenharmony_ciint ff_vsad16_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 76cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 77cabdff1aSopenharmony_ciint ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 78cabdff1aSopenharmony_ci ptrdiff_t stride, int h); 79cabdff1aSopenharmony_ci 80cabdff1aSopenharmony_ci#define hadamard_func(cpu) \ 81cabdff1aSopenharmony_ci int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \ 82cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h); \ 83cabdff1aSopenharmony_ci int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \ 84cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h); 85cabdff1aSopenharmony_ci 86cabdff1aSopenharmony_cihadamard_func(mmxext) 87cabdff1aSopenharmony_cihadamard_func(sse2) 88cabdff1aSopenharmony_cihadamard_func(ssse3) 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci#if HAVE_X86ASM 91cabdff1aSopenharmony_cistatic int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2, 92cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 93cabdff1aSopenharmony_ci{ 94cabdff1aSopenharmony_ci int score1, score2; 95cabdff1aSopenharmony_ci 96cabdff1aSopenharmony_ci if (c) 97cabdff1aSopenharmony_ci score1 = c->mecc.sse[0](c, pix1, pix2, stride, h); 98cabdff1aSopenharmony_ci else 99cabdff1aSopenharmony_ci score1 = ff_sse16_mmx(c, pix1, pix2, stride, h); 100cabdff1aSopenharmony_ci score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h) 101cabdff1aSopenharmony_ci - ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h); 102cabdff1aSopenharmony_ci 103cabdff1aSopenharmony_ci if (c) 104cabdff1aSopenharmony_ci return score1 + FFABS(score2) * c->avctx->nsse_weight; 105cabdff1aSopenharmony_ci else 106cabdff1aSopenharmony_ci return score1 + FFABS(score2) * 8; 107cabdff1aSopenharmony_ci} 108cabdff1aSopenharmony_ci 109cabdff1aSopenharmony_cistatic int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2, 110cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 111cabdff1aSopenharmony_ci{ 112cabdff1aSopenharmony_ci int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h); 113cabdff1aSopenharmony_ci int score2 = ff_hf_noise8_mmx(pix1, stride, h) - 114cabdff1aSopenharmony_ci ff_hf_noise8_mmx(pix2, stride, h); 115cabdff1aSopenharmony_ci 116cabdff1aSopenharmony_ci if (c) 117cabdff1aSopenharmony_ci return score1 + FFABS(score2) * c->avctx->nsse_weight; 118cabdff1aSopenharmony_ci else 119cabdff1aSopenharmony_ci return score1 + FFABS(score2) * 8; 120cabdff1aSopenharmony_ci} 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = { 127cabdff1aSopenharmony_ci 0x0000000000000000ULL, 128cabdff1aSopenharmony_ci 0x0001000100010001ULL, 129cabdff1aSopenharmony_ci 0x0002000200020002ULL, 130cabdff1aSopenharmony_ci}; 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_cistatic inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, 133cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 134cabdff1aSopenharmony_ci{ 135cabdff1aSopenharmony_ci x86_reg len = -stride * h; 136cabdff1aSopenharmony_ci __asm__ volatile ( 137cabdff1aSopenharmony_ci "movq (%1, %%"FF_REG_a"), %%mm0\n\t" 138cabdff1aSopenharmony_ci "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t" 139cabdff1aSopenharmony_ci "movq %%mm0, %%mm1 \n\t" 140cabdff1aSopenharmony_ci "movq %%mm2, %%mm3 \n\t" 141cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm0 \n\t" 142cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm1 \n\t" 143cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm2 \n\t" 144cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm3 \n\t" 145cabdff1aSopenharmony_ci "paddw %%mm2, %%mm0 \n\t" 146cabdff1aSopenharmony_ci "paddw %%mm3, %%mm1 \n\t" 147cabdff1aSopenharmony_ci ".p2align 4 \n\t" 148cabdff1aSopenharmony_ci "1: \n\t" 149cabdff1aSopenharmony_ci "movq (%2, %%"FF_REG_a"), %%mm2\n\t" 150cabdff1aSopenharmony_ci "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t" 151cabdff1aSopenharmony_ci "movq %%mm2, %%mm3 \n\t" 152cabdff1aSopenharmony_ci "movq %%mm4, %%mm5 \n\t" 153cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm2 \n\t" 154cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm3 \n\t" 155cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm4 \n\t" 156cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm5 \n\t" 157cabdff1aSopenharmony_ci "paddw %%mm4, %%mm2 \n\t" 158cabdff1aSopenharmony_ci "paddw %%mm5, %%mm3 \n\t" 159cabdff1aSopenharmony_ci "movq %5, %%mm5 \n\t" 160cabdff1aSopenharmony_ci "paddw %%mm2, %%mm0 \n\t" 161cabdff1aSopenharmony_ci "paddw %%mm3, %%mm1 \n\t" 162cabdff1aSopenharmony_ci "paddw %%mm5, %%mm0 \n\t" 163cabdff1aSopenharmony_ci "paddw %%mm5, %%mm1 \n\t" 164cabdff1aSopenharmony_ci "movq (%3, %%"FF_REG_a"), %%mm4 \n\t" 165cabdff1aSopenharmony_ci "movq (%3, %%"FF_REG_a"), %%mm5 \n\t" 166cabdff1aSopenharmony_ci "psrlw $2, %%mm0 \n\t" 167cabdff1aSopenharmony_ci "psrlw $2, %%mm1 \n\t" 168cabdff1aSopenharmony_ci "packuswb %%mm1, %%mm0 \n\t" 169cabdff1aSopenharmony_ci "psubusb %%mm0, %%mm4 \n\t" 170cabdff1aSopenharmony_ci "psubusb %%mm5, %%mm0 \n\t" 171cabdff1aSopenharmony_ci "por %%mm4, %%mm0 \n\t" 172cabdff1aSopenharmony_ci "movq %%mm0, %%mm4 \n\t" 173cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm0 \n\t" 174cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm4 \n\t" 175cabdff1aSopenharmony_ci "paddw %%mm0, %%mm6 \n\t" 176cabdff1aSopenharmony_ci "paddw %%mm4, %%mm6 \n\t" 177cabdff1aSopenharmony_ci "movq %%mm2, %%mm0 \n\t" 178cabdff1aSopenharmony_ci "movq %%mm3, %%mm1 \n\t" 179cabdff1aSopenharmony_ci "add %4, %%"FF_REG_a" \n\t" 180cabdff1aSopenharmony_ci " js 1b \n\t" 181cabdff1aSopenharmony_ci : "+a" (len) 182cabdff1aSopenharmony_ci : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), 183cabdff1aSopenharmony_ci "r" (stride), "m" (round_tab[2])); 184cabdff1aSopenharmony_ci} 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_cistatic inline int sum_mmx(void) 187cabdff1aSopenharmony_ci{ 188cabdff1aSopenharmony_ci int ret; 189cabdff1aSopenharmony_ci __asm__ volatile ( 190cabdff1aSopenharmony_ci "movq %%mm6, %%mm0 \n\t" 191cabdff1aSopenharmony_ci "psrlq $32, %%mm6 \n\t" 192cabdff1aSopenharmony_ci "paddw %%mm0, %%mm6 \n\t" 193cabdff1aSopenharmony_ci "movq %%mm6, %%mm0 \n\t" 194cabdff1aSopenharmony_ci "psrlq $16, %%mm6 \n\t" 195cabdff1aSopenharmony_ci "paddw %%mm0, %%mm6 \n\t" 196cabdff1aSopenharmony_ci "movd %%mm6, %0 \n\t" 197cabdff1aSopenharmony_ci : "=r" (ret)); 198cabdff1aSopenharmony_ci return ret & 0xFFFF; 199cabdff1aSopenharmony_ci} 200cabdff1aSopenharmony_ci 201cabdff1aSopenharmony_ci#define PIX_SADXY(suf) \ 202cabdff1aSopenharmony_cistatic int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \ 203cabdff1aSopenharmony_ci uint8_t *blk1, ptrdiff_t stride, int h) \ 204cabdff1aSopenharmony_ci{ \ 205cabdff1aSopenharmony_ci av_assert2(h == 8); \ 206cabdff1aSopenharmony_ci __asm__ volatile ( \ 207cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t" \ 208cabdff1aSopenharmony_ci "pxor %%mm6, %%mm6 \n\t" \ 209cabdff1aSopenharmony_ci ::); \ 210cabdff1aSopenharmony_ci \ 211cabdff1aSopenharmony_ci sad8_4_ ## suf(blk1, blk2, stride, 8); \ 212cabdff1aSopenharmony_ci \ 213cabdff1aSopenharmony_ci return sum_ ## suf(); \ 214cabdff1aSopenharmony_ci} \ 215cabdff1aSopenharmony_ci \ 216cabdff1aSopenharmony_cistatic int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \ 217cabdff1aSopenharmony_ci uint8_t *blk1, ptrdiff_t stride, int h) \ 218cabdff1aSopenharmony_ci{ \ 219cabdff1aSopenharmony_ci __asm__ volatile ( \ 220cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t" \ 221cabdff1aSopenharmony_ci "pxor %%mm6, %%mm6 \n\t" \ 222cabdff1aSopenharmony_ci ::); \ 223cabdff1aSopenharmony_ci \ 224cabdff1aSopenharmony_ci sad8_4_ ## suf(blk1, blk2, stride, h); \ 225cabdff1aSopenharmony_ci sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \ 226cabdff1aSopenharmony_ci \ 227cabdff1aSopenharmony_ci return sum_ ## suf(); \ 228cabdff1aSopenharmony_ci} \ 229cabdff1aSopenharmony_ci 230cabdff1aSopenharmony_ciPIX_SADXY(mmx) 231cabdff1aSopenharmony_ci 232cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_ciav_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx) 235cabdff1aSopenharmony_ci{ 236cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 239cabdff1aSopenharmony_ci if (INLINE_MMX(cpu_flags)) { 240cabdff1aSopenharmony_ci c->pix_abs[0][3] = sad16_xy2_mmx; 241cabdff1aSopenharmony_ci c->pix_abs[1][3] = sad8_xy2_mmx; 242cabdff1aSopenharmony_ci } 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci if (EXTERNAL_MMX(cpu_flags)) { 247cabdff1aSopenharmony_ci c->sse[1] = ff_sse8_mmx; 248cabdff1aSopenharmony_ci#if HAVE_X86ASM 249cabdff1aSopenharmony_ci c->nsse[0] = nsse16_mmx; 250cabdff1aSopenharmony_ci c->nsse[1] = nsse8_mmx; 251cabdff1aSopenharmony_ci#endif 252cabdff1aSopenharmony_ci } 253cabdff1aSopenharmony_ci 254cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 255cabdff1aSopenharmony_ci#if !HAVE_ALIGNED_STACK 256cabdff1aSopenharmony_ci c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext; 257cabdff1aSopenharmony_ci c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext; 258cabdff1aSopenharmony_ci#endif 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci c->sad[0] = ff_sad16_mmxext; 261cabdff1aSopenharmony_ci c->sad[1] = ff_sad8_mmxext; 262cabdff1aSopenharmony_ci 263cabdff1aSopenharmony_ci c->pix_abs[0][0] = ff_sad16_mmxext; 264cabdff1aSopenharmony_ci c->pix_abs[0][1] = ff_sad16_x2_mmxext; 265cabdff1aSopenharmony_ci c->pix_abs[0][2] = ff_sad16_y2_mmxext; 266cabdff1aSopenharmony_ci c->pix_abs[1][0] = ff_sad8_mmxext; 267cabdff1aSopenharmony_ci c->pix_abs[1][1] = ff_sad8_x2_mmxext; 268cabdff1aSopenharmony_ci c->pix_abs[1][2] = ff_sad8_y2_mmxext; 269cabdff1aSopenharmony_ci 270cabdff1aSopenharmony_ci c->vsad[4] = ff_vsad_intra16_mmxext; 271cabdff1aSopenharmony_ci c->vsad[5] = ff_vsad_intra8_mmxext; 272cabdff1aSopenharmony_ci 273cabdff1aSopenharmony_ci if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) { 274cabdff1aSopenharmony_ci c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext; 275cabdff1aSopenharmony_ci c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext; 276cabdff1aSopenharmony_ci 277cabdff1aSopenharmony_ci c->vsad[0] = ff_vsad16_approx_mmxext; 278cabdff1aSopenharmony_ci c->vsad[1] = ff_vsad8_approx_mmxext; 279cabdff1aSopenharmony_ci } 280cabdff1aSopenharmony_ci } 281cabdff1aSopenharmony_ci 282cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 283cabdff1aSopenharmony_ci c->sse[0] = ff_sse16_sse2; 284cabdff1aSopenharmony_ci c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2; 285cabdff1aSopenharmony_ci 286cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 287cabdff1aSopenharmony_ci c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; 288cabdff1aSopenharmony_ci c->hadamard8_diff[1] = ff_hadamard8_diff_sse2; 289cabdff1aSopenharmony_ci#endif 290cabdff1aSopenharmony_ci if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) { 291cabdff1aSopenharmony_ci c->sad[0] = ff_sad16_sse2; 292cabdff1aSopenharmony_ci c->pix_abs[0][0] = ff_sad16_sse2; 293cabdff1aSopenharmony_ci c->pix_abs[0][1] = ff_sad16_x2_sse2; 294cabdff1aSopenharmony_ci c->pix_abs[0][2] = ff_sad16_y2_sse2; 295cabdff1aSopenharmony_ci 296cabdff1aSopenharmony_ci c->vsad[4] = ff_vsad_intra16_sse2; 297cabdff1aSopenharmony_ci if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) { 298cabdff1aSopenharmony_ci c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2; 299cabdff1aSopenharmony_ci c->vsad[0] = ff_vsad16_approx_sse2; 300cabdff1aSopenharmony_ci } 301cabdff1aSopenharmony_ci } 302cabdff1aSopenharmony_ci } 303cabdff1aSopenharmony_ci 304cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 305cabdff1aSopenharmony_ci c->sum_abs_dctelem = ff_sum_abs_dctelem_ssse3; 306cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 307cabdff1aSopenharmony_ci c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; 308cabdff1aSopenharmony_ci c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; 309cabdff1aSopenharmony_ci#endif 310cabdff1aSopenharmony_ci } 311cabdff1aSopenharmony_ci} 312