1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 22cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 23cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 24cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 25cabdff1aSopenharmony_ci#include "libavcodec/h264dsp.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci/***********************************/ 28cabdff1aSopenharmony_ci/* IDCT */ 29cabdff1aSopenharmony_ci#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ 30cabdff1aSopenharmony_civoid ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 31cabdff1aSopenharmony_ci int16_t *block, \ 32cabdff1aSopenharmony_ci int stride); 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ciIDCT_ADD_FUNC(, 8, sse2) 35cabdff1aSopenharmony_ciIDCT_ADD_FUNC(, 8, avx) 36cabdff1aSopenharmony_ciIDCT_ADD_FUNC(, 10, sse2) 37cabdff1aSopenharmony_ciIDCT_ADD_FUNC(_dc, 8, sse2) 38cabdff1aSopenharmony_ciIDCT_ADD_FUNC(_dc, 8, avx) 39cabdff1aSopenharmony_ciIDCT_ADD_FUNC(_dc, 10, mmxext) 40cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8_dc, 8, mmxext) 41cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8_dc, 10, sse2) 42cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8, 8, sse2) 43cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8, 10, sse2) 44cabdff1aSopenharmony_ciIDCT_ADD_FUNC(, 10, avx) 45cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8_dc, 10, avx) 46cabdff1aSopenharmony_ciIDCT_ADD_FUNC(8, 10, avx) 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_ci 49cabdff1aSopenharmony_ci#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ 50cabdff1aSopenharmony_civoid ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ 51cabdff1aSopenharmony_ci (uint8_t *dst, const int *block_offset, \ 52cabdff1aSopenharmony_ci int16_t *block, int stride, const uint8_t nnzc[5 * 8]); 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(8, 4, 8, sse2) 55cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(8, 4, 10, sse2) 56cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(8, 4, 10, avx) 57cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16, 8, sse2) 58cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16, 10, sse2) 59cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16intra, 8, sse2) 60cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16intra, 10, sse2) 61cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16, 10, avx) 62cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC(, 16intra, 10, avx) 63cabdff1aSopenharmony_ci 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ 66cabdff1aSopenharmony_civoid ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ 67cabdff1aSopenharmony_ci (uint8_t **dst, const int *block_offset, \ 68cabdff1aSopenharmony_ci int16_t *block, int stride, const uint8_t nnzc[15 * 8]); 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8, 8, sse2) 71cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8, 10, sse2) 72cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8, 10, avx) 73cabdff1aSopenharmony_ci 74cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8_422, 8, mmx) 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8_422, 10, sse2) 77cabdff1aSopenharmony_ciIDCT_ADD_REP_FUNC2(, 8_422, 10, avx) 78cabdff1aSopenharmony_ci 79cabdff1aSopenharmony_civoid ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul); 80cabdff1aSopenharmony_ci 81cabdff1aSopenharmony_ci/***********************************/ 82cabdff1aSopenharmony_ci/* deblocking */ 83cabdff1aSopenharmony_ci 84cabdff1aSopenharmony_civoid ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40], 85cabdff1aSopenharmony_ci int8_t ref[2][40], 86cabdff1aSopenharmony_ci int16_t mv[2][40][2], 87cabdff1aSopenharmony_ci int bidir, int edges, int step, 88cabdff1aSopenharmony_ci int mask_mv0, int mask_mv1, int field); 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ 91cabdff1aSopenharmony_civoid ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ 92cabdff1aSopenharmony_ci ptrdiff_t stride, \ 93cabdff1aSopenharmony_ci int alpha, \ 94cabdff1aSopenharmony_ci int beta, \ 95cabdff1aSopenharmony_ci int8_t *tc0); 96cabdff1aSopenharmony_ci#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ 97cabdff1aSopenharmony_civoid ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ 98cabdff1aSopenharmony_ci ptrdiff_t stride, \ 99cabdff1aSopenharmony_ci int alpha, \ 100cabdff1aSopenharmony_ci int beta); 101cabdff1aSopenharmony_ci 102cabdff1aSopenharmony_ci#define LF_FUNCS(type, depth) \ 103cabdff1aSopenharmony_ciLF_FUNC(h, luma, depth, sse2) \ 104cabdff1aSopenharmony_ciLF_IFUNC(h, luma_intra, depth, sse2) \ 105cabdff1aSopenharmony_ciLF_FUNC(v, luma, depth, sse2) \ 106cabdff1aSopenharmony_ciLF_IFUNC(v, luma_intra, depth, sse2) \ 107cabdff1aSopenharmony_ciLF_FUNC(h, chroma, depth, sse2) \ 108cabdff1aSopenharmony_ciLF_IFUNC(h, chroma_intra, depth, sse2) \ 109cabdff1aSopenharmony_ciLF_FUNC(h, chroma422, depth, sse2) \ 110cabdff1aSopenharmony_ciLF_IFUNC(h, chroma422_intra, depth, sse2) \ 111cabdff1aSopenharmony_ciLF_FUNC(v, chroma, depth, sse2) \ 112cabdff1aSopenharmony_ciLF_IFUNC(v, chroma_intra, depth, sse2) \ 113cabdff1aSopenharmony_ciLF_FUNC(h, luma, depth, avx) \ 114cabdff1aSopenharmony_ciLF_IFUNC(h, luma_intra, depth, avx) \ 115cabdff1aSopenharmony_ciLF_FUNC(v, luma, depth, avx) \ 116cabdff1aSopenharmony_ciLF_IFUNC(v, luma_intra, depth, avx) \ 117cabdff1aSopenharmony_ciLF_FUNC(h, chroma, depth, avx) \ 118cabdff1aSopenharmony_ciLF_IFUNC(h, chroma_intra, depth, avx) \ 119cabdff1aSopenharmony_ciLF_FUNC(h, chroma422, depth, avx) \ 120cabdff1aSopenharmony_ciLF_IFUNC(h, chroma422_intra, depth, avx) \ 121cabdff1aSopenharmony_ciLF_FUNC(v, chroma, depth, avx) \ 122cabdff1aSopenharmony_ciLF_IFUNC(v, chroma_intra, depth, avx) 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ciLF_FUNC(h, luma_mbaff, 8, sse2) 125cabdff1aSopenharmony_ciLF_FUNC(h, luma_mbaff, 8, avx) 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_ciLF_FUNCS(uint8_t, 8) 128cabdff1aSopenharmony_ciLF_FUNCS(uint16_t, 10) 129cabdff1aSopenharmony_ci 130cabdff1aSopenharmony_ciLF_FUNC(v, luma, 10, mmxext) 131cabdff1aSopenharmony_ciLF_FUNC(h, luma, 10, mmxext) 132cabdff1aSopenharmony_ciLF_IFUNC(v, luma_intra, 10, mmxext) 133cabdff1aSopenharmony_ciLF_IFUNC(h, luma_intra, 10, mmxext) 134cabdff1aSopenharmony_ci 135cabdff1aSopenharmony_ci/***********************************/ 136cabdff1aSopenharmony_ci/* weighted prediction */ 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci#define H264_WEIGHT(W, OPT) \ 139cabdff1aSopenharmony_civoid ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, ptrdiff_t stride, \ 140cabdff1aSopenharmony_ci int height, int log2_denom, \ 141cabdff1aSopenharmony_ci int weight, int offset); 142cabdff1aSopenharmony_ci 143cabdff1aSopenharmony_ci#define H264_BIWEIGHT(W, OPT) \ 144cabdff1aSopenharmony_civoid ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src, \ 145cabdff1aSopenharmony_ci ptrdiff_t stride, int height, \ 146cabdff1aSopenharmony_ci int log2_denom, int weightd, \ 147cabdff1aSopenharmony_ci int weights, int offset); 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci#define H264_BIWEIGHT_MMX(W) \ 150cabdff1aSopenharmony_ci H264_WEIGHT(W, mmxext) \ 151cabdff1aSopenharmony_ci H264_BIWEIGHT(W, mmxext) 152cabdff1aSopenharmony_ci 153cabdff1aSopenharmony_ci#define H264_BIWEIGHT_SSE(W) \ 154cabdff1aSopenharmony_ci H264_WEIGHT(W, sse2) \ 155cabdff1aSopenharmony_ci H264_BIWEIGHT(W, sse2) \ 156cabdff1aSopenharmony_ci H264_BIWEIGHT(W, ssse3) 157cabdff1aSopenharmony_ci 158cabdff1aSopenharmony_ciH264_BIWEIGHT_SSE(16) 159cabdff1aSopenharmony_ciH264_BIWEIGHT_SSE(8) 160cabdff1aSopenharmony_ciH264_BIWEIGHT_MMX(4) 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci#define H264_WEIGHT_10(W, DEPTH, OPT) \ 163cabdff1aSopenharmony_civoid ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 164cabdff1aSopenharmony_ci ptrdiff_t stride, \ 165cabdff1aSopenharmony_ci int height, \ 166cabdff1aSopenharmony_ci int log2_denom, \ 167cabdff1aSopenharmony_ci int weight, \ 168cabdff1aSopenharmony_ci int offset); 169cabdff1aSopenharmony_ci 170cabdff1aSopenharmony_ci#define H264_BIWEIGHT_10(W, DEPTH, OPT) \ 171cabdff1aSopenharmony_civoid ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ 172cabdff1aSopenharmony_ci uint8_t *src, \ 173cabdff1aSopenharmony_ci ptrdiff_t stride, \ 174cabdff1aSopenharmony_ci int height, \ 175cabdff1aSopenharmony_ci int log2_denom, \ 176cabdff1aSopenharmony_ci int weightd, \ 177cabdff1aSopenharmony_ci int weights, \ 178cabdff1aSopenharmony_ci int offset); 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_ci#define H264_BIWEIGHT_10_SSE(W, DEPTH) \ 181cabdff1aSopenharmony_ci H264_WEIGHT_10(W, DEPTH, sse2) \ 182cabdff1aSopenharmony_ci H264_WEIGHT_10(W, DEPTH, sse4) \ 183cabdff1aSopenharmony_ci H264_BIWEIGHT_10(W, DEPTH, sse2) \ 184cabdff1aSopenharmony_ci H264_BIWEIGHT_10(W, DEPTH, sse4) 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_ciH264_BIWEIGHT_10_SSE(16, 10) 187cabdff1aSopenharmony_ciH264_BIWEIGHT_10_SSE(8, 10) 188cabdff1aSopenharmony_ciH264_BIWEIGHT_10_SSE(4, 10) 189cabdff1aSopenharmony_ci 190cabdff1aSopenharmony_ciav_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, 191cabdff1aSopenharmony_ci const int chroma_format_idc) 192cabdff1aSopenharmony_ci{ 193cabdff1aSopenharmony_ci#if HAVE_X86ASM 194cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1) 197cabdff1aSopenharmony_ci c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext; 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ci if (bit_depth == 8) { 200cabdff1aSopenharmony_ci if (EXTERNAL_MMX(cpu_flags)) { 201cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 202cabdff1aSopenharmony_ci } else { 203cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx; 204cabdff1aSopenharmony_ci } 205cabdff1aSopenharmony_ci } 206cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 207cabdff1aSopenharmony_ci c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext; 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext; 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext; 212cabdff1aSopenharmony_ci } 213cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 214cabdff1aSopenharmony_ci c->h264_idct8_add = ff_h264_idct8_add_8_sse2; 215cabdff1aSopenharmony_ci 216cabdff1aSopenharmony_ci c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; 217cabdff1aSopenharmony_ci c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; 218cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) 219cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; 220cabdff1aSopenharmony_ci c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; 221cabdff1aSopenharmony_ci c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2; 222cabdff1aSopenharmony_ci 223cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2; 224cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2; 225cabdff1aSopenharmony_ci 226cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2; 227cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2; 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; 230cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2; 231cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; 232cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_ci#if ARCH_X86_64 235cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2; 236cabdff1aSopenharmony_ci#endif 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_sse2; 239cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2; 240cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 241cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_sse2; 242cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2; 243cabdff1aSopenharmony_ci } else { 244cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2; 245cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2; 246cabdff1aSopenharmony_ci } 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_ci c->h264_idct_add = ff_h264_idct_add_8_sse2; 249cabdff1aSopenharmony_ci c->h264_idct_dc_add = ff_h264_idct_dc_add_8_sse2; 250cabdff1aSopenharmony_ci } 251cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 252cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; 253cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; 254cabdff1aSopenharmony_ci } 255cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 256cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; 257cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; 258cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; 259cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; 260cabdff1aSopenharmony_ci#if ARCH_X86_64 261cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx; 262cabdff1aSopenharmony_ci#endif 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx; 265cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx; 266cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 267cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_avx; 268cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx; 269cabdff1aSopenharmony_ci } else { 270cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx; 271cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx; 272cabdff1aSopenharmony_ci } 273cabdff1aSopenharmony_ci 274cabdff1aSopenharmony_ci c->h264_idct_add = ff_h264_idct_add_8_avx; 275cabdff1aSopenharmony_ci c->h264_idct_dc_add = ff_h264_idct_dc_add_8_avx; 276cabdff1aSopenharmony_ci } 277cabdff1aSopenharmony_ci } else if (bit_depth == 10) { 278cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 279cabdff1aSopenharmony_ci#if ARCH_X86_32 && !HAVE_ALIGNED_STACK 280cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext; 281cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext; 282cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; 283cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; 284cabdff1aSopenharmony_ci#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */ 285cabdff1aSopenharmony_ci c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext; 286cabdff1aSopenharmony_ci } 287cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 288cabdff1aSopenharmony_ci c->h264_idct_add = ff_h264_idct_add_10_sse2; 289cabdff1aSopenharmony_ci c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; 290cabdff1aSopenharmony_ci 291cabdff1aSopenharmony_ci c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; 292cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 293cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; 294cabdff1aSopenharmony_ci } else { 295cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2; 296cabdff1aSopenharmony_ci } 297cabdff1aSopenharmony_ci c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2; 298cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 299cabdff1aSopenharmony_ci c->h264_idct8_add = ff_h264_idct8_add_10_sse2; 300cabdff1aSopenharmony_ci c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; 301cabdff1aSopenharmony_ci#endif /* HAVE_ALIGNED_STACK */ 302cabdff1aSopenharmony_ci 303cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2; 304cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2; 305cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2; 306cabdff1aSopenharmony_ci 307cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2; 308cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2; 309cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2; 310cabdff1aSopenharmony_ci 311cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2; 312cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2; 313cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 314cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2; 315cabdff1aSopenharmony_ci } else { 316cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2; 317cabdff1aSopenharmony_ci } 318cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 319cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; 320cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; 321cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; 322cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; 323cabdff1aSopenharmony_ci#endif /* HAVE_ALIGNED_STACK */ 324cabdff1aSopenharmony_ci } 325cabdff1aSopenharmony_ci if (EXTERNAL_SSE4(cpu_flags)) { 326cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; 327cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; 328cabdff1aSopenharmony_ci c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; 329cabdff1aSopenharmony_ci 330cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4; 331cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; 332cabdff1aSopenharmony_ci c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; 333cabdff1aSopenharmony_ci } 334cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 335cabdff1aSopenharmony_ci c->h264_idct_dc_add = 336cabdff1aSopenharmony_ci c->h264_idct_add = ff_h264_idct_add_10_avx; 337cabdff1aSopenharmony_ci c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; 338cabdff1aSopenharmony_ci 339cabdff1aSopenharmony_ci c->h264_idct_add16 = ff_h264_idct_add16_10_avx; 340cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 341cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_10_avx; 342cabdff1aSopenharmony_ci } else { 343cabdff1aSopenharmony_ci c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx; 344cabdff1aSopenharmony_ci } 345cabdff1aSopenharmony_ci c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx; 346cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 347cabdff1aSopenharmony_ci c->h264_idct8_add = ff_h264_idct8_add_10_avx; 348cabdff1aSopenharmony_ci c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx; 349cabdff1aSopenharmony_ci#endif /* HAVE_ALIGNED_STACK */ 350cabdff1aSopenharmony_ci 351cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx; 352cabdff1aSopenharmony_ci c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx; 353cabdff1aSopenharmony_ci if (chroma_format_idc <= 1) { 354cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx; 355cabdff1aSopenharmony_ci } else { 356cabdff1aSopenharmony_ci c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx; 357cabdff1aSopenharmony_ci } 358cabdff1aSopenharmony_ci#if HAVE_ALIGNED_STACK 359cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; 360cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; 361cabdff1aSopenharmony_ci c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; 362cabdff1aSopenharmony_ci c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; 363cabdff1aSopenharmony_ci#endif /* HAVE_ALIGNED_STACK */ 364cabdff1aSopenharmony_ci } 365cabdff1aSopenharmony_ci } 366cabdff1aSopenharmony_ci#endif 367cabdff1aSopenharmony_ci} 368