1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * VP9 SIMD optimizations 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 24cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 25cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 26cabdff1aSopenharmony_ci#include "libavcodec/vp9dsp.h" 27cabdff1aSopenharmony_ci#include "libavcodec/x86/vp9dsp_init.h" 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#if HAVE_X86ASM 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_cidecl_fpel_func(put, 4, , mmx); 32cabdff1aSopenharmony_cidecl_fpel_func(put, 8, , mmx); 33cabdff1aSopenharmony_cidecl_fpel_func(put, 16, , sse); 34cabdff1aSopenharmony_cidecl_fpel_func(put, 32, , sse); 35cabdff1aSopenharmony_cidecl_fpel_func(put, 64, , sse); 36cabdff1aSopenharmony_cidecl_fpel_func(avg, 4, _8, mmxext); 37cabdff1aSopenharmony_cidecl_fpel_func(avg, 8, _8, mmxext); 38cabdff1aSopenharmony_cidecl_fpel_func(avg, 16, _8, sse2); 39cabdff1aSopenharmony_cidecl_fpel_func(avg, 32, _8, sse2); 40cabdff1aSopenharmony_cidecl_fpel_func(avg, 64, _8, sse2); 41cabdff1aSopenharmony_cidecl_fpel_func(put, 32, , avx); 42cabdff1aSopenharmony_cidecl_fpel_func(put, 64, , avx); 43cabdff1aSopenharmony_cidecl_fpel_func(avg, 32, _8, avx2); 44cabdff1aSopenharmony_cidecl_fpel_func(avg, 64, _8, avx2); 45cabdff1aSopenharmony_ci 46cabdff1aSopenharmony_cidecl_mc_funcs(4, mmxext, int16_t, 8, 8); 47cabdff1aSopenharmony_cidecl_mc_funcs(8, sse2, int16_t, 8, 8); 48cabdff1aSopenharmony_cidecl_mc_funcs(4, ssse3, int8_t, 32, 8); 49cabdff1aSopenharmony_cidecl_mc_funcs(8, ssse3, int8_t, 32, 8); 50cabdff1aSopenharmony_ci#if ARCH_X86_64 51cabdff1aSopenharmony_cidecl_mc_funcs(16, ssse3, int8_t, 32, 8); 52cabdff1aSopenharmony_cidecl_mc_funcs(32, avx2, int8_t, 32, 8); 53cabdff1aSopenharmony_ci#endif 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_cimc_rep_funcs(16, 8, 8, sse2, int16_t, 8, 8) 56cabdff1aSopenharmony_ci#if ARCH_X86_32 57cabdff1aSopenharmony_cimc_rep_funcs(16, 8, 8, ssse3, int8_t, 32, 8) 58cabdff1aSopenharmony_ci#endif 59cabdff1aSopenharmony_cimc_rep_funcs(32, 16, 16, sse2, int16_t, 8, 8) 60cabdff1aSopenharmony_cimc_rep_funcs(32, 16, 16, ssse3, int8_t, 32, 8) 61cabdff1aSopenharmony_cimc_rep_funcs(64, 32, 32, sse2, int16_t, 8, 8) 62cabdff1aSopenharmony_cimc_rep_funcs(64, 32, 32, ssse3, int8_t, 32, 8) 63cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL 64cabdff1aSopenharmony_cimc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8) 65cabdff1aSopenharmony_ci#endif 66cabdff1aSopenharmony_ci 67cabdff1aSopenharmony_ciextern const int8_t ff_filters_ssse3[3][15][4][32]; 68cabdff1aSopenharmony_ciextern const int16_t ff_filters_sse2[3][15][8][8]; 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_cifilters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) 71cabdff1aSopenharmony_cifilters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2) 72cabdff1aSopenharmony_cifilters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3) 73cabdff1aSopenharmony_cifilters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3) 74cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL 75cabdff1aSopenharmony_cifilters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) 76cabdff1aSopenharmony_cifilters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3) 77cabdff1aSopenharmony_cifilters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3) 78cabdff1aSopenharmony_cifilters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3) 79cabdff1aSopenharmony_ci#endif 80cabdff1aSopenharmony_ci 81cabdff1aSopenharmony_cifilters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) 82cabdff1aSopenharmony_cifilters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2) 83cabdff1aSopenharmony_cifilters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3) 84cabdff1aSopenharmony_cifilters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3) 85cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL 86cabdff1aSopenharmony_cifilters_8tap_1d_fn2(put, 64, 8, avx2, ssse3) 87cabdff1aSopenharmony_cifilters_8tap_1d_fn2(put, 32, 8, avx2, ssse3) 88cabdff1aSopenharmony_cifilters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3) 89cabdff1aSopenharmony_cifilters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3) 90cabdff1aSopenharmony_ci#endif 91cabdff1aSopenharmony_ci 92cabdff1aSopenharmony_ci#define itxfm_func(typea, typeb, size, opt) \ 93cabdff1aSopenharmony_civoid ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \ 94cabdff1aSopenharmony_ci int16_t *block, int eob) 95cabdff1aSopenharmony_ci#define itxfm_funcs(size, opt) \ 96cabdff1aSopenharmony_ciitxfm_func(idct, idct, size, opt); \ 97cabdff1aSopenharmony_ciitxfm_func(iadst, idct, size, opt); \ 98cabdff1aSopenharmony_ciitxfm_func(idct, iadst, size, opt); \ 99cabdff1aSopenharmony_ciitxfm_func(iadst, iadst, size, opt) 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ciitxfm_func(idct, idct, 4, mmxext); 102cabdff1aSopenharmony_ciitxfm_func(idct, iadst, 4, sse2); 103cabdff1aSopenharmony_ciitxfm_func(iadst, idct, 4, sse2); 104cabdff1aSopenharmony_ciitxfm_func(iadst, iadst, 4, sse2); 105cabdff1aSopenharmony_ciitxfm_funcs(4, ssse3); 106cabdff1aSopenharmony_ciitxfm_funcs(8, sse2); 107cabdff1aSopenharmony_ciitxfm_funcs(8, ssse3); 108cabdff1aSopenharmony_ciitxfm_funcs(8, avx); 109cabdff1aSopenharmony_ciitxfm_funcs(16, sse2); 110cabdff1aSopenharmony_ciitxfm_funcs(16, ssse3); 111cabdff1aSopenharmony_ciitxfm_funcs(16, avx); 112cabdff1aSopenharmony_ciitxfm_func(idct, idct, 32, sse2); 113cabdff1aSopenharmony_ciitxfm_func(idct, idct, 32, ssse3); 114cabdff1aSopenharmony_ciitxfm_func(idct, idct, 32, avx); 115cabdff1aSopenharmony_ciitxfm_func(iwht, iwht, 4, mmx); 116cabdff1aSopenharmony_ciitxfm_funcs(16, avx2); 117cabdff1aSopenharmony_ciitxfm_func(idct, idct, 32, avx2); 118cabdff1aSopenharmony_ci 119cabdff1aSopenharmony_ci#undef itxfm_func 120cabdff1aSopenharmony_ci#undef itxfm_funcs 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_ci#define lpf_funcs(size1, size2, opt) \ 123cabdff1aSopenharmony_civoid ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ 124cabdff1aSopenharmony_ci int E, int I, int H); \ 125cabdff1aSopenharmony_civoid ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ 126cabdff1aSopenharmony_ci int E, int I, int H) 127cabdff1aSopenharmony_ci 128cabdff1aSopenharmony_cilpf_funcs(4, 8, mmxext); 129cabdff1aSopenharmony_cilpf_funcs(8, 8, mmxext); 130cabdff1aSopenharmony_cilpf_funcs(16, 16, sse2); 131cabdff1aSopenharmony_cilpf_funcs(16, 16, ssse3); 132cabdff1aSopenharmony_cilpf_funcs(16, 16, avx); 133cabdff1aSopenharmony_cilpf_funcs(44, 16, sse2); 134cabdff1aSopenharmony_cilpf_funcs(44, 16, ssse3); 135cabdff1aSopenharmony_cilpf_funcs(44, 16, avx); 136cabdff1aSopenharmony_cilpf_funcs(84, 16, sse2); 137cabdff1aSopenharmony_cilpf_funcs(84, 16, ssse3); 138cabdff1aSopenharmony_cilpf_funcs(84, 16, avx); 139cabdff1aSopenharmony_cilpf_funcs(48, 16, sse2); 140cabdff1aSopenharmony_cilpf_funcs(48, 16, ssse3); 141cabdff1aSopenharmony_cilpf_funcs(48, 16, avx); 142cabdff1aSopenharmony_cilpf_funcs(88, 16, sse2); 143cabdff1aSopenharmony_cilpf_funcs(88, 16, ssse3); 144cabdff1aSopenharmony_cilpf_funcs(88, 16, avx); 145cabdff1aSopenharmony_ci 146cabdff1aSopenharmony_ci#undef lpf_funcs 147cabdff1aSopenharmony_ci 148cabdff1aSopenharmony_ci#define ipred_func(size, type, opt) \ 149cabdff1aSopenharmony_civoid ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \ 150cabdff1aSopenharmony_ci const uint8_t *l, const uint8_t *a) 151cabdff1aSopenharmony_ci 152cabdff1aSopenharmony_ciipred_func(8, v, mmx); 153cabdff1aSopenharmony_ci 154cabdff1aSopenharmony_ci#define ipred_dc_funcs(size, opt) \ 155cabdff1aSopenharmony_ciipred_func(size, dc, opt); \ 156cabdff1aSopenharmony_ciipred_func(size, dc_left, opt); \ 157cabdff1aSopenharmony_ciipred_func(size, dc_top, opt) 158cabdff1aSopenharmony_ci 159cabdff1aSopenharmony_ciipred_dc_funcs(4, mmxext); 160cabdff1aSopenharmony_ciipred_dc_funcs(8, mmxext); 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci#define ipred_dir_tm_funcs(size, opt) \ 163cabdff1aSopenharmony_ciipred_func(size, tm, opt); \ 164cabdff1aSopenharmony_ciipred_func(size, dl, opt); \ 165cabdff1aSopenharmony_ciipred_func(size, dr, opt); \ 166cabdff1aSopenharmony_ciipred_func(size, hd, opt); \ 167cabdff1aSopenharmony_ciipred_func(size, hu, opt); \ 168cabdff1aSopenharmony_ciipred_func(size, vl, opt); \ 169cabdff1aSopenharmony_ciipred_func(size, vr, opt) 170cabdff1aSopenharmony_ci 171cabdff1aSopenharmony_ciipred_dir_tm_funcs(4, mmxext); 172cabdff1aSopenharmony_ci 173cabdff1aSopenharmony_ciipred_func(16, v, sse); 174cabdff1aSopenharmony_ciipred_func(32, v, sse); 175cabdff1aSopenharmony_ci 176cabdff1aSopenharmony_ciipred_dc_funcs(16, sse2); 177cabdff1aSopenharmony_ciipred_dc_funcs(32, sse2); 178cabdff1aSopenharmony_ci 179cabdff1aSopenharmony_ci#define ipred_dir_tm_h_funcs(size, opt) \ 180cabdff1aSopenharmony_ciipred_dir_tm_funcs(size, opt); \ 181cabdff1aSopenharmony_ciipred_func(size, h, opt) 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(8, sse2); 184cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(16, sse2); 185cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(32, sse2); 186cabdff1aSopenharmony_ci 187cabdff1aSopenharmony_ciipred_func(4, h, sse2); 188cabdff1aSopenharmony_ci 189cabdff1aSopenharmony_ci#define ipred_all_funcs(size, opt) \ 190cabdff1aSopenharmony_ciipred_dc_funcs(size, opt); \ 191cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(size, opt) 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci// FIXME hd/vl_4x4_ssse3 does not exist 194cabdff1aSopenharmony_ciipred_all_funcs(4, ssse3); 195cabdff1aSopenharmony_ciipred_all_funcs(8, ssse3); 196cabdff1aSopenharmony_ciipred_all_funcs(16, ssse3); 197cabdff1aSopenharmony_ciipred_all_funcs(32, ssse3); 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(8, avx); 200cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(16, avx); 201cabdff1aSopenharmony_ciipred_dir_tm_h_funcs(32, avx); 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_ciipred_func(32, v, avx); 204cabdff1aSopenharmony_ci 205cabdff1aSopenharmony_ciipred_dc_funcs(32, avx2); 206cabdff1aSopenharmony_ciipred_func(32, h, avx2); 207cabdff1aSopenharmony_ciipred_func(32, tm, avx2); 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_ci#undef ipred_func 210cabdff1aSopenharmony_ci#undef ipred_dir_tm_h_funcs 211cabdff1aSopenharmony_ci#undef ipred_dir_tm_funcs 212cabdff1aSopenharmony_ci#undef ipred_dc_funcs 213cabdff1aSopenharmony_ci 214cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 215cabdff1aSopenharmony_ci 216cabdff1aSopenharmony_ciav_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) 217cabdff1aSopenharmony_ci{ 218cabdff1aSopenharmony_ci#if HAVE_X86ASM 219cabdff1aSopenharmony_ci int cpu_flags; 220cabdff1aSopenharmony_ci 221cabdff1aSopenharmony_ci if (bpp == 10) { 222cabdff1aSopenharmony_ci ff_vp9dsp_init_10bpp_x86(dsp, bitexact); 223cabdff1aSopenharmony_ci return; 224cabdff1aSopenharmony_ci } else if (bpp == 12) { 225cabdff1aSopenharmony_ci ff_vp9dsp_init_12bpp_x86(dsp, bitexact); 226cabdff1aSopenharmony_ci return; 227cabdff1aSopenharmony_ci } 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci cpu_flags = av_get_cpu_flags(); 230cabdff1aSopenharmony_ci 231cabdff1aSopenharmony_ci#define init_lpf(opt) do { \ 232cabdff1aSopenharmony_ci dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \ 233cabdff1aSopenharmony_ci dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ 234cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ 235cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ 236cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ 237cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ 238cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ 239cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ 240cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ 241cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ 242cabdff1aSopenharmony_ci} while (0) 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_ci#define init_ipred(sz, opt, t, e) \ 245cabdff1aSopenharmony_ci dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt 246cabdff1aSopenharmony_ci 247cabdff1aSopenharmony_ci#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext 248cabdff1aSopenharmony_ci#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext 249cabdff1aSopenharmony_ci#define init_dir_tm_ipred(sz, opt) do { \ 250cabdff1aSopenharmony_ci init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \ 251cabdff1aSopenharmony_ci init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \ 252cabdff1aSopenharmony_ci init_ipred(sz, opt, hd, HOR_DOWN); \ 253cabdff1aSopenharmony_ci init_ipred(sz, opt, vl, VERT_LEFT); \ 254cabdff1aSopenharmony_ci init_ipred(sz, opt, hu, HOR_UP); \ 255cabdff1aSopenharmony_ci init_ipred(sz, opt, tm, TM_VP8); \ 256cabdff1aSopenharmony_ci init_ipred(sz, opt, vr, VERT_RIGHT); \ 257cabdff1aSopenharmony_ci} while (0) 258cabdff1aSopenharmony_ci#define init_dir_tm_h_ipred(sz, opt) do { \ 259cabdff1aSopenharmony_ci init_dir_tm_ipred(sz, opt); \ 260cabdff1aSopenharmony_ci init_ipred(sz, opt, h, HOR); \ 261cabdff1aSopenharmony_ci} while (0) 262cabdff1aSopenharmony_ci#define init_dc_ipred(sz, opt) do { \ 263cabdff1aSopenharmony_ci init_ipred(sz, opt, dc, DC); \ 264cabdff1aSopenharmony_ci init_ipred(sz, opt, dc_left, LEFT_DC); \ 265cabdff1aSopenharmony_ci init_ipred(sz, opt, dc_top, TOP_DC); \ 266cabdff1aSopenharmony_ci} while (0) 267cabdff1aSopenharmony_ci#define init_all_ipred(sz, opt) do { \ 268cabdff1aSopenharmony_ci init_dc_ipred(sz, opt); \ 269cabdff1aSopenharmony_ci init_dir_tm_h_ipred(sz, opt); \ 270cabdff1aSopenharmony_ci} while (0) 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci if (EXTERNAL_MMX(cpu_flags)) { 273cabdff1aSopenharmony_ci init_fpel_func(4, 0, 4, put, , mmx); 274cabdff1aSopenharmony_ci init_fpel_func(3, 0, 8, put, , mmx); 275cabdff1aSopenharmony_ci if (!bitexact) { 276cabdff1aSopenharmony_ci dsp->itxfm_add[4 /* lossless */][DCT_DCT] = 277cabdff1aSopenharmony_ci dsp->itxfm_add[4 /* lossless */][ADST_DCT] = 278cabdff1aSopenharmony_ci dsp->itxfm_add[4 /* lossless */][DCT_ADST] = 279cabdff1aSopenharmony_ci dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; 280cabdff1aSopenharmony_ci } 281cabdff1aSopenharmony_ci init_ipred(8, mmx, v, VERT); 282cabdff1aSopenharmony_ci } 283cabdff1aSopenharmony_ci 284cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 285cabdff1aSopenharmony_ci dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext; 286cabdff1aSopenharmony_ci dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext; 287cabdff1aSopenharmony_ci dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext; 288cabdff1aSopenharmony_ci dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext; 289cabdff1aSopenharmony_ci init_subpel2(4, 0, 4, put, 8, mmxext); 290cabdff1aSopenharmony_ci init_subpel2(4, 1, 4, avg, 8, mmxext); 291cabdff1aSopenharmony_ci init_fpel_func(4, 1, 4, avg, _8, mmxext); 292cabdff1aSopenharmony_ci init_fpel_func(3, 1, 8, avg, _8, mmxext); 293cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; 294cabdff1aSopenharmony_ci init_dc_ipred(4, mmxext); 295cabdff1aSopenharmony_ci init_dc_ipred(8, mmxext); 296cabdff1aSopenharmony_ci init_dir_tm_ipred(4, mmxext); 297cabdff1aSopenharmony_ci } 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_ci if (EXTERNAL_SSE(cpu_flags)) { 300cabdff1aSopenharmony_ci init_fpel_func(2, 0, 16, put, , sse); 301cabdff1aSopenharmony_ci init_fpel_func(1, 0, 32, put, , sse); 302cabdff1aSopenharmony_ci init_fpel_func(0, 0, 64, put, , sse); 303cabdff1aSopenharmony_ci init_ipred(16, sse, v, VERT); 304cabdff1aSopenharmony_ci init_ipred(32, sse, v, VERT); 305cabdff1aSopenharmony_ci } 306cabdff1aSopenharmony_ci 307cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 308cabdff1aSopenharmony_ci init_subpel3_8to64(0, put, 8, sse2); 309cabdff1aSopenharmony_ci init_subpel3_8to64(1, avg, 8, sse2); 310cabdff1aSopenharmony_ci init_fpel_func(2, 1, 16, avg, _8, sse2); 311cabdff1aSopenharmony_ci init_fpel_func(1, 1, 32, avg, _8, sse2); 312cabdff1aSopenharmony_ci init_fpel_func(0, 1, 64, avg, _8, sse2); 313cabdff1aSopenharmony_ci init_lpf(sse2); 314cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2; 315cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2; 316cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2; 317cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2; 318cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2; 319cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2; 320cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2; 321cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2; 322cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2; 323cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2; 324cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2; 325cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_ADST] = 326cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_DCT] = 327cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_ADST] = 328cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2; 329cabdff1aSopenharmony_ci init_dc_ipred(16, sse2); 330cabdff1aSopenharmony_ci init_dc_ipred(32, sse2); 331cabdff1aSopenharmony_ci init_dir_tm_h_ipred(8, sse2); 332cabdff1aSopenharmony_ci init_dir_tm_h_ipred(16, sse2); 333cabdff1aSopenharmony_ci init_dir_tm_h_ipred(32, sse2); 334cabdff1aSopenharmony_ci init_ipred(4, sse2, h, HOR); 335cabdff1aSopenharmony_ci } 336cabdff1aSopenharmony_ci 337cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 338cabdff1aSopenharmony_ci init_subpel3(0, put, 8, ssse3); 339cabdff1aSopenharmony_ci init_subpel3(1, avg, 8, ssse3); 340cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3; 341cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3; 342cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3; 343cabdff1aSopenharmony_ci dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3; 344cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; 345cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; 346cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; 347cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; 348cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; 349cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; 350cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; 351cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; 352cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_ADST] = 353cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_DCT] = 354cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_ADST] = 355cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; 356cabdff1aSopenharmony_ci init_lpf(ssse3); 357cabdff1aSopenharmony_ci init_all_ipred(4, ssse3); 358cabdff1aSopenharmony_ci init_all_ipred(8, ssse3); 359cabdff1aSopenharmony_ci init_all_ipred(16, ssse3); 360cabdff1aSopenharmony_ci init_all_ipred(32, ssse3); 361cabdff1aSopenharmony_ci } 362cabdff1aSopenharmony_ci 363cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 364cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; 365cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; 366cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; 367cabdff1aSopenharmony_ci dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; 368cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; 369cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; 370cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; 371cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; 372cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_ADST] = 373cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_DCT] = 374cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_ADST] = 375cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; 376cabdff1aSopenharmony_ci init_lpf(avx); 377cabdff1aSopenharmony_ci init_dir_tm_h_ipred(8, avx); 378cabdff1aSopenharmony_ci init_dir_tm_h_ipred(16, avx); 379cabdff1aSopenharmony_ci init_dir_tm_h_ipred(32, avx); 380cabdff1aSopenharmony_ci } 381cabdff1aSopenharmony_ci if (EXTERNAL_AVX_FAST(cpu_flags)) { 382cabdff1aSopenharmony_ci init_fpel_func(1, 0, 32, put, , avx); 383cabdff1aSopenharmony_ci init_fpel_func(0, 0, 64, put, , avx); 384cabdff1aSopenharmony_ci init_ipred(32, avx, v, VERT); 385cabdff1aSopenharmony_ci } 386cabdff1aSopenharmony_ci 387cabdff1aSopenharmony_ci if (EXTERNAL_AVX2_FAST(cpu_flags)) { 388cabdff1aSopenharmony_ci init_fpel_func(1, 1, 32, avg, _8, avx2); 389cabdff1aSopenharmony_ci init_fpel_func(0, 1, 64, avg, _8, avx2); 390cabdff1aSopenharmony_ci if (ARCH_X86_64) { 391cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL 392cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2; 393cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx2; 394cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx2; 395cabdff1aSopenharmony_ci dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx2; 396cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_ADST] = 397cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][ADST_DCT] = 398cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_ADST] = 399cabdff1aSopenharmony_ci dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2; 400cabdff1aSopenharmony_ci init_subpel3_32_64(0, put, 8, avx2); 401cabdff1aSopenharmony_ci init_subpel3_32_64(1, avg, 8, avx2); 402cabdff1aSopenharmony_ci#endif 403cabdff1aSopenharmony_ci } 404cabdff1aSopenharmony_ci init_dc_ipred(32, avx2); 405cabdff1aSopenharmony_ci init_ipred(32, avx2, h, HOR); 406cabdff1aSopenharmony_ci init_ipred(32, avx2, tm, TM_VP8); 407cabdff1aSopenharmony_ci } 408cabdff1aSopenharmony_ci 409cabdff1aSopenharmony_ci#undef init_fpel 410cabdff1aSopenharmony_ci#undef init_subpel1 411cabdff1aSopenharmony_ci#undef init_subpel2 412cabdff1aSopenharmony_ci#undef init_subpel3 413cabdff1aSopenharmony_ci 414cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 415cabdff1aSopenharmony_ci} 416