1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2013 Seppo Tomperi 3cabdff1aSopenharmony_ci * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#include "config.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 26cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 27cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 28cabdff1aSopenharmony_ci#include "libavcodec/hevcdsp.h" 29cabdff1aSopenharmony_ci#include "libavcodec/x86/hevcdsp.h" 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci#define LFC_FUNC(DIR, DEPTH, OPT) \ 32cabdff1aSopenharmony_civoid ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q); 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ci#define LFL_FUNC(DIR, DEPTH, OPT) \ 35cabdff1aSopenharmony_civoid ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q); 36cabdff1aSopenharmony_ci 37cabdff1aSopenharmony_ci#define LFC_FUNCS(type, depth, opt) \ 38cabdff1aSopenharmony_ci LFC_FUNC(h, depth, opt) \ 39cabdff1aSopenharmony_ci LFC_FUNC(v, depth, opt) 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_ci#define LFL_FUNCS(type, depth, opt) \ 42cabdff1aSopenharmony_ci LFL_FUNC(h, depth, opt) \ 43cabdff1aSopenharmony_ci LFL_FUNC(v, depth, opt) 44cabdff1aSopenharmony_ci 45cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 8, sse2) 46cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 10, sse2) 47cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 12, sse2) 48cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 8, avx) 49cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 10, avx) 50cabdff1aSopenharmony_ciLFC_FUNCS(uint8_t, 12, avx) 51cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 8, sse2) 52cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 10, sse2) 53cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 12, sse2) 54cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 8, ssse3) 55cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 10, ssse3) 56cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 12, ssse3) 57cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 8, avx) 58cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 10, avx) 59cabdff1aSopenharmony_ciLFL_FUNCS(uint8_t, 12, avx) 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci#define IDCT_DC_FUNCS(W, opt) \ 62cabdff1aSopenharmony_civoid ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \ 63cabdff1aSopenharmony_civoid ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \ 64cabdff1aSopenharmony_civoid ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs) 65cabdff1aSopenharmony_ci 66cabdff1aSopenharmony_ciIDCT_DC_FUNCS(4x4, mmxext); 67cabdff1aSopenharmony_ciIDCT_DC_FUNCS(8x8, sse2); 68cabdff1aSopenharmony_ciIDCT_DC_FUNCS(16x16, sse2); 69cabdff1aSopenharmony_ciIDCT_DC_FUNCS(32x32, sse2); 70cabdff1aSopenharmony_ciIDCT_DC_FUNCS(16x16, avx2); 71cabdff1aSopenharmony_ciIDCT_DC_FUNCS(32x32, avx2); 72cabdff1aSopenharmony_ci 73cabdff1aSopenharmony_ci#define IDCT_FUNCS(opt) \ 74cabdff1aSopenharmony_civoid ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \ 75cabdff1aSopenharmony_civoid ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \ 76cabdff1aSopenharmony_civoid ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \ 77cabdff1aSopenharmony_civoid ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \ 78cabdff1aSopenharmony_civoid ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \ 79cabdff1aSopenharmony_civoid ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \ 80cabdff1aSopenharmony_civoid ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \ 81cabdff1aSopenharmony_civoid ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit); 82cabdff1aSopenharmony_ci 83cabdff1aSopenharmony_ciIDCT_FUNCS(sse2) 84cabdff1aSopenharmony_ciIDCT_FUNCS(avx) 85cabdff1aSopenharmony_ci 86cabdff1aSopenharmony_ci#define mc_rep_func(name, bitd, step, W, opt) \ 87cabdff1aSopenharmony_civoid ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \ 88cabdff1aSopenharmony_ci uint8_t *_src, ptrdiff_t _srcstride, int height, \ 89cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 90cabdff1aSopenharmony_ci{ \ 91cabdff1aSopenharmony_ci int i; \ 92cabdff1aSopenharmony_ci uint8_t *src; \ 93cabdff1aSopenharmony_ci int16_t *dst; \ 94cabdff1aSopenharmony_ci for (i = 0; i < W; i += step) { \ 95cabdff1aSopenharmony_ci src = _src + (i * ((bitd + 7) / 8)); \ 96cabdff1aSopenharmony_ci dst = _dst + i; \ 97cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ 98cabdff1aSopenharmony_ci } \ 99cabdff1aSopenharmony_ci} 100cabdff1aSopenharmony_ci#define mc_rep_uni_func(name, bitd, step, W, opt) \ 101cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ 102cabdff1aSopenharmony_ci uint8_t *_src, ptrdiff_t _srcstride, int height, \ 103cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 104cabdff1aSopenharmony_ci{ \ 105cabdff1aSopenharmony_ci int i; \ 106cabdff1aSopenharmony_ci uint8_t *src; \ 107cabdff1aSopenharmony_ci uint8_t *dst; \ 108cabdff1aSopenharmony_ci for (i = 0; i < W; i += step) { \ 109cabdff1aSopenharmony_ci src = _src + (i * ((bitd + 7) / 8)); \ 110cabdff1aSopenharmony_ci dst = _dst + (i * ((bitd + 7) / 8)); \ 111cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ 112cabdff1aSopenharmony_ci height, mx, my, width); \ 113cabdff1aSopenharmony_ci } \ 114cabdff1aSopenharmony_ci} 115cabdff1aSopenharmony_ci#define mc_rep_bi_func(name, bitd, step, W, opt) \ 116cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, uint8_t *_src, \ 117cabdff1aSopenharmony_ci ptrdiff_t _srcstride, int16_t* _src2, \ 118cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 119cabdff1aSopenharmony_ci{ \ 120cabdff1aSopenharmony_ci int i; \ 121cabdff1aSopenharmony_ci uint8_t *src; \ 122cabdff1aSopenharmony_ci uint8_t *dst; \ 123cabdff1aSopenharmony_ci int16_t *src2; \ 124cabdff1aSopenharmony_ci for (i = 0; i < W ; i += step) { \ 125cabdff1aSopenharmony_ci src = _src + (i * ((bitd + 7) / 8)); \ 126cabdff1aSopenharmony_ci dst = _dst + (i * ((bitd + 7) / 8)); \ 127cabdff1aSopenharmony_ci src2 = _src2 + i; \ 128cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ 129cabdff1aSopenharmony_ci height, mx, my, width); \ 130cabdff1aSopenharmony_ci } \ 131cabdff1aSopenharmony_ci} 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_ci#define mc_rep_funcs(name, bitd, step, W, opt) \ 134cabdff1aSopenharmony_ci mc_rep_func(name, bitd, step, W, opt) \ 135cabdff1aSopenharmony_ci mc_rep_uni_func(name, bitd, step, W, opt) \ 136cabdff1aSopenharmony_ci mc_rep_bi_func(name, bitd, step, W, opt) 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci#define mc_rep_func2(name, bitd, step1, step2, W, opt) \ 139cabdff1aSopenharmony_civoid ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \ 140cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t _srcstride, int height, \ 141cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 142cabdff1aSopenharmony_ci{ \ 143cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ 144cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \ 145cabdff1aSopenharmony_ci _srcstride, height, mx, my, width); \ 146cabdff1aSopenharmony_ci} 147cabdff1aSopenharmony_ci#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ 148cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ 149cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t _srcstride, int height, \ 150cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 151cabdff1aSopenharmony_ci{ \ 152cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\ 153cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ 154cabdff1aSopenharmony_ci src + (step1 * ((bitd + 7) / 8)), _srcstride, \ 155cabdff1aSopenharmony_ci height, mx, my, width); \ 156cabdff1aSopenharmony_ci} 157cabdff1aSopenharmony_ci#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ 158cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ 159cabdff1aSopenharmony_ci ptrdiff_t _srcstride, int16_t* src2, \ 160cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 161cabdff1aSopenharmony_ci{ \ 162cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ 163cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ 164cabdff1aSopenharmony_ci src + (step1 * ((bitd + 7) / 8)), _srcstride, \ 165cabdff1aSopenharmony_ci src2 + step1, height, mx, my, width); \ 166cabdff1aSopenharmony_ci} 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci#define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \ 169cabdff1aSopenharmony_ci mc_rep_func2(name, bitd, step1, step2, W, opt) \ 170cabdff1aSopenharmony_ci mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ 171cabdff1aSopenharmony_ci mc_rep_bi_func2(name, bitd, step1, step2, W, opt) 172cabdff1aSopenharmony_ci 173cabdff1aSopenharmony_ci#if ARCH_X86_64 && HAVE_SSE4_EXTERNAL 174cabdff1aSopenharmony_ci 175cabdff1aSopenharmony_ci#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ 176cabdff1aSopenharmony_civoid ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \ 177cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 178cabdff1aSopenharmony_ci \ 179cabdff1aSopenharmony_ci{ \ 180cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ 181cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \ 182cabdff1aSopenharmony_ci} 183cabdff1aSopenharmony_ci 184cabdff1aSopenharmony_ci#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ 185cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ 186cabdff1aSopenharmony_ci ptrdiff_t _srcstride, int16_t *src2, \ 187cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 188cabdff1aSopenharmony_ci{ \ 189cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ 190cabdff1aSopenharmony_ci height, mx, my, width); \ 191cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\ 192cabdff1aSopenharmony_ci height, mx, my, width); \ 193cabdff1aSopenharmony_ci} 194cabdff1aSopenharmony_ci 195cabdff1aSopenharmony_ci#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ 196cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ 197cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t _srcstride, int height, \ 198cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 199cabdff1aSopenharmony_ci{ \ 200cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ 201cabdff1aSopenharmony_ci height, mx, my, width); \ 202cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ 203cabdff1aSopenharmony_ci height, mx, my, width); \ 204cabdff1aSopenharmony_ci} 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \ 207cabdff1aSopenharmony_cimc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ 208cabdff1aSopenharmony_cimc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ 209cabdff1aSopenharmony_cimc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ 212cabdff1aSopenharmony_civoid ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \ 213cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 214cabdff1aSopenharmony_ci \ 215cabdff1aSopenharmony_ci{ \ 216cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ 217cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \ 218cabdff1aSopenharmony_ci} 219cabdff1aSopenharmony_ci 220cabdff1aSopenharmony_ci#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ 221cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ 222cabdff1aSopenharmony_ci ptrdiff_t _srcstride, int16_t* src2, \ 223cabdff1aSopenharmony_ci int height, intptr_t mx, intptr_t my, int width) \ 224cabdff1aSopenharmony_ci{ \ 225cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ 226cabdff1aSopenharmony_ci src2, height, mx, my, width); \ 227cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ 228cabdff1aSopenharmony_ci src2+width2, height, mx, my, width); \ 229cabdff1aSopenharmony_ci} 230cabdff1aSopenharmony_ci 231cabdff1aSopenharmony_ci#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ 232cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ 233cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t _srcstride, int height, \ 234cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 235cabdff1aSopenharmony_ci{ \ 236cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ 237cabdff1aSopenharmony_ci height, mx, my, width); \ 238cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ 239cabdff1aSopenharmony_ci height, mx, my, width); \ 240cabdff1aSopenharmony_ci} 241cabdff1aSopenharmony_ci 242cabdff1aSopenharmony_ci#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \ 243cabdff1aSopenharmony_cimc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ 244cabdff1aSopenharmony_cimc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ 245cabdff1aSopenharmony_cimc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) 246cabdff1aSopenharmony_ci 247cabdff1aSopenharmony_ci#if HAVE_AVX2_EXTERNAL 248cabdff1aSopenharmony_ci 249cabdff1aSopenharmony_cimc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4) 250cabdff1aSopenharmony_cimc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4) 251cabdff1aSopenharmony_cimc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4) 252cabdff1aSopenharmony_cimc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4) 253cabdff1aSopenharmony_ci 254cabdff1aSopenharmony_cimc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32) 255cabdff1aSopenharmony_cimc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32) 256cabdff1aSopenharmony_cimc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32) 257cabdff1aSopenharmony_cimc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32) 258cabdff1aSopenharmony_cimc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32) 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32) 262cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32) 263cabdff1aSopenharmony_cimc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32) 264cabdff1aSopenharmony_ci 265cabdff1aSopenharmony_ci 266cabdff1aSopenharmony_cimc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit 267cabdff1aSopenharmony_cimc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 32, 64, avx2) 270cabdff1aSopenharmony_ci 271cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 16, 32, avx2) 272cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 16, 48, avx2) 273cabdff1aSopenharmony_cimc_rep_func(pel_pixels, 10, 32, 64, avx2) 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 16, 32, avx2) 276cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 16, 48, avx2) 277cabdff1aSopenharmony_cimc_rep_bi_func(pel_pixels, 10, 32, 64, avx2) 278cabdff1aSopenharmony_ci 279cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 32, 64, avx2) 280cabdff1aSopenharmony_ci 281cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 32, 64, avx2) 282cabdff1aSopenharmony_ci 283cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 16, 32, avx2) 284cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 16, 48, avx2) 285cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 10, 32, 64, avx2) 286cabdff1aSopenharmony_ci 287cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 16, 32, avx2) 288cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 16, 48, avx2) 289cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 10, 32, 64, avx2) 290cabdff1aSopenharmony_ci 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 32, 64, avx2) 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 16, 32, avx2) 295cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 16, 48, avx2) 296cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 10, 32, 64, avx2) 297cabdff1aSopenharmony_ci 298cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 32, 64, avx2) 299cabdff1aSopenharmony_cimc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4) 300cabdff1aSopenharmony_ci 301cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 32, 64, avx2) 302cabdff1aSopenharmony_cimc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4) 303cabdff1aSopenharmony_ci 304cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 16, 32, avx2) 305cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 16, 48, avx2) 306cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 10, 32, 64, avx2) 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 16, 32, avx2) 309cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 16, 48, avx2) 310cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 10, 32, 64, avx2) 311cabdff1aSopenharmony_ci 312cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 16, 32, avx2) 313cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 16, 48, avx2) 314cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 10, 32, 64, avx2) 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci#endif //AVX2 317cabdff1aSopenharmony_ci 318cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 64, sse4) 319cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 48, sse4) 320cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 16, 32, sse4) 321cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels, 8, 8, 24, sse4) 322cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 8, 64, sse4) 323cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 8, 48, sse4) 324cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 8, 32, sse4) 325cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 8, 24, sse4) 326cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 8, 16, sse4) 327cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,10, 4, 12, sse4) 328cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 8, 64, sse4) 329cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 8, 48, sse4) 330cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 8, 32, sse4) 331cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 8, 24, sse4) 332cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 8, 16, sse4) 333cabdff1aSopenharmony_cimc_rep_funcs(pel_pixels,12, 4, 12, sse4) 334cabdff1aSopenharmony_ci 335cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 64, sse4) 336cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 48, sse4) 337cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 16, 32, sse4) 338cabdff1aSopenharmony_cimc_rep_funcs(epel_h, 8, 8, 24, sse4) 339cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 8, 64, sse4) 340cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 8, 48, sse4) 341cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 8, 32, sse4) 342cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 8, 24, sse4) 343cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 8, 16, sse4) 344cabdff1aSopenharmony_cimc_rep_funcs(epel_h,10, 4, 12, sse4) 345cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 8, 64, sse4) 346cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 8, 48, sse4) 347cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 8, 32, sse4) 348cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 8, 24, sse4) 349cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 8, 16, sse4) 350cabdff1aSopenharmony_cimc_rep_funcs(epel_h,12, 4, 12, sse4) 351cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 64, sse4) 352cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 48, sse4) 353cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 16, 32, sse4) 354cabdff1aSopenharmony_cimc_rep_funcs(epel_v, 8, 8, 24, sse4) 355cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 8, 64, sse4) 356cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 8, 48, sse4) 357cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 8, 32, sse4) 358cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 8, 24, sse4) 359cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 8, 16, sse4) 360cabdff1aSopenharmony_cimc_rep_funcs(epel_v,10, 4, 12, sse4) 361cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 8, 64, sse4) 362cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 8, 48, sse4) 363cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 8, 32, sse4) 364cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 8, 24, sse4) 365cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 8, 16, sse4) 366cabdff1aSopenharmony_cimc_rep_funcs(epel_v,12, 4, 12, sse4) 367cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 64, sse4) 368cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 48, sse4) 369cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 16, 32, sse4) 370cabdff1aSopenharmony_cimc_rep_funcs(epel_hv, 8, 8, 24, sse4) 371cabdff1aSopenharmony_cimc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4) 372cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 8, 64, sse4) 373cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 8, 48, sse4) 374cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 8, 32, sse4) 375cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 8, 24, sse4) 376cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 8, 16, sse4) 377cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,10, 4, 12, sse4) 378cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 8, 64, sse4) 379cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 8, 48, sse4) 380cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 8, 32, sse4) 381cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 8, 24, sse4) 382cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 8, 16, sse4) 383cabdff1aSopenharmony_cimc_rep_funcs(epel_hv,12, 4, 12, sse4) 384cabdff1aSopenharmony_ci 385cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 64, sse4) 386cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 48, sse4) 387cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 16, 32, sse4) 388cabdff1aSopenharmony_cimc_rep_funcs(qpel_h, 8, 8, 24, sse4) 389cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 8, 64, sse4) 390cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 8, 48, sse4) 391cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 8, 32, sse4) 392cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 8, 24, sse4) 393cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 8, 16, sse4) 394cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,10, 4, 12, sse4) 395cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 8, 64, sse4) 396cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 8, 48, sse4) 397cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 8, 32, sse4) 398cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 8, 24, sse4) 399cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 8, 16, sse4) 400cabdff1aSopenharmony_cimc_rep_funcs(qpel_h,12, 4, 12, sse4) 401cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 64, sse4) 402cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 48, sse4) 403cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 16, 32, sse4) 404cabdff1aSopenharmony_cimc_rep_funcs(qpel_v, 8, 8, 24, sse4) 405cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 8, 64, sse4) 406cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 8, 48, sse4) 407cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 8, 32, sse4) 408cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 8, 24, sse4) 409cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 8, 16, sse4) 410cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,10, 4, 12, sse4) 411cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 8, 64, sse4) 412cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 8, 48, sse4) 413cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 8, 32, sse4) 414cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 8, 24, sse4) 415cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 8, 16, sse4) 416cabdff1aSopenharmony_cimc_rep_funcs(qpel_v,12, 4, 12, sse4) 417cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8, 8, 64, sse4) 418cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8, 8, 48, sse4) 419cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8, 8, 32, sse4) 420cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8, 8, 24, sse4) 421cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv, 8, 8, 16, sse4) 422cabdff1aSopenharmony_cimc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4) 423cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 8, 64, sse4) 424cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 8, 48, sse4) 425cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 8, 32, sse4) 426cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 8, 24, sse4) 427cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 8, 16, sse4) 428cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,10, 4, 12, sse4) 429cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 8, 64, sse4) 430cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 8, 48, sse4) 431cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 8, 32, sse4) 432cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 8, 24, sse4) 433cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 8, 16, sse4) 434cabdff1aSopenharmony_cimc_rep_funcs(qpel_hv,12, 4, 12, sse4) 435cabdff1aSopenharmony_ci 436cabdff1aSopenharmony_ci#define mc_rep_uni_w(bitd, step, W, opt) \ 437cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \ 438cabdff1aSopenharmony_ci int height, int denom, int _wx, int _ox) \ 439cabdff1aSopenharmony_ci{ \ 440cabdff1aSopenharmony_ci int i; \ 441cabdff1aSopenharmony_ci int16_t *src; \ 442cabdff1aSopenharmony_ci uint8_t *dst; \ 443cabdff1aSopenharmony_ci for (i = 0; i < W; i += step) { \ 444cabdff1aSopenharmony_ci src= _src + i; \ 445cabdff1aSopenharmony_ci dst= _dst + (i * ((bitd + 7) / 8)); \ 446cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ 447cabdff1aSopenharmony_ci height, denom, _wx, _ox); \ 448cabdff1aSopenharmony_ci } \ 449cabdff1aSopenharmony_ci} 450cabdff1aSopenharmony_ci 451cabdff1aSopenharmony_cimc_rep_uni_w(8, 6, 12, sse4) 452cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 16, sse4) 453cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 24, sse4) 454cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 32, sse4) 455cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 48, sse4) 456cabdff1aSopenharmony_cimc_rep_uni_w(8, 8, 64, sse4) 457cabdff1aSopenharmony_ci 458cabdff1aSopenharmony_cimc_rep_uni_w(10, 6, 12, sse4) 459cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 16, sse4) 460cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 24, sse4) 461cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 32, sse4) 462cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 48, sse4) 463cabdff1aSopenharmony_cimc_rep_uni_w(10, 8, 64, sse4) 464cabdff1aSopenharmony_ci 465cabdff1aSopenharmony_cimc_rep_uni_w(12, 6, 12, sse4) 466cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 16, sse4) 467cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 24, sse4) 468cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 32, sse4) 469cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 48, sse4) 470cabdff1aSopenharmony_cimc_rep_uni_w(12, 8, 64, sse4) 471cabdff1aSopenharmony_ci 472cabdff1aSopenharmony_ci#define mc_rep_bi_w(bitd, step, W, opt) \ 473cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \ 474cabdff1aSopenharmony_ci int16_t *_src2, int height, \ 475cabdff1aSopenharmony_ci int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ 476cabdff1aSopenharmony_ci{ \ 477cabdff1aSopenharmony_ci int i; \ 478cabdff1aSopenharmony_ci int16_t *src; \ 479cabdff1aSopenharmony_ci int16_t *src2; \ 480cabdff1aSopenharmony_ci uint8_t *dst; \ 481cabdff1aSopenharmony_ci for (i = 0; i < W; i += step) { \ 482cabdff1aSopenharmony_ci src = _src + i; \ 483cabdff1aSopenharmony_ci src2 = _src2 + i; \ 484cabdff1aSopenharmony_ci dst = _dst + (i * ((bitd + 7) / 8)); \ 485cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ 486cabdff1aSopenharmony_ci height, denom, _wx0, _wx1, _ox0, _ox1); \ 487cabdff1aSopenharmony_ci } \ 488cabdff1aSopenharmony_ci} 489cabdff1aSopenharmony_ci 490cabdff1aSopenharmony_cimc_rep_bi_w(8, 6, 12, sse4) 491cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 16, sse4) 492cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 24, sse4) 493cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 32, sse4) 494cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 48, sse4) 495cabdff1aSopenharmony_cimc_rep_bi_w(8, 8, 64, sse4) 496cabdff1aSopenharmony_ci 497cabdff1aSopenharmony_cimc_rep_bi_w(10, 6, 12, sse4) 498cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 16, sse4) 499cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 24, sse4) 500cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 32, sse4) 501cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 48, sse4) 502cabdff1aSopenharmony_cimc_rep_bi_w(10, 8, 64, sse4) 503cabdff1aSopenharmony_ci 504cabdff1aSopenharmony_cimc_rep_bi_w(12, 6, 12, sse4) 505cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 16, sse4) 506cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 24, sse4) 507cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 32, sse4) 508cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 48, sse4) 509cabdff1aSopenharmony_cimc_rep_bi_w(12, 8, 64, sse4) 510cabdff1aSopenharmony_ci 511cabdff1aSopenharmony_ci#define mc_uni_w_func(name, bitd, W, opt) \ 512cabdff1aSopenharmony_civoid ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ 513cabdff1aSopenharmony_ci uint8_t *_src, ptrdiff_t _srcstride, \ 514cabdff1aSopenharmony_ci int height, int denom, \ 515cabdff1aSopenharmony_ci int _wx, int _ox, \ 516cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 517cabdff1aSopenharmony_ci{ \ 518cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ 519cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ 520cabdff1aSopenharmony_ci ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\ 521cabdff1aSopenharmony_ci} 522cabdff1aSopenharmony_ci 523cabdff1aSopenharmony_ci#define mc_uni_w_funcs(name, bitd, opt) \ 524cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 4, opt) \ 525cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 8, opt) \ 526cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 12, opt) \ 527cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 16, opt) \ 528cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 24, opt) \ 529cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 32, opt) \ 530cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 48, opt) \ 531cabdff1aSopenharmony_ci mc_uni_w_func(name, bitd, 64, opt) 532cabdff1aSopenharmony_ci 533cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 8, sse4) 534cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 8, 6, sse4) 535cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 8, sse4) 536cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 8, 6, sse4) 537cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 8, sse4) 538cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 8, 6, sse4) 539cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 8, sse4) 540cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 8, 6, sse4) 541cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 8, sse4) 542cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 8, sse4) 543cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 8, sse4) 544cabdff1aSopenharmony_ci 545cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 10, sse4) 546cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 10, 6, sse4) 547cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 10, sse4) 548cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 10, 6, sse4) 549cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 10, sse4) 550cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 10, 6, sse4) 551cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 10, sse4) 552cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 10, 6, sse4) 553cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 10, sse4) 554cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 10, sse4) 555cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 10, sse4) 556cabdff1aSopenharmony_ci 557cabdff1aSopenharmony_cimc_uni_w_funcs(pel_pixels, 12, sse4) 558cabdff1aSopenharmony_cimc_uni_w_func(pel_pixels, 12, 6, sse4) 559cabdff1aSopenharmony_cimc_uni_w_funcs(epel_h, 12, sse4) 560cabdff1aSopenharmony_cimc_uni_w_func(epel_h, 12, 6, sse4) 561cabdff1aSopenharmony_cimc_uni_w_funcs(epel_v, 12, sse4) 562cabdff1aSopenharmony_cimc_uni_w_func(epel_v, 12, 6, sse4) 563cabdff1aSopenharmony_cimc_uni_w_funcs(epel_hv, 12, sse4) 564cabdff1aSopenharmony_cimc_uni_w_func(epel_hv, 12, 6, sse4) 565cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_h, 12, sse4) 566cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_v, 12, sse4) 567cabdff1aSopenharmony_cimc_uni_w_funcs(qpel_hv, 12, sse4) 568cabdff1aSopenharmony_ci 569cabdff1aSopenharmony_ci#define mc_bi_w_func(name, bitd, W, opt) \ 570cabdff1aSopenharmony_civoid ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ 571cabdff1aSopenharmony_ci uint8_t *_src, ptrdiff_t _srcstride, \ 572cabdff1aSopenharmony_ci int16_t *_src2, \ 573cabdff1aSopenharmony_ci int height, int denom, \ 574cabdff1aSopenharmony_ci int _wx0, int _wx1, int _ox0, int _ox1, \ 575cabdff1aSopenharmony_ci intptr_t mx, intptr_t my, int width) \ 576cabdff1aSopenharmony_ci{ \ 577cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ 578cabdff1aSopenharmony_ci ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ 579cabdff1aSopenharmony_ci ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ 580cabdff1aSopenharmony_ci height, denom, _wx0, _wx1, _ox0, _ox1); \ 581cabdff1aSopenharmony_ci} 582cabdff1aSopenharmony_ci 583cabdff1aSopenharmony_ci#define mc_bi_w_funcs(name, bitd, opt) \ 584cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 4, opt) \ 585cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 8, opt) \ 586cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 12, opt) \ 587cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 16, opt) \ 588cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 24, opt) \ 589cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 32, opt) \ 590cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 48, opt) \ 591cabdff1aSopenharmony_ci mc_bi_w_func(name, bitd, 64, opt) 592cabdff1aSopenharmony_ci 593cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 8, sse4) 594cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 8, 6, sse4) 595cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 8, sse4) 596cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 8, 6, sse4) 597cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 8, sse4) 598cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 8, 6, sse4) 599cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 8, sse4) 600cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 8, 6, sse4) 601cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 8, sse4) 602cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 8, sse4) 603cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 8, sse4) 604cabdff1aSopenharmony_ci 605cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 10, sse4) 606cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 10, 6, sse4) 607cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 10, sse4) 608cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 10, 6, sse4) 609cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 10, sse4) 610cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 10, 6, sse4) 611cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 10, sse4) 612cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 10, 6, sse4) 613cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 10, sse4) 614cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 10, sse4) 615cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 10, sse4) 616cabdff1aSopenharmony_ci 617cabdff1aSopenharmony_cimc_bi_w_funcs(pel_pixels, 12, sse4) 618cabdff1aSopenharmony_cimc_bi_w_func(pel_pixels, 12, 6, sse4) 619cabdff1aSopenharmony_cimc_bi_w_funcs(epel_h, 12, sse4) 620cabdff1aSopenharmony_cimc_bi_w_func(epel_h, 12, 6, sse4) 621cabdff1aSopenharmony_cimc_bi_w_funcs(epel_v, 12, sse4) 622cabdff1aSopenharmony_cimc_bi_w_func(epel_v, 12, 6, sse4) 623cabdff1aSopenharmony_cimc_bi_w_funcs(epel_hv, 12, sse4) 624cabdff1aSopenharmony_cimc_bi_w_func(epel_hv, 12, 6, sse4) 625cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_h, 12, sse4) 626cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_v, 12, sse4) 627cabdff1aSopenharmony_cimc_bi_w_funcs(qpel_hv, 12, sse4) 628cabdff1aSopenharmony_ci#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL 629cabdff1aSopenharmony_ci 630cabdff1aSopenharmony_ci#define SAO_BAND_FILTER_FUNCS(bitd, opt) \ 631cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 632cabdff1aSopenharmony_ci int16_t *sao_offset_val, int sao_left_class, int width, int height); \ 633cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 634cabdff1aSopenharmony_ci int16_t *sao_offset_val, int sao_left_class, int width, int height); \ 635cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 636cabdff1aSopenharmony_ci int16_t *sao_offset_val, int sao_left_class, int width, int height); \ 637cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 638cabdff1aSopenharmony_ci int16_t *sao_offset_val, int sao_left_class, int width, int height); \ 639cabdff1aSopenharmony_civoid ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 640cabdff1aSopenharmony_ci int16_t *sao_offset_val, int sao_left_class, int width, int height); 641cabdff1aSopenharmony_ci 642cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8, sse2) 643cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10, sse2) 644cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12, sse2) 645cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8, avx) 646cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10, avx) 647cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12, avx) 648cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(8, avx2) 649cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(10, avx2) 650cabdff1aSopenharmony_ciSAO_BAND_FILTER_FUNCS(12, avx2) 651cabdff1aSopenharmony_ci 652cabdff1aSopenharmony_ci#define SAO_BAND_INIT(bitd, opt) do { \ 653cabdff1aSopenharmony_ci c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \ 654cabdff1aSopenharmony_ci c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \ 655cabdff1aSopenharmony_ci c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \ 656cabdff1aSopenharmony_ci c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \ 657cabdff1aSopenharmony_ci c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \ 658cabdff1aSopenharmony_ci} while (0) 659cabdff1aSopenharmony_ci 660cabdff1aSopenharmony_ci#define SAO_EDGE_FILTER_FUNCS(bitd, opt) \ 661cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \ 662cabdff1aSopenharmony_ci int eo, int width, int height); \ 663cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \ 664cabdff1aSopenharmony_ci int eo, int width, int height); \ 665cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \ 666cabdff1aSopenharmony_ci int eo, int width, int height); \ 667cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \ 668cabdff1aSopenharmony_ci int eo, int width, int height); \ 669cabdff1aSopenharmony_civoid ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \ 670cabdff1aSopenharmony_ci int eo, int width, int height); \ 671cabdff1aSopenharmony_ci 672cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(8, ssse3) 673cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(8, avx2) 674cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(10, sse2) 675cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(10, avx2) 676cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(12, sse2) 677cabdff1aSopenharmony_ciSAO_EDGE_FILTER_FUNCS(12, avx2) 678cabdff1aSopenharmony_ci 679cabdff1aSopenharmony_ci#define SAO_EDGE_INIT(bitd, opt) do { \ 680cabdff1aSopenharmony_ci c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \ 681cabdff1aSopenharmony_ci c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \ 682cabdff1aSopenharmony_ci c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \ 683cabdff1aSopenharmony_ci c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \ 684cabdff1aSopenharmony_ci c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \ 685cabdff1aSopenharmony_ci} while (0) 686cabdff1aSopenharmony_ci 687cabdff1aSopenharmony_ci#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \ 688cabdff1aSopenharmony_ci PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ 689cabdff1aSopenharmony_ci PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \ 690cabdff1aSopenharmony_ci PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ 691cabdff1aSopenharmony_ci PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ 692cabdff1aSopenharmony_ci PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ 693cabdff1aSopenharmony_ci PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ 694cabdff1aSopenharmony_ci PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ 695cabdff1aSopenharmony_ci PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ 696cabdff1aSopenharmony_ci PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) 697cabdff1aSopenharmony_ci#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \ 698cabdff1aSopenharmony_ci PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \ 699cabdff1aSopenharmony_ci PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \ 700cabdff1aSopenharmony_ci PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \ 701cabdff1aSopenharmony_ci PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \ 702cabdff1aSopenharmony_ci PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \ 703cabdff1aSopenharmony_ci PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \ 704cabdff1aSopenharmony_ci PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \ 705cabdff1aSopenharmony_ci PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt ) 706cabdff1aSopenharmony_ci 707cabdff1aSopenharmony_civoid ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) 708cabdff1aSopenharmony_ci{ 709cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 710cabdff1aSopenharmony_ci 711cabdff1aSopenharmony_ci if (bit_depth == 8) { 712cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 713cabdff1aSopenharmony_ci c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext; 714cabdff1aSopenharmony_ci 715cabdff1aSopenharmony_ci c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext; 716cabdff1aSopenharmony_ci } 717cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 718cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; 719cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; 720cabdff1aSopenharmony_ci if (ARCH_X86_64) { 721cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; 722cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; 723cabdff1aSopenharmony_ci 724cabdff1aSopenharmony_ci c->idct[2] = ff_hevc_idct_16x16_8_sse2; 725cabdff1aSopenharmony_ci c->idct[3] = ff_hevc_idct_32x32_8_sse2; 726cabdff1aSopenharmony_ci } 727cabdff1aSopenharmony_ci SAO_BAND_INIT(8, sse2); 728cabdff1aSopenharmony_ci 729cabdff1aSopenharmony_ci c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; 730cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2; 731cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2; 732cabdff1aSopenharmony_ci 733cabdff1aSopenharmony_ci c->idct[0] = ff_hevc_idct_4x4_8_sse2; 734cabdff1aSopenharmony_ci c->idct[1] = ff_hevc_idct_8x8_8_sse2; 735cabdff1aSopenharmony_ci 736cabdff1aSopenharmony_ci c->add_residual[1] = ff_hevc_add_residual_8_8_sse2; 737cabdff1aSopenharmony_ci c->add_residual[2] = ff_hevc_add_residual_16_8_sse2; 738cabdff1aSopenharmony_ci c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; 739cabdff1aSopenharmony_ci } 740cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 741cabdff1aSopenharmony_ci if(ARCH_X86_64) { 742cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; 743cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; 744cabdff1aSopenharmony_ci } 745cabdff1aSopenharmony_ci SAO_EDGE_INIT(8, ssse3); 746cabdff1aSopenharmony_ci } 747cabdff1aSopenharmony_ci if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { 748cabdff1aSopenharmony_ci 749cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); 750cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); 751cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); 752cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4); 753cabdff1aSopenharmony_ci 754cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4); 755cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4); 756cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); 757cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); 758cabdff1aSopenharmony_ci } 759cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 760cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; 761cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; 762cabdff1aSopenharmony_ci if (ARCH_X86_64) { 763cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; 764cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; 765cabdff1aSopenharmony_ci 766cabdff1aSopenharmony_ci c->idct[2] = ff_hevc_idct_16x16_8_avx; 767cabdff1aSopenharmony_ci c->idct[3] = ff_hevc_idct_32x32_8_avx; 768cabdff1aSopenharmony_ci } 769cabdff1aSopenharmony_ci SAO_BAND_INIT(8, avx); 770cabdff1aSopenharmony_ci 771cabdff1aSopenharmony_ci c->idct[0] = ff_hevc_idct_4x4_8_avx; 772cabdff1aSopenharmony_ci c->idct[1] = ff_hevc_idct_8x8_8_avx; 773cabdff1aSopenharmony_ci 774cabdff1aSopenharmony_ci c->add_residual[1] = ff_hevc_add_residual_8_8_avx; 775cabdff1aSopenharmony_ci c->add_residual[2] = ff_hevc_add_residual_16_8_avx; 776cabdff1aSopenharmony_ci c->add_residual[3] = ff_hevc_add_residual_32_8_avx; 777cabdff1aSopenharmony_ci } 778cabdff1aSopenharmony_ci if (EXTERNAL_AVX2(cpu_flags)) { 779cabdff1aSopenharmony_ci c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; 780cabdff1aSopenharmony_ci c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; 781cabdff1aSopenharmony_ci } 782cabdff1aSopenharmony_ci if (EXTERNAL_AVX2_FAST(cpu_flags)) { 783cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; 784cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; 785cabdff1aSopenharmony_ci if (ARCH_X86_64) { 786cabdff1aSopenharmony_ci c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; 787cabdff1aSopenharmony_ci c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; 788cabdff1aSopenharmony_ci c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; 789cabdff1aSopenharmony_ci 790cabdff1aSopenharmony_ci c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; 791cabdff1aSopenharmony_ci c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; 792cabdff1aSopenharmony_ci c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; 793cabdff1aSopenharmony_ci 794cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; 795cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; 796cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; 797cabdff1aSopenharmony_ci 798cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; 799cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; 800cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; 801cabdff1aSopenharmony_ci 802cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; 803cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; 804cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; 805cabdff1aSopenharmony_ci 806cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; 807cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; 808cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; 809cabdff1aSopenharmony_ci 810cabdff1aSopenharmony_ci c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; 811cabdff1aSopenharmony_ci c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; 812cabdff1aSopenharmony_ci c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; 813cabdff1aSopenharmony_ci 814cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; 815cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; 816cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; 817cabdff1aSopenharmony_ci 818cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; 819cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; 820cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; 821cabdff1aSopenharmony_ci 822cabdff1aSopenharmony_ci c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; 823cabdff1aSopenharmony_ci c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; 824cabdff1aSopenharmony_ci c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; 825cabdff1aSopenharmony_ci 826cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; 827cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; 828cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; 829cabdff1aSopenharmony_ci 830cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; 831cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; 832cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; 833cabdff1aSopenharmony_ci 834cabdff1aSopenharmony_ci c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; 835cabdff1aSopenharmony_ci c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; 836cabdff1aSopenharmony_ci c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; 837cabdff1aSopenharmony_ci 838cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; 839cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; 840cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; 841cabdff1aSopenharmony_ci 842cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; 843cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; 844cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; 845cabdff1aSopenharmony_ci 846cabdff1aSopenharmony_ci c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; 847cabdff1aSopenharmony_ci c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; 848cabdff1aSopenharmony_ci c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; 849cabdff1aSopenharmony_ci 850cabdff1aSopenharmony_ci c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; 851cabdff1aSopenharmony_ci c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; 852cabdff1aSopenharmony_ci c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; 853cabdff1aSopenharmony_ci 854cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; 855cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; 856cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; 857cabdff1aSopenharmony_ci 858cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; 859cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; 860cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; 861cabdff1aSopenharmony_ci 862cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; 863cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; 864cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; 865cabdff1aSopenharmony_ci 866cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; 867cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; 868cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; 869cabdff1aSopenharmony_ci } 870cabdff1aSopenharmony_ci SAO_BAND_INIT(8, avx2); 871cabdff1aSopenharmony_ci 872cabdff1aSopenharmony_ci c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; 873cabdff1aSopenharmony_ci c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; 874cabdff1aSopenharmony_ci c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2; 875cabdff1aSopenharmony_ci 876cabdff1aSopenharmony_ci c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; 877cabdff1aSopenharmony_ci } 878cabdff1aSopenharmony_ci if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { 879cabdff1aSopenharmony_ci c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl; 880cabdff1aSopenharmony_ci c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl; 881cabdff1aSopenharmony_ci c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl; 882cabdff1aSopenharmony_ci c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl; 883cabdff1aSopenharmony_ci c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl; 884cabdff1aSopenharmony_ci c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl; 885cabdff1aSopenharmony_ci } 886cabdff1aSopenharmony_ci } else if (bit_depth == 10) { 887cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 888cabdff1aSopenharmony_ci c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext; 889cabdff1aSopenharmony_ci c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext; 890cabdff1aSopenharmony_ci } 891cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 892cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; 893cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; 894cabdff1aSopenharmony_ci if (ARCH_X86_64) { 895cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; 896cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; 897cabdff1aSopenharmony_ci 898cabdff1aSopenharmony_ci c->idct[2] = ff_hevc_idct_16x16_10_sse2; 899cabdff1aSopenharmony_ci c->idct[3] = ff_hevc_idct_32x32_10_sse2; 900cabdff1aSopenharmony_ci } 901cabdff1aSopenharmony_ci SAO_BAND_INIT(10, sse2); 902cabdff1aSopenharmony_ci SAO_EDGE_INIT(10, sse2); 903cabdff1aSopenharmony_ci 904cabdff1aSopenharmony_ci c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2; 905cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2; 906cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2; 907cabdff1aSopenharmony_ci 908cabdff1aSopenharmony_ci c->idct[0] = ff_hevc_idct_4x4_10_sse2; 909cabdff1aSopenharmony_ci c->idct[1] = ff_hevc_idct_8x8_10_sse2; 910cabdff1aSopenharmony_ci 911cabdff1aSopenharmony_ci c->add_residual[1] = ff_hevc_add_residual_8_10_sse2; 912cabdff1aSopenharmony_ci c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; 913cabdff1aSopenharmony_ci c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; 914cabdff1aSopenharmony_ci } 915cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { 916cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; 917cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; 918cabdff1aSopenharmony_ci } 919cabdff1aSopenharmony_ci if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { 920cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); 921cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); 922cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); 923cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4); 924cabdff1aSopenharmony_ci 925cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4); 926cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4); 927cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); 928cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); 929cabdff1aSopenharmony_ci } 930cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 931cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; 932cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; 933cabdff1aSopenharmony_ci if (ARCH_X86_64) { 934cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; 935cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; 936cabdff1aSopenharmony_ci 937cabdff1aSopenharmony_ci c->idct[2] = ff_hevc_idct_16x16_10_avx; 938cabdff1aSopenharmony_ci c->idct[3] = ff_hevc_idct_32x32_10_avx; 939cabdff1aSopenharmony_ci } 940cabdff1aSopenharmony_ci 941cabdff1aSopenharmony_ci c->idct[0] = ff_hevc_idct_4x4_10_avx; 942cabdff1aSopenharmony_ci c->idct[1] = ff_hevc_idct_8x8_10_avx; 943cabdff1aSopenharmony_ci 944cabdff1aSopenharmony_ci SAO_BAND_INIT(10, avx); 945cabdff1aSopenharmony_ci } 946cabdff1aSopenharmony_ci if (EXTERNAL_AVX2(cpu_flags)) { 947cabdff1aSopenharmony_ci c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; 948cabdff1aSopenharmony_ci } 949cabdff1aSopenharmony_ci if (EXTERNAL_AVX2_FAST(cpu_flags)) { 950cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; 951cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; 952cabdff1aSopenharmony_ci if (ARCH_X86_64) { 953cabdff1aSopenharmony_ci c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; 954cabdff1aSopenharmony_ci c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; 955cabdff1aSopenharmony_ci c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; 956cabdff1aSopenharmony_ci c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; 957cabdff1aSopenharmony_ci c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; 958cabdff1aSopenharmony_ci 959cabdff1aSopenharmony_ci c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; 960cabdff1aSopenharmony_ci c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; 961cabdff1aSopenharmony_ci c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; 962cabdff1aSopenharmony_ci c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; 963cabdff1aSopenharmony_ci c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; 964cabdff1aSopenharmony_ci 965cabdff1aSopenharmony_ci c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; 966cabdff1aSopenharmony_ci c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; 967cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; 968cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; 969cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; 970cabdff1aSopenharmony_ci 971cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; 972cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; 973cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; 974cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; 975cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; 976cabdff1aSopenharmony_ci 977cabdff1aSopenharmony_ci c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; 978cabdff1aSopenharmony_ci c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; 979cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; 980cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; 981cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; 982cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; 983cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; 984cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; 985cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; 986cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; 987cabdff1aSopenharmony_ci 988cabdff1aSopenharmony_ci c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; 989cabdff1aSopenharmony_ci c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; 990cabdff1aSopenharmony_ci c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; 991cabdff1aSopenharmony_ci c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; 992cabdff1aSopenharmony_ci c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; 993cabdff1aSopenharmony_ci 994cabdff1aSopenharmony_ci c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; 995cabdff1aSopenharmony_ci c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; 996cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; 997cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; 998cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; 999cabdff1aSopenharmony_ci 1000cabdff1aSopenharmony_ci c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; 1001cabdff1aSopenharmony_ci c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; 1002cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; 1003cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; 1004cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; 1005cabdff1aSopenharmony_ci 1006cabdff1aSopenharmony_ci c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; 1007cabdff1aSopenharmony_ci c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; 1008cabdff1aSopenharmony_ci c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; 1009cabdff1aSopenharmony_ci c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; 1010cabdff1aSopenharmony_ci c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; 1011cabdff1aSopenharmony_ci 1012cabdff1aSopenharmony_ci c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; 1013cabdff1aSopenharmony_ci c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; 1014cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; 1015cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; 1016cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; 1017cabdff1aSopenharmony_ci 1018cabdff1aSopenharmony_ci c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; 1019cabdff1aSopenharmony_ci c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; 1020cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; 1021cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; 1022cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; 1023cabdff1aSopenharmony_ci 1024cabdff1aSopenharmony_ci c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; 1025cabdff1aSopenharmony_ci c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; 1026cabdff1aSopenharmony_ci c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; 1027cabdff1aSopenharmony_ci c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; 1028cabdff1aSopenharmony_ci c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; 1029cabdff1aSopenharmony_ci 1030cabdff1aSopenharmony_ci c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; 1031cabdff1aSopenharmony_ci c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; 1032cabdff1aSopenharmony_ci c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; 1033cabdff1aSopenharmony_ci c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; 1034cabdff1aSopenharmony_ci c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; 1035cabdff1aSopenharmony_ci 1036cabdff1aSopenharmony_ci c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; 1037cabdff1aSopenharmony_ci c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; 1038cabdff1aSopenharmony_ci c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; 1039cabdff1aSopenharmony_ci c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; 1040cabdff1aSopenharmony_ci c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; 1041cabdff1aSopenharmony_ci 1042cabdff1aSopenharmony_ci c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; 1043cabdff1aSopenharmony_ci c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; 1044cabdff1aSopenharmony_ci c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; 1045cabdff1aSopenharmony_ci c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; 1046cabdff1aSopenharmony_ci c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; 1047cabdff1aSopenharmony_ci 1048cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; 1049cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; 1050cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; 1051cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; 1052cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; 1053cabdff1aSopenharmony_ci 1054cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; 1055cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; 1056cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; 1057cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; 1058cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; 1059cabdff1aSopenharmony_ci 1060cabdff1aSopenharmony_ci c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; 1061cabdff1aSopenharmony_ci c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; 1062cabdff1aSopenharmony_ci c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; 1063cabdff1aSopenharmony_ci c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; 1064cabdff1aSopenharmony_ci c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; 1065cabdff1aSopenharmony_ci 1066cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; 1067cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; 1068cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; 1069cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; 1070cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; 1071cabdff1aSopenharmony_ci 1072cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; 1073cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; 1074cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; 1075cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; 1076cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; 1077cabdff1aSopenharmony_ci 1078cabdff1aSopenharmony_ci c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; 1079cabdff1aSopenharmony_ci c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; 1080cabdff1aSopenharmony_ci c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; 1081cabdff1aSopenharmony_ci c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; 1082cabdff1aSopenharmony_ci c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; 1083cabdff1aSopenharmony_ci 1084cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; 1085cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; 1086cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; 1087cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; 1088cabdff1aSopenharmony_ci c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; 1089cabdff1aSopenharmony_ci 1090cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; 1091cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; 1092cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; 1093cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; 1094cabdff1aSopenharmony_ci c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; 1095cabdff1aSopenharmony_ci } 1096cabdff1aSopenharmony_ci SAO_BAND_INIT(10, avx2); 1097cabdff1aSopenharmony_ci SAO_EDGE_INIT(10, avx2); 1098cabdff1aSopenharmony_ci 1099cabdff1aSopenharmony_ci c->add_residual[2] = ff_hevc_add_residual_16_10_avx2; 1100cabdff1aSopenharmony_ci c->add_residual[3] = ff_hevc_add_residual_32_10_avx2; 1101cabdff1aSopenharmony_ci } 1102cabdff1aSopenharmony_ci } else if (bit_depth == 12) { 1103cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 1104cabdff1aSopenharmony_ci c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext; 1105cabdff1aSopenharmony_ci } 1106cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 1107cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; 1108cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; 1109cabdff1aSopenharmony_ci if (ARCH_X86_64) { 1110cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; 1111cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; 1112cabdff1aSopenharmony_ci } 1113cabdff1aSopenharmony_ci SAO_BAND_INIT(12, sse2); 1114cabdff1aSopenharmony_ci SAO_EDGE_INIT(12, sse2); 1115cabdff1aSopenharmony_ci 1116cabdff1aSopenharmony_ci c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2; 1117cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; 1118cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; 1119cabdff1aSopenharmony_ci } 1120cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { 1121cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; 1122cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; 1123cabdff1aSopenharmony_ci } 1124cabdff1aSopenharmony_ci if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { 1125cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); 1126cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); 1127cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); 1128cabdff1aSopenharmony_ci EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4); 1129cabdff1aSopenharmony_ci 1130cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4); 1131cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4); 1132cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); 1133cabdff1aSopenharmony_ci QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); 1134cabdff1aSopenharmony_ci } 1135cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 1136cabdff1aSopenharmony_ci c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; 1137cabdff1aSopenharmony_ci c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; 1138cabdff1aSopenharmony_ci if (ARCH_X86_64) { 1139cabdff1aSopenharmony_ci c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; 1140cabdff1aSopenharmony_ci c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; 1141cabdff1aSopenharmony_ci } 1142cabdff1aSopenharmony_ci SAO_BAND_INIT(12, avx); 1143cabdff1aSopenharmony_ci } 1144cabdff1aSopenharmony_ci if (EXTERNAL_AVX2(cpu_flags)) { 1145cabdff1aSopenharmony_ci c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2; 1146cabdff1aSopenharmony_ci } 1147cabdff1aSopenharmony_ci if (EXTERNAL_AVX2_FAST(cpu_flags)) { 1148cabdff1aSopenharmony_ci c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2; 1149cabdff1aSopenharmony_ci c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2; 1150cabdff1aSopenharmony_ci 1151cabdff1aSopenharmony_ci SAO_BAND_INIT(12, avx2); 1152cabdff1aSopenharmony_ci SAO_EDGE_INIT(12, avx2); 1153cabdff1aSopenharmony_ci } 1154cabdff1aSopenharmony_ci } 1155cabdff1aSopenharmony_ci} 1156