1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt 3cabdff1aSopenharmony_ci * Copyright (c) 2011 Daniel Kang 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 23cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 24cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 25cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 26cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 27cabdff1aSopenharmony_ci#include "libavcodec/h264qpel.h" 28cabdff1aSopenharmony_ci#include "libavcodec/pixels.h" 29cabdff1aSopenharmony_ci#include "fpel.h" 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci#if HAVE_X86ASM 32cabdff1aSopenharmony_civoid ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 33cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 34cabdff1aSopenharmony_civoid ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 35cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 36cabdff1aSopenharmony_civoid ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 37cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 38cabdff1aSopenharmony_civoid ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 39cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 40cabdff1aSopenharmony_civoid ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 41cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 42cabdff1aSopenharmony_civoid ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 43cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 44cabdff1aSopenharmony_ci#define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext 45cabdff1aSopenharmony_ci#define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext 46cabdff1aSopenharmony_ci#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext 47cabdff1aSopenharmony_ci#define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext 48cabdff1aSopenharmony_ci#define ff_put_pixels16_mmxext ff_put_pixels16_mmx 49cabdff1aSopenharmony_ci#define ff_put_pixels8_mmxext ff_put_pixels8_mmx 50cabdff1aSopenharmony_ci#define ff_put_pixels4_mmxext ff_put_pixels4_mmx 51cabdff1aSopenharmony_ci 52cabdff1aSopenharmony_ci#define DEF_QPEL(OPNAME)\ 53cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 54cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 55cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 56cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 57cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 58cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 59cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 60cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\ 61cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\ 62cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\ 63cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\ 64cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\ 65cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\ 66cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\ 67cabdff1aSopenharmony_civoid ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h); 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ciDEF_QPEL(avg) 70cabdff1aSopenharmony_ciDEF_QPEL(put) 71cabdff1aSopenharmony_ci 72cabdff1aSopenharmony_ci#define QPEL_H264(OPNAME, OP, MMX)\ 73cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 74cabdff1aSopenharmony_ci int w=3;\ 75cabdff1aSopenharmony_ci src -= 2*srcStride+2;\ 76cabdff1aSopenharmony_ci while(w--){\ 77cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\ 78cabdff1aSopenharmony_ci tmp += 4;\ 79cabdff1aSopenharmony_ci src += 4;\ 80cabdff1aSopenharmony_ci }\ 81cabdff1aSopenharmony_ci tmp -= 3*4;\ 82cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\ 83cabdff1aSopenharmony_ci}\ 84cabdff1aSopenharmony_ci\ 85cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ 86cabdff1aSopenharmony_ci int w = size>>4;\ 87cabdff1aSopenharmony_ci do{\ 88cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\ 89cabdff1aSopenharmony_ci tmp += 8;\ 90cabdff1aSopenharmony_ci dst += 8;\ 91cabdff1aSopenharmony_ci }while(w--);\ 92cabdff1aSopenharmony_ci}\ 93cabdff1aSopenharmony_ci\ 94cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 95cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 96cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 97cabdff1aSopenharmony_ci src += 8*srcStride;\ 98cabdff1aSopenharmony_ci dst += 8*dstStride;\ 99cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 100cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 101cabdff1aSopenharmony_ci}\ 102cabdff1aSopenharmony_ci\ 103cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ 104cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 105cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 106cabdff1aSopenharmony_ci src += 8*dstStride;\ 107cabdff1aSopenharmony_ci dst += 8*dstStride;\ 108cabdff1aSopenharmony_ci src2 += 8*src2Stride;\ 109cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 110cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 111cabdff1aSopenharmony_ci}\ 112cabdff1aSopenharmony_ci\ 113cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\ 114cabdff1aSopenharmony_ci{\ 115cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\ 116cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\ 117cabdff1aSopenharmony_ci}\ 118cabdff1aSopenharmony_ci 119cabdff1aSopenharmony_ci 120cabdff1aSopenharmony_ci#if ARCH_X86_64 121cabdff1aSopenharmony_ci#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 122cabdff1aSopenharmony_ci 123cabdff1aSopenharmony_civoid ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride); 124cabdff1aSopenharmony_civoid ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride); 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci#else // ARCH_X86_64 127cabdff1aSopenharmony_ci#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 128cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ 129cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 130cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 131cabdff1aSopenharmony_ci src += 8*dstStride;\ 132cabdff1aSopenharmony_ci dst += 8*dstStride;\ 133cabdff1aSopenharmony_ci src2 += 8*src2Stride;\ 134cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 135cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 136cabdff1aSopenharmony_ci} 137cabdff1aSopenharmony_ci#endif // ARCH_X86_64 138cabdff1aSopenharmony_ci 139cabdff1aSopenharmony_ci#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ 140cabdff1aSopenharmony_ciQPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 141cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 142cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 143cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 144cabdff1aSopenharmony_ci src += 8*srcStride;\ 145cabdff1aSopenharmony_ci dst += 8*dstStride;\ 146cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 147cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 148cabdff1aSopenharmony_ci}\ 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\ 151cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 152cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ 153cabdff1aSopenharmony_ci}\ 154cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 155cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ 156cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 157cabdff1aSopenharmony_ci} 158cabdff1aSopenharmony_ci 159cabdff1aSopenharmony_cistatic av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, 160cabdff1aSopenharmony_ci const uint8_t *src, 161cabdff1aSopenharmony_ci int tmpStride, 162cabdff1aSopenharmony_ci int srcStride, 163cabdff1aSopenharmony_ci int size) 164cabdff1aSopenharmony_ci{ 165cabdff1aSopenharmony_ci int w = (size+8)>>3; 166cabdff1aSopenharmony_ci src -= 2*srcStride+2; 167cabdff1aSopenharmony_ci while(w--){ 168cabdff1aSopenharmony_ci ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size); 169cabdff1aSopenharmony_ci tmp += 8; 170cabdff1aSopenharmony_ci src += 8; 171cabdff1aSopenharmony_ci } 172cabdff1aSopenharmony_ci} 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_ci#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\ 175cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 176cabdff1aSopenharmony_ci put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\ 177cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ 178cabdff1aSopenharmony_ci}\ 179cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 180cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\ 181cabdff1aSopenharmony_ci}\ 182cabdff1aSopenharmony_cistatic av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 183cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\ 184cabdff1aSopenharmony_ci}\ 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_ci#define ff_put_h264_qpel8_h_lowpass_l2_sse2 ff_put_h264_qpel8_h_lowpass_l2_mmxext 187cabdff1aSopenharmony_ci#define ff_avg_h264_qpel8_h_lowpass_l2_sse2 ff_avg_h264_qpel8_h_lowpass_l2_mmxext 188cabdff1aSopenharmony_ci#define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext 189cabdff1aSopenharmony_ci#define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext 190cabdff1aSopenharmony_ci 191cabdff1aSopenharmony_ci#define ff_put_h264_qpel8_v_lowpass_ssse3 ff_put_h264_qpel8_v_lowpass_sse2 192cabdff1aSopenharmony_ci#define ff_avg_h264_qpel8_v_lowpass_ssse3 ff_avg_h264_qpel8_v_lowpass_sse2 193cabdff1aSopenharmony_ci#define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2 194cabdff1aSopenharmony_ci#define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_ci#define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext 197cabdff1aSopenharmony_ci#define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ci#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \ 200cabdff1aSopenharmony_ciH264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ 201cabdff1aSopenharmony_ciH264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_ci#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ 204cabdff1aSopenharmony_ciH264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ 205cabdff1aSopenharmony_ciH264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ 206cabdff1aSopenharmony_ciH264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ 207cabdff1aSopenharmony_ciH264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\ 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_cistatic void put_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src, 210cabdff1aSopenharmony_ci ptrdiff_t stride) 211cabdff1aSopenharmony_ci{ 212cabdff1aSopenharmony_ci ff_put_pixels16_sse2(dst, src, stride, 16); 213cabdff1aSopenharmony_ci} 214cabdff1aSopenharmony_cistatic void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src, 215cabdff1aSopenharmony_ci ptrdiff_t stride) 216cabdff1aSopenharmony_ci{ 217cabdff1aSopenharmony_ci ff_avg_pixels16_sse2(dst, src, stride, 16); 218cabdff1aSopenharmony_ci} 219cabdff1aSopenharmony_ci#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext 220cabdff1aSopenharmony_ci#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext 221cabdff1aSopenharmony_ci 222cabdff1aSopenharmony_ci#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ 223cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 224cabdff1aSopenharmony_ci{\ 225cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\ 226cabdff1aSopenharmony_ci}\ 227cabdff1aSopenharmony_ci 228cabdff1aSopenharmony_ci#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \ 229cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 230cabdff1aSopenharmony_ci{\ 231cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ 232cabdff1aSopenharmony_ci}\ 233cabdff1aSopenharmony_ci\ 234cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 235cabdff1aSopenharmony_ci{\ 236cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ 237cabdff1aSopenharmony_ci}\ 238cabdff1aSopenharmony_ci\ 239cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 240cabdff1aSopenharmony_ci{\ 241cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ 242cabdff1aSopenharmony_ci}\ 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_ci#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ 245cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 246cabdff1aSopenharmony_ci{\ 247cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 248cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 249cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ 250cabdff1aSopenharmony_ci}\ 251cabdff1aSopenharmony_ci\ 252cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 253cabdff1aSopenharmony_ci{\ 254cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ 255cabdff1aSopenharmony_ci}\ 256cabdff1aSopenharmony_ci\ 257cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 258cabdff1aSopenharmony_ci{\ 259cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 260cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 261cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ 262cabdff1aSopenharmony_ci}\ 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_ci#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ 265cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 266cabdff1aSopenharmony_ci{\ 267cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 268cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 269cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 270cabdff1aSopenharmony_ci}\ 271cabdff1aSopenharmony_ci\ 272cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 273cabdff1aSopenharmony_ci{\ 274cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 275cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 276cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 277cabdff1aSopenharmony_ci}\ 278cabdff1aSopenharmony_ci\ 279cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 280cabdff1aSopenharmony_ci{\ 281cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 282cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 283cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 284cabdff1aSopenharmony_ci}\ 285cabdff1aSopenharmony_ci\ 286cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 287cabdff1aSopenharmony_ci{\ 288cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 289cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 290cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 291cabdff1aSopenharmony_ci}\ 292cabdff1aSopenharmony_ci\ 293cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 294cabdff1aSopenharmony_ci{\ 295cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\ 296cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ 297cabdff1aSopenharmony_ci}\ 298cabdff1aSopenharmony_ci\ 299cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 300cabdff1aSopenharmony_ci{\ 301cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 302cabdff1aSopenharmony_ci uint8_t * const halfHV= temp;\ 303cabdff1aSopenharmony_ci int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 304cabdff1aSopenharmony_ci av_assert2(((uintptr_t)temp & 7) == 0);\ 305cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 306cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ 307cabdff1aSopenharmony_ci}\ 308cabdff1aSopenharmony_ci\ 309cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 310cabdff1aSopenharmony_ci{\ 311cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 312cabdff1aSopenharmony_ci uint8_t * const halfHV= temp;\ 313cabdff1aSopenharmony_ci int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 314cabdff1aSopenharmony_ci av_assert2(((uintptr_t)temp & 7) == 0);\ 315cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 316cabdff1aSopenharmony_ci ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ 317cabdff1aSopenharmony_ci}\ 318cabdff1aSopenharmony_ci\ 319cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 320cabdff1aSopenharmony_ci{\ 321cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 322cabdff1aSopenharmony_ci uint8_t * const halfHV= temp;\ 323cabdff1aSopenharmony_ci int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 324cabdff1aSopenharmony_ci av_assert2(((uintptr_t)temp & 7) == 0);\ 325cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 326cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ 327cabdff1aSopenharmony_ci}\ 328cabdff1aSopenharmony_ci\ 329cabdff1aSopenharmony_cistatic void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 330cabdff1aSopenharmony_ci{\ 331cabdff1aSopenharmony_ci LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 332cabdff1aSopenharmony_ci uint8_t * const halfHV= temp;\ 333cabdff1aSopenharmony_ci int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 334cabdff1aSopenharmony_ci av_assert2(((uintptr_t)temp & 7) == 0);\ 335cabdff1aSopenharmony_ci ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 336cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ 337cabdff1aSopenharmony_ci}\ 338cabdff1aSopenharmony_ci 339cabdff1aSopenharmony_ci#define H264_MC(QPEL, SIZE, MMX, ALIGN)\ 340cabdff1aSopenharmony_ciQPEL(put_, SIZE, MMX, ALIGN) \ 341cabdff1aSopenharmony_ciQPEL(avg_, SIZE, MMX, ALIGN) \ 342cabdff1aSopenharmony_ci 343cabdff1aSopenharmony_ci#define H264_MC_816(QPEL, XMM)\ 344cabdff1aSopenharmony_ciQPEL(put_, 8, XMM, 16)\ 345cabdff1aSopenharmony_ciQPEL(put_, 16,XMM, 16)\ 346cabdff1aSopenharmony_ciQPEL(avg_, 8, XMM, 16)\ 347cabdff1aSopenharmony_ciQPEL(avg_, 16,XMM, 16)\ 348cabdff1aSopenharmony_ci 349cabdff1aSopenharmony_ciQPEL_H264(put_, PUT_OP, mmxext) 350cabdff1aSopenharmony_ciQPEL_H264(avg_, AVG_MMXEXT_OP, mmxext) 351cabdff1aSopenharmony_ciQPEL_H264_V_XMM(put_, PUT_OP, sse2) 352cabdff1aSopenharmony_ciQPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2) 353cabdff1aSopenharmony_ciQPEL_H264_HV_XMM(put_, PUT_OP, sse2) 354cabdff1aSopenharmony_ciQPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2) 355cabdff1aSopenharmony_ciQPEL_H264_H_XMM(put_, PUT_OP, ssse3) 356cabdff1aSopenharmony_ciQPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) 357cabdff1aSopenharmony_ciQPEL_H264_HV_XMM(put_, PUT_OP, ssse3) 358cabdff1aSopenharmony_ciQPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) 359cabdff1aSopenharmony_ci 360cabdff1aSopenharmony_ciH264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8) 361cabdff1aSopenharmony_ciH264_MC(H264_MC_C_H, 8, mmxext, 8) 362cabdff1aSopenharmony_ciH264_MC(H264_MC_C_H, 16, mmxext, 8) 363cabdff1aSopenharmony_ciH264_MC_816(H264_MC_V, sse2) 364cabdff1aSopenharmony_ciH264_MC_816(H264_MC_HV, sse2) 365cabdff1aSopenharmony_ciH264_MC_816(H264_MC_H, ssse3) 366cabdff1aSopenharmony_ciH264_MC_816(H264_MC_HV, ssse3) 367cabdff1aSopenharmony_ci 368cabdff1aSopenharmony_ci 369cabdff1aSopenharmony_ci//10bit 370cabdff1aSopenharmony_ci#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \ 371cabdff1aSopenharmony_civoid ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ 372cabdff1aSopenharmony_ci (uint8_t *dst, const uint8_t *src, ptrdiff_t stride); 373cabdff1aSopenharmony_ci 374cabdff1aSopenharmony_ci#define LUMA_MC_4(DEPTH, TYPE, OPT) \ 375cabdff1aSopenharmony_ci LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \ 376cabdff1aSopenharmony_ci LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) 377cabdff1aSopenharmony_ci 378cabdff1aSopenharmony_ci#define LUMA_MC_816(DEPTH, TYPE, OPT) \ 379cabdff1aSopenharmony_ci LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \ 380cabdff1aSopenharmony_ci LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \ 381cabdff1aSopenharmony_ci LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ 382cabdff1aSopenharmony_ci LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) 383cabdff1aSopenharmony_ci 384cabdff1aSopenharmony_ciLUMA_MC_4(10, mc00, mmxext) 385cabdff1aSopenharmony_ciLUMA_MC_4(10, mc10, mmxext) 386cabdff1aSopenharmony_ciLUMA_MC_4(10, mc20, mmxext) 387cabdff1aSopenharmony_ciLUMA_MC_4(10, mc30, mmxext) 388cabdff1aSopenharmony_ciLUMA_MC_4(10, mc01, mmxext) 389cabdff1aSopenharmony_ciLUMA_MC_4(10, mc11, mmxext) 390cabdff1aSopenharmony_ciLUMA_MC_4(10, mc21, mmxext) 391cabdff1aSopenharmony_ciLUMA_MC_4(10, mc31, mmxext) 392cabdff1aSopenharmony_ciLUMA_MC_4(10, mc02, mmxext) 393cabdff1aSopenharmony_ciLUMA_MC_4(10, mc12, mmxext) 394cabdff1aSopenharmony_ciLUMA_MC_4(10, mc22, mmxext) 395cabdff1aSopenharmony_ciLUMA_MC_4(10, mc32, mmxext) 396cabdff1aSopenharmony_ciLUMA_MC_4(10, mc03, mmxext) 397cabdff1aSopenharmony_ciLUMA_MC_4(10, mc13, mmxext) 398cabdff1aSopenharmony_ciLUMA_MC_4(10, mc23, mmxext) 399cabdff1aSopenharmony_ciLUMA_MC_4(10, mc33, mmxext) 400cabdff1aSopenharmony_ci 401cabdff1aSopenharmony_ciLUMA_MC_816(10, mc00, sse2) 402cabdff1aSopenharmony_ciLUMA_MC_816(10, mc10, sse2) 403cabdff1aSopenharmony_ciLUMA_MC_816(10, mc10, sse2_cache64) 404cabdff1aSopenharmony_ciLUMA_MC_816(10, mc10, ssse3_cache64) 405cabdff1aSopenharmony_ciLUMA_MC_816(10, mc20, sse2) 406cabdff1aSopenharmony_ciLUMA_MC_816(10, mc20, sse2_cache64) 407cabdff1aSopenharmony_ciLUMA_MC_816(10, mc20, ssse3_cache64) 408cabdff1aSopenharmony_ciLUMA_MC_816(10, mc30, sse2) 409cabdff1aSopenharmony_ciLUMA_MC_816(10, mc30, sse2_cache64) 410cabdff1aSopenharmony_ciLUMA_MC_816(10, mc30, ssse3_cache64) 411cabdff1aSopenharmony_ciLUMA_MC_816(10, mc01, sse2) 412cabdff1aSopenharmony_ciLUMA_MC_816(10, mc11, sse2) 413cabdff1aSopenharmony_ciLUMA_MC_816(10, mc21, sse2) 414cabdff1aSopenharmony_ciLUMA_MC_816(10, mc31, sse2) 415cabdff1aSopenharmony_ciLUMA_MC_816(10, mc02, sse2) 416cabdff1aSopenharmony_ciLUMA_MC_816(10, mc12, sse2) 417cabdff1aSopenharmony_ciLUMA_MC_816(10, mc22, sse2) 418cabdff1aSopenharmony_ciLUMA_MC_816(10, mc32, sse2) 419cabdff1aSopenharmony_ciLUMA_MC_816(10, mc03, sse2) 420cabdff1aSopenharmony_ciLUMA_MC_816(10, mc13, sse2) 421cabdff1aSopenharmony_ciLUMA_MC_816(10, mc23, sse2) 422cabdff1aSopenharmony_ciLUMA_MC_816(10, mc33, sse2) 423cabdff1aSopenharmony_ci 424cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 425cabdff1aSopenharmony_ci 426cabdff1aSopenharmony_ci#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \ 427cabdff1aSopenharmony_ci do { \ 428cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 429cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 430cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 431cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 432cabdff1aSopenharmony_ci } while (0) 433cabdff1aSopenharmony_ci#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ 434cabdff1aSopenharmony_ci do { \ 435cabdff1aSopenharmony_ci SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX); \ 436cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 437cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 438cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 439cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 440cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 441cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 442cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 443cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 444cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 445cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 446cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 447cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 448cabdff1aSopenharmony_ci } while (0) 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci#define H264_QPEL_FUNCS(x, y, CPU) \ 451cabdff1aSopenharmony_ci do { \ 452cabdff1aSopenharmony_ci c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 453cabdff1aSopenharmony_ci c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \ 454cabdff1aSopenharmony_ci c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 455cabdff1aSopenharmony_ci c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \ 456cabdff1aSopenharmony_ci } while (0) 457cabdff1aSopenharmony_ci 458cabdff1aSopenharmony_ci#define H264_QPEL_FUNCS_10(x, y, CPU) \ 459cabdff1aSopenharmony_ci do { \ 460cabdff1aSopenharmony_ci c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 461cabdff1aSopenharmony_ci c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ 462cabdff1aSopenharmony_ci c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 463cabdff1aSopenharmony_ci c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ 464cabdff1aSopenharmony_ci } while (0) 465cabdff1aSopenharmony_ci 466cabdff1aSopenharmony_ciav_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) 467cabdff1aSopenharmony_ci{ 468cabdff1aSopenharmony_ci#if HAVE_X86ASM 469cabdff1aSopenharmony_ci int high_bit_depth = bit_depth > 8; 470cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 471cabdff1aSopenharmony_ci 472cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 473cabdff1aSopenharmony_ci if (!high_bit_depth) { 474cabdff1aSopenharmony_ci SET_QPEL_FUNCS0123(put_h264_qpel, 0, 16, mmxext, ); 475cabdff1aSopenharmony_ci SET_QPEL_FUNCS0123(put_h264_qpel, 1, 8, mmxext, ); 476cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); 477cabdff1aSopenharmony_ci SET_QPEL_FUNCS0123(avg_h264_qpel, 0, 16, mmxext, ); 478cabdff1aSopenharmony_ci SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, ); 479cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); 480cabdff1aSopenharmony_ci } else if (bit_depth == 10) { 481cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); 482cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); 483cabdff1aSopenharmony_ci } 484cabdff1aSopenharmony_ci } 485cabdff1aSopenharmony_ci 486cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 487cabdff1aSopenharmony_ci if (!high_bit_depth) { 488cabdff1aSopenharmony_ci H264_QPEL_FUNCS(0, 1, sse2); 489cabdff1aSopenharmony_ci H264_QPEL_FUNCS(0, 2, sse2); 490cabdff1aSopenharmony_ci H264_QPEL_FUNCS(0, 3, sse2); 491cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 1, sse2); 492cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 2, sse2); 493cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 3, sse2); 494cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 1, sse2); 495cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 2, sse2); 496cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 3, sse2); 497cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 1, sse2); 498cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 2, sse2); 499cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 3, sse2); 500cabdff1aSopenharmony_ci } 501cabdff1aSopenharmony_ci 502cabdff1aSopenharmony_ci if (bit_depth == 10) { 503cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); 504cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); 505cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); 506cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); 507cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(1, 0, sse2_cache64); 508cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(2, 0, sse2_cache64); 509cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(3, 0, sse2_cache64); 510cabdff1aSopenharmony_ci } 511cabdff1aSopenharmony_ci } 512cabdff1aSopenharmony_ci 513cabdff1aSopenharmony_ci if (EXTERNAL_SSE2_FAST(cpu_flags)) { 514cabdff1aSopenharmony_ci if (!high_bit_depth) { 515cabdff1aSopenharmony_ci H264_QPEL_FUNCS(0, 0, sse2); 516cabdff1aSopenharmony_ci } 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci 519cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 520cabdff1aSopenharmony_ci if (!high_bit_depth) { 521cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 0, ssse3); 522cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 1, ssse3); 523cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 2, ssse3); 524cabdff1aSopenharmony_ci H264_QPEL_FUNCS(1, 3, ssse3); 525cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 0, ssse3); 526cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 1, ssse3); 527cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 2, ssse3); 528cabdff1aSopenharmony_ci H264_QPEL_FUNCS(2, 3, ssse3); 529cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 0, ssse3); 530cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 1, ssse3); 531cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 2, ssse3); 532cabdff1aSopenharmony_ci H264_QPEL_FUNCS(3, 3, ssse3); 533cabdff1aSopenharmony_ci } 534cabdff1aSopenharmony_ci 535cabdff1aSopenharmony_ci if (bit_depth == 10) { 536cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); 537cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); 538cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); 539cabdff1aSopenharmony_ci } 540cabdff1aSopenharmony_ci } 541cabdff1aSopenharmony_ci 542cabdff1aSopenharmony_ci if (EXTERNAL_AVX(cpu_flags)) { 543cabdff1aSopenharmony_ci /* AVX implies 64 byte cache lines without the need to avoid unaligned 544cabdff1aSopenharmony_ci * memory accesses that cross the boundary between two cache lines. 545cabdff1aSopenharmony_ci * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid 546cabdff1aSopenharmony_ci * having to treat SSE2 functions with such properties as AVX. */ 547cabdff1aSopenharmony_ci if (bit_depth == 10) { 548cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(1, 0, sse2); 549cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(2, 0, sse2); 550cabdff1aSopenharmony_ci H264_QPEL_FUNCS_10(3, 0, sse2); 551cabdff1aSopenharmony_ci } 552cabdff1aSopenharmony_ci } 553cabdff1aSopenharmony_ci#endif 554cabdff1aSopenharmony_ci} 555