1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Chinese AVS video (AVS1-P2, JiZhun profile) decoder. 3cabdff1aSopenharmony_ci * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * MMX-optimized DSP functions, based on H.264 optimizations by 6cabdff1aSopenharmony_ci * Michael Niedermayer and Loren Merritt 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * This file is part of FFmpeg. 9cabdff1aSopenharmony_ci * 10cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 11cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 12cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 13cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 14cabdff1aSopenharmony_ci * 15cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 16cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 17cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18cabdff1aSopenharmony_ci * Lesser General Public License for more details. 19cabdff1aSopenharmony_ci * 20cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 21cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 22cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23cabdff1aSopenharmony_ci */ 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 26cabdff1aSopenharmony_ci#include "libavutil/common.h" 27cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 28cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 29cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 30cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 31cabdff1aSopenharmony_ci#include "libavcodec/cavsdsp.h" 32cabdff1aSopenharmony_ci#include "libavcodec/idctdsp.h" 33cabdff1aSopenharmony_ci#include "constants.h" 34cabdff1aSopenharmony_ci#include "fpel.h" 35cabdff1aSopenharmony_ci#include "idctdsp.h" 36cabdff1aSopenharmony_ci#include "config.h" 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_ci#if HAVE_SSE2_EXTERNAL 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_civoid ff_cavs_idct8_sse2(int16_t *out, const int16_t *in); 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_cistatic void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) 44cabdff1aSopenharmony_ci{ 45cabdff1aSopenharmony_ci LOCAL_ALIGNED(16, int16_t, b2, [64]); 46cabdff1aSopenharmony_ci ff_cavs_idct8_sse2(b2, block); 47cabdff1aSopenharmony_ci ff_add_pixels_clamped_sse2(b2, dst, stride); 48cabdff1aSopenharmony_ci} 49cabdff1aSopenharmony_ci 50cabdff1aSopenharmony_ci#endif /* HAVE_SSE2_EXTERNAL */ 51cabdff1aSopenharmony_ci 52cabdff1aSopenharmony_ci#if HAVE_MMXEXT_INLINE 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ci/***************************************************************************** 55cabdff1aSopenharmony_ci * 56cabdff1aSopenharmony_ci * motion compensation 57cabdff1aSopenharmony_ci * 58cabdff1aSopenharmony_ci ****************************************************************************/ 59cabdff1aSopenharmony_ci 60cabdff1aSopenharmony_ci/* vertical filter [-1 -2 96 42 -7 0] */ 61cabdff1aSopenharmony_ci#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 62cabdff1aSopenharmony_ci "movd (%0), "#F" \n\t"\ 63cabdff1aSopenharmony_ci "movq "#C", %%mm6 \n\t"\ 64cabdff1aSopenharmony_ci "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ 65cabdff1aSopenharmony_ci "movq "#D", %%mm7 \n\t"\ 66cabdff1aSopenharmony_ci "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ 67cabdff1aSopenharmony_ci "psllw $3, "#E" \n\t"\ 68cabdff1aSopenharmony_ci "psubw "#E", %%mm6 \n\t"\ 69cabdff1aSopenharmony_ci "psraw $3, "#E" \n\t"\ 70cabdff1aSopenharmony_ci "paddw %%mm7, %%mm6 \n\t"\ 71cabdff1aSopenharmony_ci "paddw "#E", %%mm6 \n\t"\ 72cabdff1aSopenharmony_ci "paddw "#B", "#B" \n\t"\ 73cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t"\ 74cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 75cabdff1aSopenharmony_ci "punpcklbw %%mm7, "#F" \n\t"\ 76cabdff1aSopenharmony_ci "psubw "#B", %%mm6 \n\t"\ 77cabdff1aSopenharmony_ci "psraw $1, "#B" \n\t"\ 78cabdff1aSopenharmony_ci "psubw "#A", %%mm6 \n\t"\ 79cabdff1aSopenharmony_ci "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 80cabdff1aSopenharmony_ci "psraw $7, %%mm6 \n\t"\ 81cabdff1aSopenharmony_ci "packuswb %%mm6, %%mm6 \n\t"\ 82cabdff1aSopenharmony_ci OP(%%mm6, (%1), A, d) \ 83cabdff1aSopenharmony_ci "add %3, %1 \n\t" 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_ci/* vertical filter [ 0 -1 5 5 -1 0] */ 86cabdff1aSopenharmony_ci#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 87cabdff1aSopenharmony_ci "movd (%0), "#F" \n\t"\ 88cabdff1aSopenharmony_ci "movq "#C", %%mm6 \n\t"\ 89cabdff1aSopenharmony_ci "paddw "#D", %%mm6 \n\t"\ 90cabdff1aSopenharmony_ci "pmullw "MANGLE(MUL1)", %%mm6\n\t"\ 91cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 92cabdff1aSopenharmony_ci "punpcklbw %%mm7, "#F" \n\t"\ 93cabdff1aSopenharmony_ci "psubw "#B", %%mm6 \n\t"\ 94cabdff1aSopenharmony_ci "psubw "#E", %%mm6 \n\t"\ 95cabdff1aSopenharmony_ci "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 96cabdff1aSopenharmony_ci "psraw $3, %%mm6 \n\t"\ 97cabdff1aSopenharmony_ci "packuswb %%mm6, %%mm6 \n\t"\ 98cabdff1aSopenharmony_ci OP(%%mm6, (%1), A, d) \ 99cabdff1aSopenharmony_ci "add %3, %1 \n\t" 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ci/* vertical filter [ 0 -7 42 96 -2 -1] */ 102cabdff1aSopenharmony_ci#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \ 103cabdff1aSopenharmony_ci "movd (%0), "#F" \n\t"\ 104cabdff1aSopenharmony_ci "movq "#C", %%mm6 \n\t"\ 105cabdff1aSopenharmony_ci "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ 106cabdff1aSopenharmony_ci "movq "#D", %%mm7 \n\t"\ 107cabdff1aSopenharmony_ci "pmullw "MANGLE(MUL1)", %%mm7\n\t"\ 108cabdff1aSopenharmony_ci "psllw $3, "#B" \n\t"\ 109cabdff1aSopenharmony_ci "psubw "#B", %%mm6 \n\t"\ 110cabdff1aSopenharmony_ci "psraw $3, "#B" \n\t"\ 111cabdff1aSopenharmony_ci "paddw %%mm7, %%mm6 \n\t"\ 112cabdff1aSopenharmony_ci "paddw "#B", %%mm6 \n\t"\ 113cabdff1aSopenharmony_ci "paddw "#E", "#E" \n\t"\ 114cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t"\ 115cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 116cabdff1aSopenharmony_ci "punpcklbw %%mm7, "#F" \n\t"\ 117cabdff1aSopenharmony_ci "psubw "#E", %%mm6 \n\t"\ 118cabdff1aSopenharmony_ci "psraw $1, "#E" \n\t"\ 119cabdff1aSopenharmony_ci "psubw "#F", %%mm6 \n\t"\ 120cabdff1aSopenharmony_ci "paddw "MANGLE(ADD)", %%mm6 \n\t"\ 121cabdff1aSopenharmony_ci "psraw $7, %%mm6 \n\t"\ 122cabdff1aSopenharmony_ci "packuswb %%mm6, %%mm6 \n\t"\ 123cabdff1aSopenharmony_ci OP(%%mm6, (%1), A, d) \ 124cabdff1aSopenharmony_ci "add %3, %1 \n\t" 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_ci#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\ 128cabdff1aSopenharmony_ci int w= 2;\ 129cabdff1aSopenharmony_ci src -= 2*srcStride;\ 130cabdff1aSopenharmony_ci \ 131cabdff1aSopenharmony_ci while(w--){\ 132cabdff1aSopenharmony_ci __asm__ volatile(\ 133cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t"\ 134cabdff1aSopenharmony_ci "movd (%0), %%mm0 \n\t"\ 135cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 136cabdff1aSopenharmony_ci "movd (%0), %%mm1 \n\t"\ 137cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 138cabdff1aSopenharmony_ci "movd (%0), %%mm2 \n\t"\ 139cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 140cabdff1aSopenharmony_ci "movd (%0), %%mm3 \n\t"\ 141cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 142cabdff1aSopenharmony_ci "movd (%0), %%mm4 \n\t"\ 143cabdff1aSopenharmony_ci "add %2, %0 \n\t"\ 144cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm0 \n\t"\ 145cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm1 \n\t"\ 146cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm2 \n\t"\ 147cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm3 \n\t"\ 148cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm4 \n\t"\ 149cabdff1aSopenharmony_ci VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 150cabdff1aSopenharmony_ci VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 151cabdff1aSopenharmony_ci VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 152cabdff1aSopenharmony_ci VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 153cabdff1aSopenharmony_ci VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ 154cabdff1aSopenharmony_ci VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ 155cabdff1aSopenharmony_ci VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 156cabdff1aSopenharmony_ci VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 157cabdff1aSopenharmony_ci \ 158cabdff1aSopenharmony_ci : "+a"(src), "+c"(dst)\ 159cabdff1aSopenharmony_ci : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ 160cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ 161cabdff1aSopenharmony_ci : "memory"\ 162cabdff1aSopenharmony_ci );\ 163cabdff1aSopenharmony_ci if(h==16){\ 164cabdff1aSopenharmony_ci __asm__ volatile(\ 165cabdff1aSopenharmony_ci VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 166cabdff1aSopenharmony_ci VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 167cabdff1aSopenharmony_ci VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\ 168cabdff1aSopenharmony_ci VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\ 169cabdff1aSopenharmony_ci VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\ 170cabdff1aSopenharmony_ci VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\ 171cabdff1aSopenharmony_ci VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\ 172cabdff1aSopenharmony_ci VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\ 173cabdff1aSopenharmony_ci \ 174cabdff1aSopenharmony_ci : "+a"(src), "+c"(dst)\ 175cabdff1aSopenharmony_ci : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\ 176cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\ 177cabdff1aSopenharmony_ci : "memory"\ 178cabdff1aSopenharmony_ci );\ 179cabdff1aSopenharmony_ci }\ 180cabdff1aSopenharmony_ci src += 4-(h+5)*srcStride;\ 181cabdff1aSopenharmony_ci dst += 4-h*dstStride;\ 182cabdff1aSopenharmony_ci } 183cabdff1aSopenharmony_ci 184cabdff1aSopenharmony_ci#define QPEL_CAVS(OPNAME, OP, MMX)\ 185cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 186cabdff1aSopenharmony_ci{\ 187cabdff1aSopenharmony_ci int h=8;\ 188cabdff1aSopenharmony_ci __asm__ volatile(\ 189cabdff1aSopenharmony_ci "pxor %%mm7, %%mm7 \n\t"\ 190cabdff1aSopenharmony_ci "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ 191cabdff1aSopenharmony_ci "1: \n\t"\ 192cabdff1aSopenharmony_ci "movq (%0), %%mm0 \n\t"\ 193cabdff1aSopenharmony_ci "movq 1(%0), %%mm2 \n\t"\ 194cabdff1aSopenharmony_ci "movq %%mm0, %%mm1 \n\t"\ 195cabdff1aSopenharmony_ci "movq %%mm2, %%mm3 \n\t"\ 196cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm0 \n\t"\ 197cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm1 \n\t"\ 198cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm2 \n\t"\ 199cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm3 \n\t"\ 200cabdff1aSopenharmony_ci "paddw %%mm2, %%mm0 \n\t"\ 201cabdff1aSopenharmony_ci "paddw %%mm3, %%mm1 \n\t"\ 202cabdff1aSopenharmony_ci "pmullw %%mm6, %%mm0 \n\t"\ 203cabdff1aSopenharmony_ci "pmullw %%mm6, %%mm1 \n\t"\ 204cabdff1aSopenharmony_ci "movq -1(%0), %%mm2 \n\t"\ 205cabdff1aSopenharmony_ci "movq 2(%0), %%mm4 \n\t"\ 206cabdff1aSopenharmony_ci "movq %%mm2, %%mm3 \n\t"\ 207cabdff1aSopenharmony_ci "movq %%mm4, %%mm5 \n\t"\ 208cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm2 \n\t"\ 209cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm3 \n\t"\ 210cabdff1aSopenharmony_ci "punpcklbw %%mm7, %%mm4 \n\t"\ 211cabdff1aSopenharmony_ci "punpckhbw %%mm7, %%mm5 \n\t"\ 212cabdff1aSopenharmony_ci "paddw %%mm4, %%mm2 \n\t"\ 213cabdff1aSopenharmony_ci "paddw %%mm3, %%mm5 \n\t"\ 214cabdff1aSopenharmony_ci "psubw %%mm2, %%mm0 \n\t"\ 215cabdff1aSopenharmony_ci "psubw %%mm5, %%mm1 \n\t"\ 216cabdff1aSopenharmony_ci "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\ 217cabdff1aSopenharmony_ci "paddw %%mm5, %%mm0 \n\t"\ 218cabdff1aSopenharmony_ci "paddw %%mm5, %%mm1 \n\t"\ 219cabdff1aSopenharmony_ci "psraw $3, %%mm0 \n\t"\ 220cabdff1aSopenharmony_ci "psraw $3, %%mm1 \n\t"\ 221cabdff1aSopenharmony_ci "packuswb %%mm1, %%mm0 \n\t"\ 222cabdff1aSopenharmony_ci OP(%%mm0, (%1),%%mm5, q) \ 223cabdff1aSopenharmony_ci "add %3, %0 \n\t"\ 224cabdff1aSopenharmony_ci "add %4, %1 \n\t"\ 225cabdff1aSopenharmony_ci "decl %2 \n\t"\ 226cabdff1aSopenharmony_ci " jnz 1b \n\t"\ 227cabdff1aSopenharmony_ci : "+a"(src), "+c"(dst), "+m"(h)\ 228cabdff1aSopenharmony_ci : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\ 229cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\ 230cabdff1aSopenharmony_ci : "memory"\ 231cabdff1aSopenharmony_ci );\ 232cabdff1aSopenharmony_ci}\ 233cabdff1aSopenharmony_ci\ 234cabdff1aSopenharmony_cistatic inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 235cabdff1aSopenharmony_ci{ \ 236cabdff1aSopenharmony_ci QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ 237cabdff1aSopenharmony_ci}\ 238cabdff1aSopenharmony_ci\ 239cabdff1aSopenharmony_cistatic inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 240cabdff1aSopenharmony_ci{ \ 241cabdff1aSopenharmony_ci QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \ 242cabdff1aSopenharmony_ci}\ 243cabdff1aSopenharmony_ci\ 244cabdff1aSopenharmony_cistatic inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\ 245cabdff1aSopenharmony_ci{ \ 246cabdff1aSopenharmony_ci QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ 247cabdff1aSopenharmony_ci}\ 248cabdff1aSopenharmony_ci\ 249cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 250cabdff1aSopenharmony_ci{ \ 251cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\ 252cabdff1aSopenharmony_ci}\ 253cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 254cabdff1aSopenharmony_ci{ \ 255cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\ 256cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 257cabdff1aSopenharmony_ci}\ 258cabdff1aSopenharmony_ci\ 259cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 260cabdff1aSopenharmony_ci{ \ 261cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\ 262cabdff1aSopenharmony_ci}\ 263cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 264cabdff1aSopenharmony_ci{ \ 265cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\ 266cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 267cabdff1aSopenharmony_ci}\ 268cabdff1aSopenharmony_ci\ 269cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 270cabdff1aSopenharmony_ci{ \ 271cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\ 272cabdff1aSopenharmony_ci}\ 273cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 274cabdff1aSopenharmony_ci{ \ 275cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\ 276cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 277cabdff1aSopenharmony_ci}\ 278cabdff1aSopenharmony_ci\ 279cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ 280cabdff1aSopenharmony_ci{ \ 281cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ 282cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 283cabdff1aSopenharmony_ci src += 8*srcStride;\ 284cabdff1aSopenharmony_ci dst += 8*dstStride;\ 285cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ 286cabdff1aSopenharmony_ci OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 287cabdff1aSopenharmony_ci}\ 288cabdff1aSopenharmony_ci 289cabdff1aSopenharmony_ci#define CAVS_MC(OPNAME, SIZE, MMX) \ 290cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 291cabdff1aSopenharmony_ci{\ 292cabdff1aSopenharmony_ci OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ 293cabdff1aSopenharmony_ci}\ 294cabdff1aSopenharmony_ci\ 295cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 296cabdff1aSopenharmony_ci{\ 297cabdff1aSopenharmony_ci OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ 298cabdff1aSopenharmony_ci}\ 299cabdff1aSopenharmony_ci\ 300cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 301cabdff1aSopenharmony_ci{\ 302cabdff1aSopenharmony_ci OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ 303cabdff1aSopenharmony_ci}\ 304cabdff1aSopenharmony_ci\ 305cabdff1aSopenharmony_cistatic void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 306cabdff1aSopenharmony_ci{\ 307cabdff1aSopenharmony_ci OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ 308cabdff1aSopenharmony_ci}\ 309cabdff1aSopenharmony_ci 310cabdff1aSopenharmony_ci#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" 311cabdff1aSopenharmony_ci#define AVG_3DNOW_OP(a,b,temp, size) \ 312cabdff1aSopenharmony_ci"mov" #size " " #b ", " #temp " \n\t"\ 313cabdff1aSopenharmony_ci"pavgusb " #temp ", " #a " \n\t"\ 314cabdff1aSopenharmony_ci"mov" #size " " #a ", " #b " \n\t" 315cabdff1aSopenharmony_ci#define AVG_MMXEXT_OP(a, b, temp, size) \ 316cabdff1aSopenharmony_ci"mov" #size " " #b ", " #temp " \n\t"\ 317cabdff1aSopenharmony_ci"pavgb " #temp ", " #a " \n\t"\ 318cabdff1aSopenharmony_ci"mov" #size " " #a ", " #b " \n\t" 319cabdff1aSopenharmony_ci 320cabdff1aSopenharmony_ci#endif /* HAVE_MMXEXT_INLINE */ 321cabdff1aSopenharmony_ci 322cabdff1aSopenharmony_ci#if HAVE_MMX_EXTERNAL 323cabdff1aSopenharmony_cistatic void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, 324cabdff1aSopenharmony_ci ptrdiff_t stride) 325cabdff1aSopenharmony_ci{ 326cabdff1aSopenharmony_ci ff_put_pixels8_mmx(dst, src, stride, 8); 327cabdff1aSopenharmony_ci} 328cabdff1aSopenharmony_ci 329cabdff1aSopenharmony_cistatic void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, 330cabdff1aSopenharmony_ci ptrdiff_t stride) 331cabdff1aSopenharmony_ci{ 332cabdff1aSopenharmony_ci ff_avg_pixels8_mmxext(dst, src, stride, 8); 333cabdff1aSopenharmony_ci} 334cabdff1aSopenharmony_ci 335cabdff1aSopenharmony_cistatic void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, 336cabdff1aSopenharmony_ci ptrdiff_t stride) 337cabdff1aSopenharmony_ci{ 338cabdff1aSopenharmony_ci ff_put_pixels16_sse2(dst, src, stride, 16); 339cabdff1aSopenharmony_ci} 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_cistatic void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, 342cabdff1aSopenharmony_ci ptrdiff_t stride) 343cabdff1aSopenharmony_ci{ 344cabdff1aSopenharmony_ci ff_avg_pixels16_sse2(dst, src, stride, 16); 345cabdff1aSopenharmony_ci} 346cabdff1aSopenharmony_ci#endif 347cabdff1aSopenharmony_ci 348cabdff1aSopenharmony_cistatic av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, 349cabdff1aSopenharmony_ci AVCodecContext *avctx) 350cabdff1aSopenharmony_ci{ 351cabdff1aSopenharmony_ci#if HAVE_MMX_EXTERNAL 352cabdff1aSopenharmony_ci c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx; 353cabdff1aSopenharmony_ci#endif /* HAVE_MMX_EXTERNAL */ 354cabdff1aSopenharmony_ci} 355cabdff1aSopenharmony_ci 356cabdff1aSopenharmony_ci#define DSPFUNC(PFX, IDX, NUM, EXT) \ 357cabdff1aSopenharmony_ci c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \ 358cabdff1aSopenharmony_ci c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \ 359cabdff1aSopenharmony_ci c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \ 360cabdff1aSopenharmony_ci c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \ 361cabdff1aSopenharmony_ci 362cabdff1aSopenharmony_ci#if HAVE_MMXEXT_INLINE 363cabdff1aSopenharmony_ciQPEL_CAVS(put_, PUT_OP, mmxext) 364cabdff1aSopenharmony_ciQPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext) 365cabdff1aSopenharmony_ci 366cabdff1aSopenharmony_ciCAVS_MC(put_, 8, mmxext) 367cabdff1aSopenharmony_ciCAVS_MC(put_, 16, mmxext) 368cabdff1aSopenharmony_ciCAVS_MC(avg_, 8, mmxext) 369cabdff1aSopenharmony_ciCAVS_MC(avg_, 16, mmxext) 370cabdff1aSopenharmony_ci#endif /* HAVE_MMXEXT_INLINE */ 371cabdff1aSopenharmony_ci 372cabdff1aSopenharmony_ciav_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) 373cabdff1aSopenharmony_ci{ 374cabdff1aSopenharmony_ci av_unused int cpu_flags = av_get_cpu_flags(); 375cabdff1aSopenharmony_ci 376cabdff1aSopenharmony_ci if (X86_MMX(cpu_flags)) 377cabdff1aSopenharmony_ci cavsdsp_init_mmx(c, avctx); 378cabdff1aSopenharmony_ci 379cabdff1aSopenharmony_ci#if HAVE_MMXEXT_INLINE 380cabdff1aSopenharmony_ci if (INLINE_MMXEXT(cpu_flags)) { 381cabdff1aSopenharmony_ci DSPFUNC(put, 0, 16, mmxext); 382cabdff1aSopenharmony_ci DSPFUNC(put, 1, 8, mmxext); 383cabdff1aSopenharmony_ci DSPFUNC(avg, 0, 16, mmxext); 384cabdff1aSopenharmony_ci DSPFUNC(avg, 1, 8, mmxext); 385cabdff1aSopenharmony_ci } 386cabdff1aSopenharmony_ci#endif 387cabdff1aSopenharmony_ci#if HAVE_MMX_EXTERNAL 388cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 389cabdff1aSopenharmony_ci c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext; 390cabdff1aSopenharmony_ci } 391cabdff1aSopenharmony_ci#endif 392cabdff1aSopenharmony_ci#if HAVE_SSE2_EXTERNAL 393cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 394cabdff1aSopenharmony_ci c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2; 395cabdff1aSopenharmony_ci c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2; 396cabdff1aSopenharmony_ci 397cabdff1aSopenharmony_ci c->cavs_idct8_add = cavs_idct8_add_sse2; 398cabdff1aSopenharmony_ci c->idct_perm = FF_IDCT_PERM_TRANSPOSE; 399cabdff1aSopenharmony_ci } 400cabdff1aSopenharmony_ci#endif 401cabdff1aSopenharmony_ci} 402