;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

; Syntax: NASM with the x86inc/x86util macro layer (cglobal, INIT_XMM/INIT_YMM,
; mova/movu, mN registers, SUFFIX, REP_RET/RET). Argument registers and the
; calling convention are abstracted by cglobal; nothing here names a raw ABI
; register.

%include "libavutil/x86/x86util.asm"


SECTION_RODATA 32
; Vectors of ones, 32 bytes each. Used as the "1" factor / rounding seed in the
; int16 kernels below.
dw1: times 8  dd 1                      ; eight dwords, each = 1
w1 : times 16 dw 1                      ; sixteen words, each = 1

SECTION .text

;------------------------------------------------------------------------------
; MIX2_FLT a|u
;
; Emits mix_2_1_{a,u}_float(out, in1, in2, coeffp, index1, index2, len):
;
;     out[i] = in1[i] * coeffp[index1] + in2[i] * coeffp[index2]
;
; on packed single-precision floats (mulps/addps, 4-byte index/len scaling).
;
; Variant "a" uses aligned loads/stores; it first checks the low bits of all
; three pointers and, if any is not mmsize-aligned, jumps to the label that the
; "u" variant defines at its entry. The "u" variant must therefore be
; instantiated under the same INIT_* as the "a" variant. Both variants use the
; same cglobal signature, so the jump target sees the same register layout.
;
; The loop is a do/while that consumes 2*mmsize bytes per iteration: it always
; runs at least once and only stops on a 2*mmsize-byte boundary.
; NOTE(review): callers presumably guarantee len is a non-zero multiple of
; 2*mmsize/4 elements -- confirm against the C caller.
;------------------------------------------------------------------------------
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Aligned entry point: bail out to the unaligned body on any misalignment.
    test            in1q, mmsize-1
    jne             mix_2_1_float_u_int %+ SUFFIX
    test            in2q, mmsize-1
    jne             mix_2_1_float_u_int %+ SUFFIX
    test            outq, mmsize-1
    jne             mix_2_1_float_u_int %+ SUFFIX
%else
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS    m4, [coeffpq + 4*index1q]   ; m4 = coeffp[index1] in every lane
    VBROADCASTSS    m5, [coeffpq + 4*index2q]   ; m5 = coeffp[index2] in every lane
    ; len (elements) -> byte count. This is a 32-bit shift: on x86-64 a write
    ; to lend zero-extends into lenq.
    ; NOTE(review): MIX1_FLT uses the 64-bit form (shl lenq) -- confirm the
    ; difference is intentional.
    shl             lend, 2
    ; Point all buffers at their end and walk a negative offset up to zero, so
    ; a single add both advances the offset and sets the loop flags.
    add             in1q, lenq
    add             in2q, lenq
    add             outq, lenq
    neg             lenq
.next:
%ifidn %1, a
    ; Aligned: multiply straight from memory.
    mulps           m0, m4, [in1q + lenq         ]
    mulps           m1, m5, [in2q + lenq         ]
    mulps           m2, m4, [in1q + lenq + mmsize]
    mulps           m3, m5, [in2q + lenq + mmsize]
%else
    ; Unaligned: explicit unaligned loads first, then multiply in registers.
    movu            m0, [in1q + lenq         ]
    movu            m1, [in2q + lenq         ]
    movu            m2, [in1q + lenq + mmsize]
    movu            m3, [in2q + lenq + mmsize]
    mulps           m0, m0, m4
    mulps           m1, m1, m5
    mulps           m2, m2, m4
    mulps           m3, m3, m5
%endif
    addps           m0, m0, m1                  ; first vector of the mix
    addps           m2, m2, m3                  ; second vector of the mix
    mov%1           [outq + lenq         ], m0
    mov%1           [outq + lenq + mmsize], m2
    add             lenq, mmsize*2
    jl              .next                       ; offset still negative -> more data
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; MIX1_FLT a|u
;
; Emits mix_1_1_{a,u}_float(out, in, coeffp, index, len):
;
;     out[i] = in[i] * coeffp[index]
;
; on packed single-precision floats. Alignment dispatch ("a" falls back to the
; label inside "u") and the do/while loop shape (2*mmsize bytes per iteration,
; at least one iteration) are the same as in MIX2_FLT.
; NOTE(review): callers presumably guarantee len is a non-zero multiple of
; 2*mmsize/4 elements -- confirm against the C caller.
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    ; Aligned entry point: bail out to the unaligned body on any misalignment.
    test            inq, mmsize-1
    jne             mix_1_1_float_u_int %+ SUFFIX
    test            outq, mmsize-1
    jne             mix_1_1_float_u_int %+ SUFFIX
%else
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS    m2, [coeffpq + 4*indexq]    ; m2 = coeffp[index] in every lane
    shl             lenq, 2                     ; elements -> bytes (4 per float)
    add             inq, lenq                   ; point at buffer ends ...
    add             outq, lenq
    neg             lenq                        ; ... and count a negative offset up
.next:
%ifidn %1, a
    mulps           m0, m2, [inq + lenq         ]
    mulps           m1, m2, [inq + lenq + mmsize]
%else
    movu            m0, [inq + lenq         ]
    movu            m1, [inq + lenq + mmsize]
    mulps           m0, m0, m2
    mulps           m1, m1, m2
%endif
    mov%1           [outq + lenq         ], m0
    mov%1           [outq + lenq + mmsize], m1
    add             lenq, mmsize*2
    jl              .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; MIX1_INT16 a|u
;
; Emits mix_1_1_{a,u}_int16(out, in, coeffp, index, len) for 16-bit samples.
;
; The dword at coeffp + 4*index is read as two 16-bit fields:
;     bits  0..15  coefficient c
;     bits 16..31  right-shift amount s
; and each output sample is
;
;     out[i] = saturate_int16((in[i] * c + ((1 << s) >> 1)) >> s)
;
; The rounding term is folded into the multiply: every input word is paired
; with a constant 1, every coefficient word with the rounding word, and
; pmaddwd then yields in*c + 1*round in one step. The rounding term is built
; with 16-bit shifts, so it only has 16 bits of range.
;
; Alignment dispatch and the do/while loop shape (2*mmsize bytes per
; iteration, at least one iteration) are the same as in MIX2_FLT.
; NOTE(review): callers presumably guarantee len is a non-zero multiple of
; mmsize elements -- confirm against the C caller.
;------------------------------------------------------------------------------
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    ; Aligned entry point: bail out to the unaligned body on any misalignment.
    test            inq, mmsize-1
    jne             mix_1_1_int16_u_int %+ SUFFIX
    test            outq, mmsize-1
    jne             mix_1_1_int16_u_int %+ SUFFIX
%else
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd            m4, [coeffpq + 4*indexq]    ; m4 = packed (shift:coeff) dword
    SPLATW          m5, m4                      ; m5 = coefficient word, splatted
    psllq           m4, 32                      ; isolate bits 16..31 of the dword:
    psrlq           m4, 48                      ;   m4 = shift amount s
    mova            m0, [w1]
    psllw           m0, m4                      ; 1 << s       (per word)
    psrlw           m0, 1                       ; (1 << s) >> 1 = rounding word
    punpcklwd       m5, m0                      ; m5 = (c, round) word pairs
    add             lenq, lenq                  ; elements -> bytes (2 per sample)
    add             inq, lenq                   ; point at buffer ends ...
    add             outq, lenq
    neg             lenq                        ; ... and count a negative offset up
.next:
    mov%1           m0, [inq + lenq         ]
    mov%1           m2, [inq + lenq + mmsize]
    mova            m1, m0
    mova            m3, m2
    ; Widen to (sample, 1) word pairs so pmaddwd can add the rounding term.
    punpcklwd       m0, [w1]
    punpckhwd       m1, [w1]
    punpcklwd       m2, [w1]
    punpckhwd       m3, [w1]
    pmaddwd         m0, m5                      ; sample*c + 1*round (32-bit)
    pmaddwd         m1, m5
    pmaddwd         m2, m5
    pmaddwd         m3, m5
    psrad           m0, m4                      ; arithmetic >> s
    psrad           m1, m4
    psrad           m2, m4
    psrad           m3, m4
    packssdw        m0, m1                      ; back to int16 with signed saturation
    packssdw        m2, m3
    mov%1           [outq + lenq         ], m0
    mov%1           [outq + lenq + mmsize], m2
    add             lenq, mmsize*2
    jl              .next
%if mmsize == 8
    ; Only taken for 8-byte (MMX) instantiations: leave MMX state before
    ; returning. The instantiations in this file use INIT_XMM only.
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; MIX2_INT16 a|u
;
; Emits mix_2_1_{a,u}_int16(out, in1, in2, coeffp, index1, index2, len) for
; 16-bit samples.
;
; Coefficients are the low words of the dwords at coeffp + 4*index1 (c1) and
; coeffp + 4*index2 (c2). The right-shift amount s is taken from bits 16..31 of
; the index1 dword only; the upper half of the index2 dword is not used.
;
;     out[i] = saturate_int16((in1[i]*c1 + in2[i]*c2 + ((1 << s) >> 1)) >> s)
;
; in1/in2 words are interleaved so that one pmaddwd against (c1, c2) pairs
; produces the 32-bit sum of both products; the rounding dword is added
; separately.
;
; Alignment dispatch and the do/while loop shape (2*mmsize bytes per
; iteration, at least one iteration) are the same as in MIX2_FLT.
; NOTE(review): callers presumably guarantee len is a non-zero multiple of
; mmsize elements -- confirm against the C caller.
;------------------------------------------------------------------------------
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Aligned entry point: bail out to the unaligned body on any misalignment.
    test            in1q, mmsize-1
    jne             mix_2_1_int16_u_int %+ SUFFIX
    test            in2q, mmsize-1
    jne             mix_2_1_int16_u_int %+ SUFFIX
    test            outq, mmsize-1
    jne             mix_2_1_int16_u_int %+ SUFFIX
%else
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd            m4, [coeffpq + 4*index1q]   ; m4 = packed (shift:c1) dword
    movd            m6, [coeffpq + 4*index2q]   ; m6 = dword holding c2 in its low word
    SPLATW          m5, m4                      ; m5 = c1 word, splatted
    SPLATW          m6, m6                      ; m6 = c2 word, splatted
    psllq           m4, 32                      ; isolate bits 16..31 of the dword:
    psrlq           m4, 48                      ;   m4 = shift amount s
    mova            m7, [dw1]
    pslld           m7, m4                      ; 1 << s       (per dword)
    psrld           m7, 1                       ; (1 << s) >> 1 = rounding dword
    punpcklwd       m5, m6                      ; m5 = (c1, c2) word pairs
    ; len (elements) -> byte count. This is a 32-bit add: on x86-64 a write to
    ; lend zero-extends into lenq.
    ; NOTE(review): MIX1_INT16 uses the 64-bit form (add lenq, lenq) -- confirm
    ; the difference is intentional.
    add             lend, lend
    add             in1q, lenq                  ; point at buffer ends ...
    add             in2q, lenq
    add             outq, lenq
    neg             lenq                        ; ... and count a negative offset up
.next:
    ; First vector: interleave to (in1, in2) word pairs.
    mov%1           m0, [in1q + lenq         ]
    mov%1           m2, [in2q + lenq         ]
    mova            m1, m0
    punpcklwd       m0, m2
    punpckhwd       m1, m2

    ; Second vector; m6 is free again here because c2 already lives in m5.
    mov%1           m2, [in1q + lenq + mmsize]
    mov%1           m6, [in2q + lenq + mmsize]
    mova            m3, m2
    punpcklwd       m2, m6
    punpckhwd       m3, m6

    pmaddwd         m0, m5                      ; in1*c1 + in2*c2 (32-bit)
    pmaddwd         m1, m5
    pmaddwd         m2, m5
    pmaddwd         m3, m5
    paddd           m0, m7                      ; + rounding
    paddd           m1, m7
    paddd           m2, m7
    paddd           m3, m7
    psrad           m0, m4                      ; arithmetic >> s
    psrad           m1, m4
    psrad           m2, m4
    psrad           m3, m4
    packssdw        m0, m1                      ; back to int16 with signed saturation
    packssdw        m2, m3
    mov%1           [outq + lenq         ], m0
    mov%1           [outq + lenq + mmsize], m2
    add             lenq, mmsize*2
    jl              .next
%if mmsize == 8
    ; Only taken for 8-byte (MMX) instantiations: leave MMX state before
    ; returning. The instantiations in this file use INIT_XMM only.
    emms
    RET
%else
    REP_RET
%endif
%endmacro


; Instantiations. Each "a" variant jumps into the matching "u" variant's
; internal label, so both are emitted under the same INIT_* setting.
INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif