;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"


; Read-only constants, placed in a section requested with 32-byte alignment.
SECTION_RODATA 32
; 8 dwords of 1: source of the per-dword rounding term in MIX2_INT16.
dw1: times 8  dd 1
; 16 words of 1: source of the per-word rounding term in MIX1_INT16, and the
; constant "1" that is interleaved with the samples there.
w1 : times 16 dw 1

SECTION .text

;------------------------------------------------------------------------------
; MIX2_FLT a|u
;
; Emits mix_2_1_%1_float(out, in1, in2, coeffp, index1, index2, len), which
; computes, on packed single-precision floats,
;
;     out[i] = in1[i] * coeffp[index1] + in2[i] * coeffp[index2]
;
; %1 selects the memory access flavour:
;   u - unaligned loads/stores (movu).
;   a - aligned loads/stores (mova, loads folded into mulps). The entry code
;       checks out/in1/in2 against mmsize-1 and, if any of them is misaligned,
;       jumps to the label mix_2_1_float_u_int<SUFFIX> inside the "u" variant.
;       A "u" instantiation under the same SUFFIX is therefore required; both
;       variants use the same cglobal declaration, so the register state at
;       the jump target matches.
;
; len is a sample count (it is scaled by 4 below). Each loop iteration handles
; 2*mmsize bytes and the counter is only tested after the body, so the loop
; always runs at least once: the caller is expected to pass a positive length
; whose byte size is a multiple of 2*mmsize.
;------------------------------------------------------------------------------
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Any misaligned pointer -> continue in the unaligned implementation.
    test in1q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant when its alignment test fails.
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m4, [coeffpq + 4*index1q]   ; m4 = coeffp[index1] in every lane
    VBROADCASTSS m5, [coeffpq + 4*index2q]   ; m5 = coeffp[index2] in every lane
    shl lend    , 2                          ; samples -> bytes (32-bit op: also
                                             ; clears the upper half of lenq on x86-64)
    ; Point all buffers at their end and count a negative byte offset up to 0,
    ; so one register serves as both index and loop counter.
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
.next:
%ifidn %1, a
    mulps        m0, m4, [in1q + lenq         ]
    mulps        m1, m5, [in2q + lenq         ]
    mulps        m2, m4, [in1q + lenq + mmsize]
    mulps        m3, m5, [in2q + lenq + mmsize]
%else
    movu         m0, [in1q + lenq         ]
    movu         m1, [in2q + lenq         ]
    movu         m2, [in1q + lenq + mmsize]
    movu         m3, [in2q + lenq + mmsize]
    mulps        m0, m0, m4
    mulps        m1, m1, m5
    mulps        m2, m2, m4
    mulps        m3, m3, m5
%endif
    addps        m0, m0, m1                  ; in1*c1 + in2*c2, first vector
    addps        m2, m2, m3                  ; in1*c1 + in2*c2, second vector
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                             ; loop while the offset is still negative
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; MIX1_FLT a|u
;
; Emits mix_1_1_%1_float(out, in, coeffp, index, len), which scales a buffer
; of single-precision floats by one coefficient:
;
;     out[i] = in[i] * coeffp[index]
;
; %1 selects the memory access flavour:
;   u - unaligned loads/stores (movu).
;   a - aligned loads/stores (mova, loads folded into mulps). If in or out is
;       not mmsize-aligned, control jumps to mix_1_1_float_u_int<SUFFIX>
;       inside the "u" variant, which must exist under the same SUFFIX.
;
; len is a sample count (scaled by 4 below). Each iteration handles 2*mmsize
; bytes and the loop body runs before the counter is tested, so the caller is
; expected to pass a positive length whose byte size is a multiple of 2*mmsize.
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    ; Any misaligned pointer -> continue in the unaligned implementation.
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant when its alignment test fails.
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]    ; m2 = coeffp[index] in every lane
    shl lenq    , 2                          ; samples -> bytes
    ; Point both buffers at their end; lenq becomes a negative byte offset
    ; that counts up to zero.
    add inq     , lenq
    add outq    , lenq
    neg lenq
.next:
%ifidn %1, a
    mulps        m0, m2, [inq + lenq         ]
    mulps        m1, m2, [inq + lenq + mmsize]
%else
    movu         m0, [inq + lenq         ]
    movu         m1, [inq + lenq + mmsize]
    mulps        m0, m0, m2
    mulps        m1, m1, m2
%endif
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m1
    add        lenq, mmsize*2
        jl .next                             ; loop while the offset is still negative
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; MIX1_INT16 a|u
;
; Emits mix_1_1_%1_int16(out, in, coeffp, index, len), a fixed-point scale of
; a buffer of signed 16-bit samples.
;
; Coefficient layout, as read by this code: the 32-bit entry at
; coeffp + 4*index holds the 16-bit multiplier in its low half and the right
; shift count in its high half. With round = (1 << shift) >> 1:
;
;     out[i] = saturate_int16((in[i] * multiplier + round) >> shift)
;
; The rounding term is folded into the multiply: samples are interleaved with
; the constant 1 and the multiplier with `round`, so a single pmaddwd yields
; in[i]*multiplier + 1*round per dword.
;
; %1 selects the memory access flavour (a = mova with fallback to the "u"
; body at mix_1_1_int16_u_int<SUFFIX> when in/out are misaligned, u = movu).
;
; len is a sample count (doubled below to get bytes). Each iteration handles
; 2*mmsize bytes and the body runs before the counter is tested, so the caller
; is expected to pass a positive length whose byte size is a multiple of
; 2*mmsize.
;------------------------------------------------------------------------------
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    ; Any misaligned pointer -> continue in the unaligned implementation.
    test inq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant when its alignment test fails.
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*indexq]          ; low dword = shift:multiplier
    SPLATW m5, m4                            ; m5 = multiplier in every word
    psllq  m4, 32                            ; \ keep only bits 16..31 of the entry:
    psrlq  m4, 48                            ; / m4 (low qword) = shift count
    mova   m0, [w1]
    psllw  m0, m4                            ; \ m0 = (1 << shift) >> 1 in every word,
    psrlw  m0, 1                             ; / i.e. the rounding term
    punpcklwd m5, m0                         ; m5 = (multiplier, round) word pairs
    add lenq    , lenq                       ; samples -> bytes
    ; Point both buffers at their end; lenq becomes a negative byte offset
    ; that counts up to zero.
    add inq     , lenq
    add outq    , lenq
    neg lenq
.next:
    mov%1        m0, [inq + lenq         ]
    mov%1        m2, [inq + lenq + mmsize]
    mova         m1, m0
    mova         m3, m2
    ; Widen to (sample, 1) word pairs so pmaddwd can add the rounding term.
    punpcklwd    m0, [w1]
    punpckhwd    m1, [w1]
    punpcklwd    m2, [w1]
    punpckhwd    m3, [w1]
    pmaddwd      m0, m5                      ; sample*multiplier + 1*round
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    psrad        m0, m4                      ; arithmetic shift back to 16-bit scale
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                      ; narrow to int16 with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                             ; loop while the offset is still negative
%if mmsize == 8
    ; Only relevant for an MMX instantiation: leave MMX state before returning.
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; MIX2_INT16 a|u
;
; Emits mix_2_1_%1_int16(out, in1, in2, coeffp, index1, index2, len), a
; fixed-point mix of two buffers of signed 16-bit samples.
;
; Coefficient layout, as read by this code: each 32-bit entry of coeffp holds
; a 16-bit multiplier in its low half. The right shift count is taken from the
; high half of the index1 entry only; the high half of the index2 entry is not
; read, so both multipliers are applied with the same shift. With
; round = (1 << shift) >> 1:
;
;     out[i] = saturate_int16((in1[i]*mul1 + in2[i]*mul2 + round) >> shift)
;
; Samples from in1/in2 are interleaved into (s1, s2) word pairs and the
; multipliers into (mul1, mul2) pairs, so one pmaddwd produces the weighted
; sum per dword.
;
; %1 selects the memory access flavour (a = mova with fallback to the "u"
; body at mix_2_1_int16_u_int<SUFFIX> when out/in1/in2 are misaligned,
; u = movu).
;
; len is a sample count (doubled below to get bytes). Each iteration handles
; 2*mmsize bytes and the body runs before the counter is tested, so the caller
; is expected to pass a positive length whose byte size is a multiple of
; 2*mmsize.
;------------------------------------------------------------------------------
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Any misaligned pointer -> continue in the unaligned implementation.
    test in1q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
%else
; Entry point used by the aligned variant when its alignment test fails.
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*index1q]         ; low dword = shift:mul1
    movd   m6, [coeffpq + 4*index2q]         ; low word  = mul2
    SPLATW m5, m4                            ; m5 = mul1 in every word
    SPLATW m6, m6                            ; m6 = mul2 in every word
    psllq  m4, 32                            ; \ keep only bits 16..31 of the index1
    psrlq  m4, 48                            ; / entry: m4 (low qword) = shift count
    mova   m7, [dw1]
    pslld  m7, m4                            ; \ m7 = (1 << shift) >> 1 in every dword,
    psrld  m7, 1                             ; / i.e. the rounding term
    punpcklwd m5, m6                         ; m5 = (mul1, mul2) word pairs
    add lend    , lend                       ; samples -> bytes (32-bit op: also
                                             ; clears the upper half of lenq on x86-64)
    ; Point all buffers at their end; lenq becomes a negative byte offset
    ; that counts up to zero.
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
.next:
    ; First vector: interleave in1/in2 into (s1, s2) word pairs.
    mov%1        m0, [in1q + lenq         ]
    mov%1        m2, [in2q + lenq         ]
    mova         m1, m0
    punpcklwd    m0, m2
    punpckhwd    m1, m2

    ; Second vector: same, with m6 reused as scratch (its multiplier content
    ; was already merged into m5 before the loop).
    mov%1        m2, [in1q + lenq + mmsize]
    mov%1        m6, [in2q + lenq + mmsize]
    mova         m3, m2
    punpcklwd    m2, m6
    punpckhwd    m3, m6

    pmaddwd      m0, m5                      ; s1*mul1 + s2*mul2
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    paddd        m0, m7                      ; + rounding term
    paddd        m1, m7
    paddd        m2, m7
    paddd        m3, m7
    psrad        m0, m4                      ; arithmetic shift back to 16-bit scale
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                      ; narrow to int16 with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                             ; loop while the offset is still negative
%if mmsize == 8
    ; Only relevant for an MMX instantiation: leave MMX state before returning.
    emms
    RET
%else
    REP_RET
%endif
%endmacro


; Instantiate the kernels. Every "a" (aligned) variant jumps into the body of
; the "u" (unaligned) variant generated under the same INIT_* setting, so both
; flavours are always emitted as a pair.

; Float mixers, XMM registers, SSE.
INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

; int16 mixers, XMM registers, SSE2.
INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

; Float mixers, YMM registers, AVX; only built when HAVE_AVX_EXTERNAL is set.
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif