/*
 * ARM NEON optimised RDFT
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "libavutil/arm/asm.S"
23cabdff1aSopenharmony_ci
/*
 * ff_rdft_calc_neon
 *
 * In-place recombination pass of a real DFT, wrapped around the complex
 * FFT provided by ff_fft_permute_neon / ff_fft_calc_neon.
 *
 * In:    r0 = context pointer.  Fields read here, by byte offset:
 *               +0   nbits            transform length is n = 1 << nbits
 *               +4   inverse          only bit 0 is examined
 *               +8   sign_convention  only bit 31 (the sign bit) is used
 *               +12  tcos             pointer to the cosine table
 *               +16  tsin             pointer to the sine table
 *               +20  negative_sin     bit 0 set => tsin values are negated
 *               +24  address handed to the two FFT routines as their r0
 *        r1 = data, 32-bit floats processed in place as 8-byte pairs;
 *             the highest pair touched starts at byte offset 4*n - 8.
 * Out:   data is overwritten; no value is returned by this routine itself
 *        (the inverse path ends in a tail call to ff_fft_calc_neon).
 * Saved: r4-r8, lr (24 bytes of stack).
 * Clobb: r0-r3, r12, flags, d0-d7, d16-d26, plus whatever the FFT
 *        routines clobber.
 * Align: every vector access carries a :64 hint, so data, tcos and tsin
 *        must be 8-byte aligned.
 *
 * Flow:  inverse bit clear: permute + FFT, recombination pass, return.
 *        inverse bit set:   recombination pass, halve data[0..1],
 *                           permute, then tail-call the FFT.
 *
 * NOTE(review): the main loop always runs at least once and the tail
 * handles one further pair, so at least three pairs per side are
 * processed; this presumably relies on nbits >= 4 being enforced by the
 * code that sets up the context -- confirm against the initialiser.
 */
function ff_rdft_calc_neon, export=1
        push            {r4-r8,lr}

        ldr             r6,  [r0, #4]           @ inverse
        mov             r4,  r0                 @ r4 = context, live across calls
        mov             r5,  r1                 @ r5 = data, live across calls

        @ r6 = inverse << 31: zero for the forward transform, sign-bit mask
        @ for the inverse one.  The flags from this shift pick the path.
        lsls            r6,  r6,  #31
        bne             1f
        @ Forward transform: run the complex FFT before recombining.
        @ r1 still holds the data pointer from entry for the first call.
        add             r0,  r4,  #24
        bl              X(ff_fft_permute_neon)
        add             r0,  r4,  #24
        mov             r1,  r5
        bl              X(ff_fft_calc_neon)
1:
        @ Set up the recombination pass:
        @   r0 / r7  = ascending read / write pointers, starting at data + 8
        @   r1 / lr  = descending read / write pointers, starting at
        @              data + 4*n - 8
        @   r8       = -8, the post-index step of the descending pointers
        @   r2 / r3  = tcos / tsin read pointers
        @   r12      = n/4 - 2, loop counter (two pairs per iteration)
        @   d26      = negative_sin << 31 in both lanes (sign mask for tsin)
        ldr             r12, [r4, #0]           @ nbits
        mov             r2,  #1
        ldr             r8,  [r4, #20]          @ negative_sin
        lsl             r12, r2,  r12
        add             r0,  r5,  #8
        lsl             r8,  r8,  #31
        add             r1,  r5,  r12, lsl #2
        lsr             r12, r12, #2
        vdup.32         d26, r8
        ldr             r2,  [r4, #12]          @ tcos
        sub             r12, r12, #2
        ldr             r3,  [r4, #16]          @ tsin
        mov             r7,  r0
        sub             r1,  r1,  #8
        mov             lr,  r1
        mov             r8,  #-8
        @ Preload the first pair from each end and the first two table
        @ entries; lane 1 of d4/d5 is the entry used for the first pair.
        vld1.32         {d0},     [r0,:64]!     @ d1[0,1]
        vld1.32         {d1},     [r1,:64], r8  @ d2[0,1]
        vld1.32         {d4},     [r2,:64]!     @ tcos[i]
        vld1.32         {d5},     [r3,:64]!     @ tsin[i]
        @ q9 = { k1, k1, k2, k2 } with k1 = 0.5 and k2 = 0.5 with its sign
        @ flipped when the inverse bit is set.
        vmov.f32        d18, #0.5               @ k1
        vdup.32         d19, r6
        veor            d5,  d26, d5            @ apply negative_sin to tsin
        pld             [r0, #32]
        veor            d19, d18, d19           @ k2
        @ Build the sign-flip masks: d16 has the sign bit in lane 0 only,
        @ d17 in lane 1 only, so q8 = { S, 0, 0, S }.
        vmov.i32        d16, #0
        vmov.i32        d17, #1<<31
        pld             [r1, #-32]
        vtrn.32         d16, d17
        pld             [r2, #32]
        vrev64.32       d16, d16                @ d16=1,0 d17=0,1
        pld             [r3, #32]
2:
        @ Main loop, two pairs per iteration.  The first pair is already in
        @ d0/d1 and uses table lane 1; the second is loaded into d24/d25 and
        @ uses lane 0 of the table entries reloaded mid-iteration.  Loads
        @ for the next iteration are issued before the stores of this one.
        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
        vld1.32         {d24},    [r0,:64]!     @  d1[0,1]
        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
        vld1.32         {d25},    [r1,:64], r8  @  d2[0,1]
        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
        veor            q3,  q12, q8            @ -d1[0],d1[1], d2[0],-d2[1]
        pld             [r0, #32]
        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
        pld             [r1, #-32]
        vadd.f32        d0,  d24, d7            @  d1[0]+d2[0], d1[1]-d2[1]
        vadd.f32        d1,  d6,  d25           @ -d1[0]+d2[0], d1[1]+d2[1]
        vmul.f32        q11, q0,  q9            @  ev.re, ev.im, od.im, od.re
        veor            d7,  d21, d16           @ -od.im, od.re
        vrev64.32       d3,  d21                @  od.re, od.im
        veor            d6,  d20, d17           @  ev.re,-ev.im
        veor            d2,  d3,  d16           @ -od.re, od.im
        @ First pair: d20 becomes the ascending result, d6 the descending one.
        vmla.f32        d20, d3,  d4[1]
        vmla.f32        d20, d7,  d5[1]
        vmla.f32        d6,  d2,  d4[1]
        vmla.f32        d6,  d21, d5[1]
        vld1.32         {d4},     [r2,:64]!     @  tcos[i]
        veor            d7,  d23, d16           @ -od.im, od.re
        vld1.32         {d5},     [r3,:64]!     @  tsin[i]
        veor            d24, d22, d17           @  ev.re,-ev.im
        vrev64.32       d3,  d23                @  od.re, od.im
        veor            d5,  d26, d5            @ apply negative_sin to tsin
        pld             [r2, #32]
        veor            d2,  d3,  d16           @ -od.re, od.im
        pld             [r3, #32]
        @ Second pair: d22 becomes the ascending result, d24 the descending one.
        vmla.f32        d22, d3,  d4[0]
        vmla.f32        d22, d7,  d5[0]
        vmla.f32        d24, d2,  d4[0]
        vmla.f32        d24, d23, d5[0]
        vld1.32         {d0},     [r0,:64]!     @  d1[0,1]
        vld1.32         {d1},     [r1,:64], r8  @  d2[0,1]
        vst1.32         {d20},    [r7,:64]!
        vst1.32         {d6},     [lr,:64], r8
        vst1.32         {d22},    [r7,:64]!
        vst1.32         {d24},    [lr,:64], r8
        subs            r12, r12, #2
        bgt             2b

        @ Tail: one last pair (already loaded into d0/d1, table lane 1),
        @ interleaved with two fix-ups:
        @   - the second float of the pair that follows the last ascending
        @     pair (pair n/4 for n >= 16) has its sign flipped when
        @     sign_convention is negative;
        @   - data[0..1] = { a, b } is replaced by { a + b, a - b }.
        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
        ldr             r2,  [r4, #8]           @  sign_convention
        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
        add             r0,  r0,  #4            @  r0 -> second float of next pair
        bfc             r2,  #0,  #31           @  keep only the sign bit
        vld1.32         {d0[0]},  [r0,:32]
        veor            d7,  d21, d16           @ -od.im, od.re
        vrev64.32       d3,  d21                @  od.re, od.im
        veor            d6,  d20, d17           @  ev.re,-ev.im
        vld1.32         {d22},    [r5,:64]      @  d22 = a, b
        vdup.32         d1,  r2
        vmov            d23, d22
        veor            d2,  d3,  d16           @ -od.re, od.im
        vtrn.32         d22, d23                @  d22 = a, a   d23 = b, b
        veor            d0,  d0,  d1            @  conditional sign flip
        veor            d23, d23, d17           @  d23 = b, -b
        vmla.f32        d20, d3,  d4[1]
        vmla.f32        d20, d7,  d5[1]
        vmla.f32        d6,  d2,  d4[1]
        vmla.f32        d6,  d21, d5[1]
        vadd.f32        d22, d22, d23           @  d22 = a + b, a - b
        vst1.32         {d20},    [r7,:64]
        vst1.32         {d6},     [lr,:64]
        vst1.32         {d0[0]},  [r0,:32]
        vst1.32         {d22},    [r5,:64]

        @ Forward transform (r6 == 0) is finished here.  The IT block lets
        @ the conditional pop assemble in Thumb-2 mode as well.
        cmp             r6,  #0
        it              eq
        popeq           {r4-r8,pc}

        @ Inverse transform: scale data[0..1] by k1 = 0.5, then run the
        @ complex FFT.  The frame is popped first so that the FFT routine
        @ returns straight to our caller (tail call).
        vmul.f32        d22, d22, d18
        vst1.32         {d22},    [r5,:64]
        add             r0,  r4,  #24
        mov             r1,  r5
        bl              X(ff_fft_permute_neon)
        add             r0,  r4,  #24
        mov             r1,  r5
        pop             {r4-r8,lr}
        b               X(ff_fft_calc_neon)
endfunc
156