1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/arm/asm.S"
22cabdff1aSopenharmony_ci
/*
 * ff_h264_idct_add_neon: inverse-transform one 4x4 block of 16-bit
 * coefficients and add the result to a 4x4 block of 8-bit pixels.
 *
 * In:   r0 = destination pixels: 4 rows of 4 bytes, each row 32-bit aligned
 *       r1 = 16 coefficients (16-bit each), 128-bit aligned
 *       r2 = byte distance between destination rows
 *       (presumably the C arguments dst, block, stride -- confirm against
 *       the prototype, which is not part of this file)
 * Out:  the 32 bytes at r1 are overwritten with zeros and r1 is restored to
 *       its entry value; r0 is left advanced by 4 * r2.
 * Uses: q0-q3, q8, q9, q15.
 *
 * The local label h264_idct_add_neon_nothumb is the address the block-loop
 * drivers further down in this file take with adr and call with blx.
 */
23cabdff1aSopenharmony_cifunction ff_h264_idct_add_neon, export=1
24cabdff1aSopenharmony_cih264_idct_add_neon_nothumb:
        /* d0-d3 = coefficients 0-3, 4-7, 8-11, 12-15; q15 = 0 for clearing. */
25cabdff1aSopenharmony_ci        vld1.64         {d0-d3},  [r1,:128]
26cabdff1aSopenharmony_ci        vmov.i16        q15, #0
27cabdff1aSopenharmony_ci
        /*
         * First 1-D pass. After the swap d1 = coefficients 8-11 and
         * q1 = {coefficients 4-7, 12-15}. The two q15 stores clear the
         * coefficient buffer while the arithmetic proceeds.
         */
28cabdff1aSopenharmony_ci        vswp            d1,  d2
29cabdff1aSopenharmony_ci        vst1.16         {q15},    [r1,:128]!
30cabdff1aSopenharmony_ci        vadd.i16        d4,  d0,  d1
31cabdff1aSopenharmony_ci        vst1.16         {q15},    [r1,:128]!
32cabdff1aSopenharmony_ci        vshr.s16        q8,  q1,  #1
33cabdff1aSopenharmony_ci        vsub.i16        d5,  d0,  d1
34cabdff1aSopenharmony_ci        vadd.i16        d6,  d2,  d17
35cabdff1aSopenharmony_ci        vsub.i16        d7,  d16, d3
36cabdff1aSopenharmony_ci        vadd.i16        q0,  q2,  q3
37cabdff1aSopenharmony_ci        vsub.i16        q1,  q2,  q3
38cabdff1aSopenharmony_ci
        /* Transpose the 4x4 intermediate held in d0, d1, d3, d2. */
39cabdff1aSopenharmony_ci        vtrn.16         d0,  d1
40cabdff1aSopenharmony_ci        vtrn.16         d3,  d2
41cabdff1aSopenharmony_ci        vtrn.32         d0,  d3
42cabdff1aSopenharmony_ci        vtrn.32         d1,  d2
43cabdff1aSopenharmony_ci
        /*
         * Second 1-D pass, interleaved with the destination loads.
         * Pixels land as d18 = {row 0, row 2} and d19 = {row 3, row 1},
         * matching the layout of the results: q0 = {row 0, row 2},
         * q1 = {row 3, row 1}.
         */
44cabdff1aSopenharmony_ci        vadd.i16        d4,  d0,  d3
45cabdff1aSopenharmony_ci        vld1.32         {d18[0]}, [r0,:32], r2
46cabdff1aSopenharmony_ci        vswp            d1,  d3
47cabdff1aSopenharmony_ci        vshr.s16        q8,  q1,  #1
48cabdff1aSopenharmony_ci        vld1.32         {d19[1]}, [r0,:32], r2
49cabdff1aSopenharmony_ci        vsub.i16        d5,  d0,  d1
50cabdff1aSopenharmony_ci        vld1.32         {d18[1]}, [r0,:32], r2
51cabdff1aSopenharmony_ci        vadd.i16        d6,  d16, d3
52cabdff1aSopenharmony_ci        vld1.32         {d19[0]}, [r0,:32], r2
53cabdff1aSopenharmony_ci        vsub.i16        d7,  d2,  d17
        /* Rewind r0 to the first destination row for the stores below. */
54cabdff1aSopenharmony_ci        sub             r0,  r0,  r2, lsl #2
55cabdff1aSopenharmony_ci        vadd.i16        q0,  q2,  q3
56cabdff1aSopenharmony_ci        vsub.i16        q1,  q2,  q3
57cabdff1aSopenharmony_ci
        /* Rounding shift right by 6: (x + 32) >> 6. */
58cabdff1aSopenharmony_ci        vrshr.s16       q0,  q0,  #6
59cabdff1aSopenharmony_ci        vrshr.s16       q1,  q1,  #6
60cabdff1aSopenharmony_ci
        /* Add the zero-extended pixels to the residual. */
61cabdff1aSopenharmony_ci        vaddw.u8        q0,  q0,  d18
62cabdff1aSopenharmony_ci        vaddw.u8        q1,  q1,  d19
63cabdff1aSopenharmony_ci
        /* Narrow back to bytes with unsigned saturation (clamp to 0..255). */
64cabdff1aSopenharmony_ci        vqmovun.s16     d0,  q0
65cabdff1aSopenharmony_ci        vqmovun.s16     d1,  q1
66cabdff1aSopenharmony_ci
        /* Store rows 0-3 using the same lane order as the loads. */
67cabdff1aSopenharmony_ci        vst1.32         {d0[0]},  [r0,:32], r2
68cabdff1aSopenharmony_ci        vst1.32         {d1[1]},  [r0,:32], r2
69cabdff1aSopenharmony_ci        vst1.32         {d0[1]},  [r0,:32], r2
70cabdff1aSopenharmony_ci        vst1.32         {d1[0]},  [r0,:32], r2
71cabdff1aSopenharmony_ci
        /* Undo the two 16-byte post-increments so callers see r1 unchanged. */
72cabdff1aSopenharmony_ci        sub             r1,  r1,  #32
73cabdff1aSopenharmony_ci        bx              lr
74cabdff1aSopenharmony_ciendfunc
75cabdff1aSopenharmony_ci
/*
 * ff_h264_idct_dc_add_neon: DC-only shortcut for a 4x4 block. Takes the
 * first 16-bit coefficient, applies a rounding shift right by 6, and adds
 * that single value to every pixel of a 4x4 destination block.
 *
 * In:   r0 = destination pixels: 4 rows of 4 bytes, each row 32-bit aligned
 *       r1 = coefficient block (only the first 16-bit value is read)
 *       r2 = byte distance between destination rows
 * Out:  the 16-bit value at r1 is set to zero; r1 itself is not modified;
 *       r0 is left advanced by 4 * r2.
 * Uses: r3, q0-q2.
 */
76cabdff1aSopenharmony_cifunction ff_h264_idct_dc_add_neon, export=1
77cabdff1aSopenharmony_cih264_idct_dc_add_neon_nothumb:
78cabdff1aSopenharmony_ci        mov             r3,       #0
        /* Broadcast the first coefficient to all 8 lanes of q1, then clear it. */
79cabdff1aSopenharmony_ci        vld1.16         {d2[],d3[]}, [r1,:16]
80cabdff1aSopenharmony_ci        strh            r3,       [r1]
81cabdff1aSopenharmony_ci        vrshr.s16       q1,  q1,  #6
        /* d0 = rows 0 and 1, d1 = rows 2 and 3; widen and add the DC value. */
82cabdff1aSopenharmony_ci        vld1.32         {d0[0]},  [r0,:32], r2
83cabdff1aSopenharmony_ci        vld1.32         {d0[1]},  [r0,:32], r2
84cabdff1aSopenharmony_ci        vaddw.u8        q2,  q1,  d0
85cabdff1aSopenharmony_ci        vld1.32         {d1[0]},  [r0,:32], r2
86cabdff1aSopenharmony_ci        vld1.32         {d1[1]},  [r0,:32], r2
87cabdff1aSopenharmony_ci        vaddw.u8        q1,  q1,  d1
        /* Narrow to bytes with unsigned saturation. */
88cabdff1aSopenharmony_ci        vqmovun.s16     d0,  q2
89cabdff1aSopenharmony_ci        vqmovun.s16     d1,  q1
        /* Rewind r0 to the first row and write the four rows back. */
90cabdff1aSopenharmony_ci        sub             r0,  r0,  r2, lsl #2
91cabdff1aSopenharmony_ci        vst1.32         {d0[0]},  [r0,:32], r2
92cabdff1aSopenharmony_ci        vst1.32         {d0[1]},  [r0,:32], r2
93cabdff1aSopenharmony_ci        vst1.32         {d1[0]},  [r0,:32], r2
94cabdff1aSopenharmony_ci        vst1.32         {d1[1]},  [r0,:32], r2
95cabdff1aSopenharmony_ci        bx              lr
96cabdff1aSopenharmony_ciendfunc
97cabdff1aSopenharmony_ci
/*
 * ff_h264_idct_add16_neon: walk 16 consecutive 4x4 coefficient blocks and,
 * per block, call the full 4x4 transform, the DC-only shortcut, or nothing.
 *
 * In:   r0   = base destination pointer
 *       r1   = table of 32-bit offsets, one per block, added to r0
 *       r2   = first coefficient block (blocks are 32 bytes apart)
 *       r3   = destination stride (handed to the 4x4 routines in r2)
 *       [sp] = byte table indexed through scan8 (presumably per-block
 *              non-zero coefficient counts -- confirm against the caller)
 *
 * Loop register roles:
 *       r4 = base dst        r5 = offset cursor     r6 = byte table
 *       r7 = scan8 cursor    r8 = table byte        ip = blocks remaining
 *       r1 = current block   r2 = stride            r0 = dst for this block
 */
98cabdff1aSopenharmony_cifunction ff_h264_idct_add16_neon, export=1
99cabdff1aSopenharmony_ci        push            {r4-r8,lr}
100cabdff1aSopenharmony_ci        mov             r4,  r0
101cabdff1aSopenharmony_ci        mov             r5,  r1
102cabdff1aSopenharmony_ci        mov             r1,  r2
103cabdff1aSopenharmony_ci        mov             r2,  r3
        /* Six registers were pushed (24 bytes), so this is the first stack argument. */
104cabdff1aSopenharmony_ci        ldr             r6,  [sp, #24]
105cabdff1aSopenharmony_ci        movrel          r7,  scan8
106cabdff1aSopenharmony_ci        mov             ip,  #16
        /* r8 = table[scan8[i]]; r0 = offset[i] (base is added further down). */
107cabdff1aSopenharmony_ci1:      ldrb            r8,  [r7], #1
108cabdff1aSopenharmony_ci        ldr             r0,  [r5], #4
109cabdff1aSopenharmony_ci        ldrb            r8,  [r6, r8]
        /* Table byte == 0: nothing to do for this block. */
110cabdff1aSopenharmony_ci        subs            r8,  r8,  #1
111cabdff1aSopenharmony_ci        blt             2f
        /*
         * lr = first coefficient (sign-extended). ldrsh and add leave the
         * flags from subs intact, so "ne" below still means table byte != 1;
         * in that case lr is forced to 0 to select the full transform.
         */
112cabdff1aSopenharmony_ci        ldrsh           lr,  [r1]
113cabdff1aSopenharmony_ci        add             r0,  r0,  r4
114cabdff1aSopenharmony_ci        it              ne
115cabdff1aSopenharmony_ci        movne           lr,  #0
        /*
         * lr != 0 (table byte == 1 and a non-zero first coefficient) selects
         * the DC-only routine; otherwise the full transform is used.
         * CONFIG_THUMB is defined outside this file; presumably it sets
         * bit 0 of the target for Thumb builds -- confirm.
         */
116cabdff1aSopenharmony_ci        cmp             lr,  #0
117cabdff1aSopenharmony_ci        ite             ne
118cabdff1aSopenharmony_ci        adrne           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
119cabdff1aSopenharmony_ci        adreq           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
120cabdff1aSopenharmony_ci        blx             lr
        /*
         * Both callees return with r1 at its entry value, so adding 32 moves
         * to the next block. The add does not touch the flags set by subs.
         */
121cabdff1aSopenharmony_ci2:      subs            ip,  ip,  #1
122cabdff1aSopenharmony_ci        add             r1,  r1,  #32
123cabdff1aSopenharmony_ci        bne             1b
124cabdff1aSopenharmony_ci        pop             {r4-r8,pc}
125cabdff1aSopenharmony_ciendfunc
126cabdff1aSopenharmony_ci
/*
 * ff_h264_idct_add16intra_neon: same block walk as ff_h264_idct_add16_neon
 * (16 blocks, 32 bytes apart, destination = r0 + offset[i]) but with a
 * different selection rule:
 *   table byte != 0                          -> full 4x4 transform
 *   table byte == 0, first coefficient != 0  -> DC-only routine
 *   table byte == 0, first coefficient == 0  -> no call
 *
 * In:   r0   = base destination pointer
 *       r1   = table of 32-bit offsets, one per block
 *       r2   = first coefficient block
 *       r3   = destination stride (handed to the 4x4 routines in r2)
 *       [sp] = byte table indexed through scan8 (presumably per-block
 *              non-zero coefficient counts -- confirm against the caller)
 */
127cabdff1aSopenharmony_cifunction ff_h264_idct_add16intra_neon, export=1
128cabdff1aSopenharmony_ci        push            {r4-r8,lr}
129cabdff1aSopenharmony_ci        mov             r4,  r0
130cabdff1aSopenharmony_ci        mov             r5,  r1
131cabdff1aSopenharmony_ci        mov             r1,  r2
132cabdff1aSopenharmony_ci        mov             r2,  r3
        /* Six registers were pushed (24 bytes), so this is the first stack argument. */
133cabdff1aSopenharmony_ci        ldr             r6,  [sp, #24]
134cabdff1aSopenharmony_ci        movrel          r7,  scan8
135cabdff1aSopenharmony_ci        mov             ip,  #16
        /* r8 = table[scan8[i]]; r0 = base + offset[i]. */
136cabdff1aSopenharmony_ci1:      ldrb            r8,  [r7], #1
137cabdff1aSopenharmony_ci        ldr             r0,  [r5], #4
138cabdff1aSopenharmony_ci        ldrb            r8,  [r6, r8]
139cabdff1aSopenharmony_ci        add             r0,  r0,  r4
        /* Flags reflect the table byte; r8 is then reused for the first coefficient. */
140cabdff1aSopenharmony_ci        cmp             r8,  #0
141cabdff1aSopenharmony_ci        ldrsh           r8,  [r1]
        /*
         * ne path: pick the full transform, cmpeq is skipped, blxne calls it.
         * eq path: pick the DC-only routine, then cmpeq re-tests the first
         * coefficient so that blxne only calls when it is non-zero.
         * CONFIG_THUMB is defined outside this file; presumably it sets
         * bit 0 of the target for Thumb builds -- confirm.
         */
142cabdff1aSopenharmony_ci        iteet           ne
143cabdff1aSopenharmony_ci        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
144cabdff1aSopenharmony_ci        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
145cabdff1aSopenharmony_ci        cmpeq           r8,  #0
146cabdff1aSopenharmony_ci        blxne           lr
        /* Next block: r1 is unchanged by either callee, so step by 32 bytes. */
147cabdff1aSopenharmony_ci        subs            ip,  ip,  #1
148cabdff1aSopenharmony_ci        add             r1,  r1,  #32
149cabdff1aSopenharmony_ci        bne             1b
150cabdff1aSopenharmony_ci        pop             {r4-r8,pc}
151cabdff1aSopenharmony_ciendfunc
152cabdff1aSopenharmony_ci
/*
 * ff_h264_idct_add8_neon: process two groups of four 4x4 blocks, one group
 * per destination base pointer, using block indices 16-19 and then 32-35.
 * The per-block selection rule is the same as in the add16intra loop:
 * full transform if the table byte is non-zero, else the DC-only routine
 * if the first coefficient is non-zero, else nothing.
 *
 * In:   r0   = pointer to two destination base pointers (read with ldm)
 *       r1   = table of 32-bit offsets, one per block
 *       r2   = coefficient blocks (32 bytes each)
 *       r3   = destination stride (handed to the 4x4 routines in r2)
 *       [sp] = byte table indexed through scan8 (presumably per-block
 *              non-zero coefficient counts -- confirm against the caller)
 *
 * Loop register roles:
 *       r4  = current base dst      r9  = second base dst
 *       r5  = &offset[16]           r10 = &block[16]
 *       r6  = byte table            r7  = &scan8[16]
 *       r12 = index relative to 16: 0..3, then 16..19
 */
153cabdff1aSopenharmony_cifunction ff_h264_idct_add8_neon, export=1
154cabdff1aSopenharmony_ci        push            {r4-r10,lr}
155cabdff1aSopenharmony_ci        ldm             r0,  {r4,r9}
        /* Skip the first 16 entries of the offset table and of the blocks. */
156cabdff1aSopenharmony_ci        add             r5,  r1,  #16*4
157cabdff1aSopenharmony_ci        add             r1,  r2,  #16*32
158cabdff1aSopenharmony_ci        mov             r2,  r3
159cabdff1aSopenharmony_ci        mov             r10, r1
        /* Eight registers were pushed (32 bytes), so this is the first stack argument. */
160cabdff1aSopenharmony_ci        ldr             r6,  [sp, #32]
161cabdff1aSopenharmony_ci        movrel          r7,  scan8+16
162cabdff1aSopenharmony_ci        mov             r12, #0
        /* r8 = table[scan8[16 + r12]]; r0 = base + offset; r1 = block address. */
163cabdff1aSopenharmony_ci1:      ldrb            r8,  [r7, r12]
164cabdff1aSopenharmony_ci        ldr             r0,  [r5, r12, lsl #2]
165cabdff1aSopenharmony_ci        ldrb            r8,  [r6, r8]
166cabdff1aSopenharmony_ci        add             r0,  r0,  r4
167cabdff1aSopenharmony_ci        add             r1,  r10, r12, lsl #5
        /* Flags reflect the table byte; r8 is then reused for the first coefficient. */
168cabdff1aSopenharmony_ci        cmp             r8,  #0
169cabdff1aSopenharmony_ci        ldrsh           r8,  [r1]
        /*
         * ne path: full transform, always called.
         * eq path: DC-only routine, called only if cmpeq finds a non-zero
         * first coefficient.
         * CONFIG_THUMB is defined outside this file; presumably it sets
         * bit 0 of the target for Thumb builds -- confirm.
         */
170cabdff1aSopenharmony_ci        iteet           ne
171cabdff1aSopenharmony_ci        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
172cabdff1aSopenharmony_ci        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
173cabdff1aSopenharmony_ci        cmpeq           r8,  #0
174cabdff1aSopenharmony_ci        blxne           lr
        /*
         * After the fourth block jump the index from 4 to 16 and switch to
         * the second destination base; stop once the index reaches 20.
         */
175cabdff1aSopenharmony_ci        add             r12, r12, #1
176cabdff1aSopenharmony_ci        cmp             r12, #4
177cabdff1aSopenharmony_ci        itt             eq
178cabdff1aSopenharmony_ci        moveq           r12, #16
179cabdff1aSopenharmony_ci        moveq           r4,  r9
180cabdff1aSopenharmony_ci        cmp             r12, #20
181cabdff1aSopenharmony_ci        blt             1b
182cabdff1aSopenharmony_ci        pop             {r4-r10,pc}
183cabdff1aSopenharmony_ciendfunc
184cabdff1aSopenharmony_ci
/*
 * idct8x8_cols pass: one 1-D pass of the 8x8 transform over eight vectors
 * of eight 16-bit values each.
 *
 *   pass == 0: expects six input vectors in q8-q13 and q3 == 0. Loads the
 *              last two from [r1] into q14-q15, zeroes those 32 bytes
 *              (advancing r1 by 32), runs the butterflies and starts a
 *              transpose with vtrn.16. Results are left in q8-q13, q2, q15.
 *   otherwise: completes the transpose (vtrn.16/vtrn.32/vswp) of the data
 *              left by pass 0, runs the same butterflies and leaves the
 *              eight result vectors in q8-q15.
 *
 * qa and qb are temporary register aliases whose assignment is swapped
 * between the passes (q2/q14 versus q14/q2) so the shared middle section
 * works for both; they are released with .unreq at the end.
 * Uses q0-q3 and q8-q15.
 */
185cabdff1aSopenharmony_ci.macro  idct8x8_cols    pass
186cabdff1aSopenharmony_ci  .if \pass == 0
187cabdff1aSopenharmony_ci        qa      .req    q2
188cabdff1aSopenharmony_ci        qb      .req    q14
        /*
         * Even part on inputs 0, 2, 4, 6 (q8, q10, q12, q14), interleaved
         * with fetching and clearing the last 32 bytes of coefficients.
         * q3 must still be zero for the two stores; it is overwritten after.
         */
189cabdff1aSopenharmony_ci        vshr.s16        q2,  q10, #1
190cabdff1aSopenharmony_ci        vadd.i16        q0,  q8,  q12
191cabdff1aSopenharmony_ci        vld1.16         {q14-q15},[r1,:128]
192cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
193cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
194cabdff1aSopenharmony_ci        vsub.i16        q1,  q8,  q12
195cabdff1aSopenharmony_ci        vshr.s16        q3,  q14, #1
196cabdff1aSopenharmony_ci        vsub.i16        q2,  q2,  q14
197cabdff1aSopenharmony_ci        vadd.i16        q3,  q3,  q10
198cabdff1aSopenharmony_ci  .else
199cabdff1aSopenharmony_ci        qa      .req    q14
200cabdff1aSopenharmony_ci        qb      .req    q2
        /*
         * Finish the 8x8 transpose begun in pass 0. Here input 6 lives in
         * q2 (not q14), and the even part is interleaved with the swaps.
         */
201cabdff1aSopenharmony_ci        vtrn.32         q8,  q10
202cabdff1aSopenharmony_ci        vtrn.16         q12, q13
203cabdff1aSopenharmony_ci        vtrn.32         q9,  q11
204cabdff1aSopenharmony_ci        vtrn.32         q12, q2
205cabdff1aSopenharmony_ci        vtrn.32         q13, q15
206cabdff1aSopenharmony_ci        vswp            d21, d4
207cabdff1aSopenharmony_ci        vshr.s16        q14, q10, #1
208cabdff1aSopenharmony_ci        vswp            d17, d24
209cabdff1aSopenharmony_ci        vshr.s16        q3,  q2,  #1
210cabdff1aSopenharmony_ci        vswp            d19, d26
211cabdff1aSopenharmony_ci        vadd.i16        q0,  q8,  q12
212cabdff1aSopenharmony_ci        vswp            d23, d30
213cabdff1aSopenharmony_ci        vsub.i16        q1,  q8,  q12
214cabdff1aSopenharmony_ci        vsub.i16        q14, q14, q2
215cabdff1aSopenharmony_ci        vadd.i16        q3,  q3,  q10
216cabdff1aSopenharmony_ci  .endif
        /* Combine the even-part terms into q8, q10, q12 and qb. */
217cabdff1aSopenharmony_ci        vadd.i16        q10, q1,  qa
218cabdff1aSopenharmony_ci        vsub.i16        q12, q1,  qa
219cabdff1aSopenharmony_ci        vadd.i16        q8,  q0,  q3
220cabdff1aSopenharmony_ci        vsub.i16        qb,  q0,  q3
        /*
         * Odd part on inputs 1, 3, 5, 7 (q9, q11, q13, q15): build four
         * sums in q0, q1, qa, q3, each also subtracting/adding one input
         * a second time at half weight (the >>1 copies).
         */
221cabdff1aSopenharmony_ci        vsub.i16        q0,  q13, q11
222cabdff1aSopenharmony_ci        vadd.i16        q1,  q15, q9
223cabdff1aSopenharmony_ci        vsub.i16        qa,  q15, q9
224cabdff1aSopenharmony_ci        vadd.i16        q3,  q13, q11
225cabdff1aSopenharmony_ci        vsub.i16        q0,  q0,  q15
226cabdff1aSopenharmony_ci        vsub.i16        q1,  q1,  q11
227cabdff1aSopenharmony_ci        vadd.i16        qa,  qa,  q13
228cabdff1aSopenharmony_ci        vadd.i16        q3,  q3,  q9
229cabdff1aSopenharmony_ci        vshr.s16        q9,  q9,  #1
230cabdff1aSopenharmony_ci        vshr.s16        q11, q11, #1
231cabdff1aSopenharmony_ci        vshr.s16        q13, q13, #1
232cabdff1aSopenharmony_ci        vshr.s16        q15, q15, #1
233cabdff1aSopenharmony_ci        vsub.i16        q0,  q0,  q15
234cabdff1aSopenharmony_ci        vsub.i16        q1,  q1,  q11
235cabdff1aSopenharmony_ci        vadd.i16        qa,  qa,  q13
236cabdff1aSopenharmony_ci        vadd.i16        q3,  q3,  q9
        /* Cross-mix the four odd sums using quarter-weight (>>2) copies. */
237cabdff1aSopenharmony_ci        vshr.s16        q9,  q0,  #2
238cabdff1aSopenharmony_ci        vshr.s16        q11, q1,  #2
239cabdff1aSopenharmony_ci        vshr.s16        q13, qa,  #2
240cabdff1aSopenharmony_ci        vshr.s16        q15, q3,  #2
241cabdff1aSopenharmony_ci        vsub.i16        q3,  q3,  q9
242cabdff1aSopenharmony_ci        vsub.i16        qa,  q11, qa
243cabdff1aSopenharmony_ci        vadd.i16        q1,  q1,  q13
244cabdff1aSopenharmony_ci        vadd.i16        q0,  q0,  q15
245cabdff1aSopenharmony_ci  .if \pass == 0
        /*
         * Final even +/- odd butterflies, interleaved with the vtrn.16
         * steps that start the transpose. Output 6 is left in q2.
         */
246cabdff1aSopenharmony_ci        vsub.i16        q15, q8,  q3
247cabdff1aSopenharmony_ci        vadd.i16        q8,  q8,  q3
248cabdff1aSopenharmony_ci        vadd.i16        q9,  q10, q2
249cabdff1aSopenharmony_ci        vsub.i16        q2,  q10, q2
250cabdff1aSopenharmony_ci        vtrn.16         q8,  q9
251cabdff1aSopenharmony_ci        vadd.i16        q10, q12, q1
252cabdff1aSopenharmony_ci        vtrn.16         q2,  q15
253cabdff1aSopenharmony_ci        vadd.i16        q11, q14, q0
254cabdff1aSopenharmony_ci        vsub.i16        q13, q12, q1
255cabdff1aSopenharmony_ci        vtrn.16         q10, q11
256cabdff1aSopenharmony_ci        vsub.i16        q12, q14, q0
257cabdff1aSopenharmony_ci  .else
        /* Final even +/- odd butterflies; outputs 0-7 end up in q8-q15. */
258cabdff1aSopenharmony_ci        vsub.i16        q15, q8,  q3
259cabdff1aSopenharmony_ci        vadd.i16        q8,  q8,  q3
260cabdff1aSopenharmony_ci        vadd.i16        q9,  q10, q14
261cabdff1aSopenharmony_ci        vsub.i16        q14, q10, q14
262cabdff1aSopenharmony_ci        vadd.i16        q10, q12, q1
263cabdff1aSopenharmony_ci        vsub.i16        q13, q12, q1
264cabdff1aSopenharmony_ci        vadd.i16        q11, q2, q0
265cabdff1aSopenharmony_ci        vsub.i16        q12, q2, q0
266cabdff1aSopenharmony_ci  .endif
267cabdff1aSopenharmony_ci        .unreq          qa
268cabdff1aSopenharmony_ci        .unreq          qb
269cabdff1aSopenharmony_ci.endm
270cabdff1aSopenharmony_ci
/*
 * ff_h264_idct8_add_neon: inverse-transform one 8x8 block of 16-bit
 * coefficients and add the result to an 8x8 block of 8-bit pixels.
 *
 * In:   r0 = destination pixels: 8 rows of 8 bytes, each row 64-bit aligned
 *       r1 = 64 coefficients (16-bit each), 128-bit aligned
 *       r2 = byte distance between destination rows
 *       (presumably the C arguments dst, block, stride -- confirm against
 *       the prototype, which is not part of this file)
 * Out:  the 128 bytes at r1 are overwritten with zeros and r1 is restored
 *       to its entry value; r0 and r3 are left advanced by 8 * r2.
 * Uses: r3, q0-q3, q8-q15.
 */
271cabdff1aSopenharmony_cifunction ff_h264_idct8_add_neon, export=1
272cabdff1aSopenharmony_cih264_idct8_add_neon_nothumb:
        /*
         * Load the first 96 bytes of coefficients into q8-q13, clearing each
         * 32-byte chunk right after it is read. q3 stays zero so that pass 0
         * of the macro can load and clear the final 32 bytes the same way.
         */
273cabdff1aSopenharmony_ci        vmov.i16        q3,       #0
274cabdff1aSopenharmony_ci        vld1.16         {q8-q9},  [r1,:128]
275cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
276cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
277cabdff1aSopenharmony_ci        vld1.16         {q10-q11},[r1,:128]
278cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
279cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
280cabdff1aSopenharmony_ci        vld1.16         {q12-q13},[r1,:128]
281cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
282cabdff1aSopenharmony_ci        vst1.16         {q3},     [r1,:128]!
283cabdff1aSopenharmony_ci
        /* Two 1-D passes with a transpose in between; results in q8-q15. */
284cabdff1aSopenharmony_ci        idct8x8_cols    0
285cabdff1aSopenharmony_ci        idct8x8_cols    1
286cabdff1aSopenharmony_ci
        /*
         * r0 walks the rows for loading, r3 (a copy) walks them for storing.
         * Each result vector gets a rounding shift right by 6 while the
         * eight destination rows are fetched into d0-d7.
         */
287cabdff1aSopenharmony_ci        mov             r3,  r0
288cabdff1aSopenharmony_ci        vrshr.s16       q8,  q8,  #6
289cabdff1aSopenharmony_ci        vld1.8          {d0},     [r0,:64], r2
290cabdff1aSopenharmony_ci        vrshr.s16       q9,  q9,  #6
291cabdff1aSopenharmony_ci        vld1.8          {d1},     [r0,:64], r2
292cabdff1aSopenharmony_ci        vrshr.s16       q10, q10, #6
293cabdff1aSopenharmony_ci        vld1.8          {d2},     [r0,:64], r2
294cabdff1aSopenharmony_ci        vrshr.s16       q11, q11, #6
295cabdff1aSopenharmony_ci        vld1.8          {d3},     [r0,:64], r2
296cabdff1aSopenharmony_ci        vrshr.s16       q12, q12, #6
297cabdff1aSopenharmony_ci        vld1.8          {d4},     [r0,:64], r2
298cabdff1aSopenharmony_ci        vrshr.s16       q13, q13, #6
299cabdff1aSopenharmony_ci        vld1.8          {d5},     [r0,:64], r2
300cabdff1aSopenharmony_ci        vrshr.s16       q14, q14, #6
301cabdff1aSopenharmony_ci        vld1.8          {d6},     [r0,:64], r2
302cabdff1aSopenharmony_ci        vrshr.s16       q15, q15, #6
303cabdff1aSopenharmony_ci        vld1.8          {d7},     [r0,:64], r2
        /*
         * Per row: add zero-extended pixels, narrow with unsigned
         * saturation, store. The three steps for different rows are
         * interleaved.
         */
304cabdff1aSopenharmony_ci        vaddw.u8        q8,  q8,  d0
305cabdff1aSopenharmony_ci        vaddw.u8        q9,  q9,  d1
306cabdff1aSopenharmony_ci        vaddw.u8        q10, q10, d2
307cabdff1aSopenharmony_ci        vqmovun.s16     d0,  q8
308cabdff1aSopenharmony_ci        vaddw.u8        q11, q11, d3
309cabdff1aSopenharmony_ci        vqmovun.s16     d1,  q9
310cabdff1aSopenharmony_ci        vaddw.u8        q12, q12, d4
311cabdff1aSopenharmony_ci        vqmovun.s16     d2,  q10
312cabdff1aSopenharmony_ci        vst1.8          {d0},     [r3,:64], r2
313cabdff1aSopenharmony_ci        vaddw.u8        q13, q13, d5
314cabdff1aSopenharmony_ci        vqmovun.s16     d3,  q11
315cabdff1aSopenharmony_ci        vst1.8          {d1},     [r3,:64], r2
316cabdff1aSopenharmony_ci        vaddw.u8        q14, q14, d6
317cabdff1aSopenharmony_ci        vqmovun.s16     d4,  q12
318cabdff1aSopenharmony_ci        vst1.8          {d2},     [r3,:64], r2
319cabdff1aSopenharmony_ci        vaddw.u8        q15, q15, d7
320cabdff1aSopenharmony_ci        vqmovun.s16     d5,  q13
321cabdff1aSopenharmony_ci        vst1.8          {d3},     [r3,:64], r2
322cabdff1aSopenharmony_ci        vqmovun.s16     d6,  q14
323cabdff1aSopenharmony_ci        vqmovun.s16     d7,  q15
324cabdff1aSopenharmony_ci        vst1.8          {d4},     [r3,:64], r2
325cabdff1aSopenharmony_ci        vst1.8          {d5},     [r3,:64], r2
326cabdff1aSopenharmony_ci        vst1.8          {d6},     [r3,:64], r2
327cabdff1aSopenharmony_ci        vst1.8          {d7},     [r3,:64], r2
328cabdff1aSopenharmony_ci
        /* Undo the 128 bytes of post-increment (96 here, 32 in macro pass 0). */
329cabdff1aSopenharmony_ci        sub             r1,  r1,  #128
330cabdff1aSopenharmony_ci        bx              lr
331cabdff1aSopenharmony_ciendfunc
332cabdff1aSopenharmony_ci
/*
 * ff_h264_idct8_dc_add_neon: DC-only shortcut for an 8x8 block. Takes the
 * first 16-bit coefficient, applies a rounding shift right by 6, and adds
 * that single value to every pixel of an 8x8 destination block.
 *
 * In:   r0 = destination pixels: 8 rows of 8 bytes, each row 64-bit aligned
 *       r1 = coefficient block (only the first 16-bit value is read)
 *       r2 = byte distance between destination rows
 * Out:  the 16-bit value at r1 is set to zero; r1 itself is not modified;
 *       r0 is left advanced by 8 * r2.
 * Uses: r3, q0-q3, q8-q15.
 */
333cabdff1aSopenharmony_cifunction ff_h264_idct8_dc_add_neon, export=1
334cabdff1aSopenharmony_cih264_idct8_dc_add_neon_nothumb:
335cabdff1aSopenharmony_ci        mov             r3,       #0
        /* Broadcast the first coefficient to all 8 lanes of q15, then clear it. */
336cabdff1aSopenharmony_ci        vld1.16         {d30[],d31[]},[r1,:16]
337cabdff1aSopenharmony_ci        strh            r3,       [r1]
        /* Load rows 0-7 into d0-d7 while adding the rounded DC into q8-q15. */
338cabdff1aSopenharmony_ci        vld1.32         {d0},     [r0,:64], r2
339cabdff1aSopenharmony_ci        vrshr.s16       q15, q15, #6
340cabdff1aSopenharmony_ci        vld1.32         {d1},     [r0,:64], r2
341cabdff1aSopenharmony_ci        vld1.32         {d2},     [r0,:64], r2
342cabdff1aSopenharmony_ci        vaddw.u8        q8,  q15, d0
343cabdff1aSopenharmony_ci        vld1.32         {d3},     [r0,:64], r2
344cabdff1aSopenharmony_ci        vaddw.u8        q9,  q15, d1
345cabdff1aSopenharmony_ci        vld1.32         {d4},     [r0,:64], r2
346cabdff1aSopenharmony_ci        vaddw.u8        q10, q15, d2
347cabdff1aSopenharmony_ci        vld1.32         {d5},     [r0,:64], r2
348cabdff1aSopenharmony_ci        vaddw.u8        q11, q15, d3
349cabdff1aSopenharmony_ci        vld1.32         {d6},     [r0,:64], r2
350cabdff1aSopenharmony_ci        vaddw.u8        q12, q15, d4
351cabdff1aSopenharmony_ci        vld1.32         {d7},     [r0,:64], r2
352cabdff1aSopenharmony_ci        vaddw.u8        q13, q15, d5
353cabdff1aSopenharmony_ci        vaddw.u8        q14, q15, d6
        /* The last add overwrites q15 itself, so it has to come last. */
354cabdff1aSopenharmony_ci        vaddw.u8        q15, q15, d7
        /* Narrow each row to bytes with unsigned saturation. */
355cabdff1aSopenharmony_ci        vqmovun.s16     d0,  q8
356cabdff1aSopenharmony_ci        vqmovun.s16     d1,  q9
357cabdff1aSopenharmony_ci        vqmovun.s16     d2,  q10
358cabdff1aSopenharmony_ci        vqmovun.s16     d3,  q11
        /* Rewind r0 by 8 rows and write the results back. */
359cabdff1aSopenharmony_ci        sub             r0,  r0,  r2, lsl #3
360cabdff1aSopenharmony_ci        vst1.32         {d0},     [r0,:64], r2
361cabdff1aSopenharmony_ci        vqmovun.s16     d4,  q12
362cabdff1aSopenharmony_ci        vst1.32         {d1},     [r0,:64], r2
363cabdff1aSopenharmony_ci        vqmovun.s16     d5,  q13
364cabdff1aSopenharmony_ci        vst1.32         {d2},     [r0,:64], r2
365cabdff1aSopenharmony_ci        vqmovun.s16     d6,  q14
366cabdff1aSopenharmony_ci        vst1.32         {d3},     [r0,:64], r2
367cabdff1aSopenharmony_ci        vqmovun.s16     d7,  q15
368cabdff1aSopenharmony_ci        vst1.32         {d4},     [r0,:64], r2
369cabdff1aSopenharmony_ci        vst1.32         {d5},     [r0,:64], r2
370cabdff1aSopenharmony_ci        vst1.32         {d6},     [r0,:64], r2
371cabdff1aSopenharmony_ci        vst1.32         {d7},     [r0,:64], r2
372cabdff1aSopenharmony_ci        bx              lr
373cabdff1aSopenharmony_ciendfunc
374cabdff1aSopenharmony_ci
/*
 * ff_h264_idct8_add4_neon: walk four 8x8 coefficient blocks and, per block,
 * call the full 8x8 transform, the 8x8 DC-only shortcut, or nothing.
 * Selection follows the same rule as ff_h264_idct_add16_neon:
 *   table byte == 0                              -> skip
 *   table byte == 1 and first coefficient != 0   -> DC-only routine
 *   anything else                                -> full transform
 *
 * In:   r0   = base destination pointer
 *       r1   = table of 32-bit offsets; every fourth entry is used
 *       r2   = first coefficient block (blocks are 128 bytes apart)
 *       r3   = destination stride (handed to the 8x8 routines in r2)
 *       [sp] = byte table indexed through scan8 (presumably per-block
 *              non-zero coefficient counts -- confirm against the caller)
 *
 * r12 counts 16, 12, 8, 4 -- i.e. four iterations.
 */
375cabdff1aSopenharmony_cifunction ff_h264_idct8_add4_neon, export=1
376cabdff1aSopenharmony_ci        push            {r4-r8,lr}
377cabdff1aSopenharmony_ci        mov             r4,  r0
378cabdff1aSopenharmony_ci        mov             r5,  r1
379cabdff1aSopenharmony_ci        mov             r1,  r2
380cabdff1aSopenharmony_ci        mov             r2,  r3
        /* Six registers were pushed (24 bytes), so this is the first stack argument. */
381cabdff1aSopenharmony_ci        ldr             r6,  [sp, #24]
382cabdff1aSopenharmony_ci        movrel          r7,  scan8
383cabdff1aSopenharmony_ci        mov             r12, #16
        /* Step scan8 by 4 entries and the offset table by 4 words per block. */
384cabdff1aSopenharmony_ci1:      ldrb            r8,  [r7], #4
385cabdff1aSopenharmony_ci        ldr             r0,  [r5], #16
386cabdff1aSopenharmony_ci        ldrb            r8,  [r6, r8]
        /* Table byte == 0: nothing to do for this block. */
387cabdff1aSopenharmony_ci        subs            r8,  r8,  #1
388cabdff1aSopenharmony_ci        blt             2f
        /*
         * lr = first coefficient; flags from subs survive ldrsh/add, so
         * "ne" means table byte != 1 and forces the full transform.
         */
389cabdff1aSopenharmony_ci        ldrsh           lr,  [r1]
390cabdff1aSopenharmony_ci        add             r0,  r0,  r4
391cabdff1aSopenharmony_ci        it              ne
392cabdff1aSopenharmony_ci        movne           lr,  #0
        /*
         * CONFIG_THUMB is defined outside this file; presumably it sets
         * bit 0 of the target for Thumb builds -- confirm.
         */
393cabdff1aSopenharmony_ci        cmp             lr,  #0
394cabdff1aSopenharmony_ci        ite             ne
395cabdff1aSopenharmony_ci        adrne           lr,  h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
396cabdff1aSopenharmony_ci        adreq           lr,  h264_idct8_add_neon_nothumb    + CONFIG_THUMB
397cabdff1aSopenharmony_ci        blx             lr
        /* Both callees return with r1 unchanged; advance one 8x8 block. */
398cabdff1aSopenharmony_ci2:      subs            r12, r12, #4
399cabdff1aSopenharmony_ci        add             r1,  r1,  #128
400cabdff1aSopenharmony_ci        bne             1b
401cabdff1aSopenharmony_ci        pop             {r4-r8,pc}
402cabdff1aSopenharmony_ciendfunc
403cabdff1aSopenharmony_ci
/*
 * scan8: 48-byte lookup table, one byte per 4x4 block index. Every entry is
 * written as x + y*8, and the block-loop functions in this file use it as a
 * byte index into the table they receive as their stack argument.
 * Entries 0-15 are read by the add16, add16intra and idct8_add4 loops (the
 * last one only every fourth entry); the add8 loop reads entries 16-19 and
 * 32-35.
 */
404cabdff1aSopenharmony_ciconst   scan8
405cabdff1aSopenharmony_ci        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
406cabdff1aSopenharmony_ci        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
407cabdff1aSopenharmony_ci        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
408cabdff1aSopenharmony_ci        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
409cabdff1aSopenharmony_ci        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
410cabdff1aSopenharmony_ci        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
411cabdff1aSopenharmony_ci        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
412cabdff1aSopenharmony_ci        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
413cabdff1aSopenharmony_ci        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
414cabdff1aSopenharmony_ci        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
415cabdff1aSopenharmony_ci        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
416cabdff1aSopenharmony_ci        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
417cabdff1aSopenharmony_ciendconst
418