/*
   C-like prototype:
        void ff_j_rev_dct_arm(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

#include "libavutil/arm/asm.S"

/*
 * Fixed-point multipliers used by both passes of the transform.
 * Each FIX_a_bbbbbbbbb is the real value a.bbbbbbbbb multiplied by 2^13 and
 * rounded to the nearest integer (e.g. 0.541196100 * 8192 = 4433.48 -> 4433);
 * FIX_M_a_bbbbbbbbb is the same thing for the negated value.
 * FIX_0xFFFF is not a multiplier: it is the 16-bit mask used on the
 * DC-only row shortcut.
 */
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

/*
 * Byte offsets of the values above inside const_array (one 32-bit word per
 * entry, hence the step of 4).  They are used as load displacements from the
 * table base, so they must stay in the same order as the .word list of
 * const_array.
 */
#define FIX_0_298631336_ID      0
#define FIX_0_541196100_ID      4
#define FIX_0_765366865_ID      8
#define FIX_1_175875602_ID     12
#define FIX_1_501321110_ID     16
#define FIX_2_053119869_ID     20
#define FIX_3_072711026_ID     24
#define FIX_M_0_390180644_ID   28
#define FIX_M_0_899976223_ID   32
#define FIX_M_1_847759065_ID   36
#define FIX_M_1_961570560_ID   40
#define FIX_M_2_562915447_ID   44
#define FIX_0xFFFF_ID          48

/*
 * void ff_j_rev_dct_arm(DCTBLOCK data)
 *
 * In-place inverse DCT of one 8x8 block of signed 16-bit coefficients,
 * done as a pass over the 8 rows followed by a pass over the 8 columns.
 *
 * In:    r0  = pointer to the 64 coefficients (128 bytes, read with word
 *              loads, so it has to be 4-byte aligned)
 * Out:   nothing in registers; the block is overwritten with the result
 * Saved: r4-r11 and lr are pushed on entry and restored on exit
 * Clobb: r0-r3, r12, flags
 * Stack: 40 bytes for the entry push, plus 16 bytes of temporaries while a
 *        row or column is being processed
 *
 * Register roles inside the loops:
 *   lr  = address of the current row / column
 *   r12 = number of rows / columns still to process
 *   r11 = base address of const_array (FIX_*_ID are offsets from it)
 *   r0-r10 = scratch
 *
 * Row pass: within a row the inputs are read from byte offsets
 * 0,2,4,6,8,10,12,14 as d0,d2,d4,d6,d1,d3,d5,d7 (even terms first), while
 * the 8 results are written back in natural order.
 * NOTE(review): presumably this matches a coefficient permutation applied
 * by the caller - confirm against the C side.
 * Row results are descaled by 11 bits only (CONST_BITS - PASS1_BITS), the
 * column pass removes the remaining 18 bits (CONST_BITS + PASS1_BITS + 3).
 */
function ff_j_rev_dct_arm, export=1
        @ r0 is pushed as well: it is popped back into lr before the column pass
        push {r0, r4 - r11, lr}

        mov lr, r0                      @ lr = pointer to the current row
        mov r12, #8                     @ r12 = row-counter
        movrel r11, const_array         @ r11 = base pointer to the constants array
row_loop:
        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
        ldrsh r2, [lr, # 2]             @ r2 = 'd2'

        @ Shortcuts for sparse rows. The row is read again as four 32-bit
        @ words (which requires the block to be 4-byte aligned):
        @   r3 = OR of the words at offsets 4, 8 and 12 (items 2 to 7)
        @   r5 = r3 OR the word at offset 0             (all 8 items)
        ldr r5, [lr, # 0]
        ldr r6, [lr, # 4]
        ldr r3, [lr, # 8]
        ldr r4, [lr, #12]
        orr r3, r3, r4
        orr r3, r3, r6
        orrs r5, r3, r5
        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
        orrs r3, r3, r2                 @ zero only if items 1 to 7 are all '0'
        beq empty_row

        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
        ldrsh r6, [lr, # 6]             @ r6 = 'd6'

        @ Even part (d0, d2, d4, d6). Constant loads are interleaved with the
        @ multiplies that consume the previously loaded constant.
        ldr r3, [r11, #FIX_0_541196100_ID]
        add r7, r2, r6
        ldr r5, [r11, #FIX_M_1_847759065_ID]
        mul r7, r3, r7                      @ r7 = z1
        ldr r3, [r11, #FIX_0_765366865_ID]
        mla r6, r5, r6, r7                  @ r6 = tmp2
        add r5, r0, r4                      @ r5 = tmp0
        mla r2, r3, r2, r7                  @ r2 = tmp3
        sub r3, r0, r4                      @ r3 = tmp1

        @ tmp0 and tmp1 are scaled by 2^13 to line up with the products
        add r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
        add r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb r3, r6, r3, lsl #13             @ r3 = tmp12

        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part (d1, d3, d5, d7)
        ldrsh r3, [lr, #10]             @ r3 = 'd3'
        ldrsh r5, [lr, #12]             @ r5 = 'd5'
        ldrsh r7, [lr, #14]             @ r7 = 'd7'

        add r0, r3, r5                  @ r0 = 'z2'
        add r2, r1, r7                  @ r2 = 'z1'
        add r4, r3, r7                  @ r4 = 'z3'
        add r6, r1, r5                  @ r6 = 'z4'
        ldr r9, [r11, #FIX_1_175875602_ID]
        add r8, r4, r6                  @ r8 = z3 + z4
        ldr r10, [r11, #FIX_M_0_899976223_ID]
        mul r8, r9, r8                  @ r8 = 'z5'
        ldr r9, [r11, #FIX_M_2_562915447_ID]
        mul r2, r10, r2                 @ r2 = 'z1'
        ldr r10, [r11, #FIX_M_1_961570560_ID]
        mul r0, r9, r0                  @ r0 = 'z2'
        ldr r9, [r11, #FIX_M_0_390180644_ID]
        mla r4, r10, r4, r8             @ r4 = 'z3'
        ldr r10, [r11, #FIX_0_298631336_ID]
        mla r6, r9, r6, r8              @ r6 = 'z4'
        ldr r9, [r11, #FIX_2_053119869_ID]
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
        ldr r10, [r11, #FIX_3_072711026_ID]
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
        ldr r9, [r11, #FIX_1_501321110_ID]
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
        add r7, r7, r4                  @ r7 = tmp0
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
        add r5, r5, r6                  @ r5 = tmp1
        add r3, r3, r4                  @ r3 = tmp2
        add r1, r1, r6                  @ r1 = tmp3

        @ The values pushed from r3 and r4 come back in r4 and r6
        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Each output below is DESCALE(x, 11) = (x + (1 << 10)) >> 11

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add r8, r0, r1
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub r8, r0, r1
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add r8, r6, r3
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub r8, r6, r3
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add r8, r4, r5
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub r8, r4, r5
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add r8, r2, r7
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub r8, r2, r7
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 8]

        @ End of row loop
        add lr, lr, #16                 @ next row (8 items of 2 bytes)
        subs r12, r12, #1
        bne row_loop
        beq start_column_loop           @ Z is set here, so this always branches

empty_row:
        @ Only the first item of the row is non-zero: all 8 outputs are equal
        @ to d0 << 2. Build that 16-bit value twice in one word and store it
        @ with four word writes.
        ldr r1, [r11, #FIX_0xFFFF_ID]
        mov r0, r0, lsl #2
        and r0, r0, r1                  @ keep the low 16 bits only
        add r0, r0, r0, lsl #16         @ same value in both halfwords
        str r0, [lr, # 0]
        str r0, [lr, # 4]
        str r0, [lr, # 8]
        str r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add lr, lr, #16
        subs r12, r12, #1
        bne row_loop

start_column_loop:
        @ Start of column loop
        pop {lr}                        @ lr = block pointer (the r0 pushed on entry)
        mov r12, #8                     @ r12 = column-counter
column_loop:
        @ Rows are 16 bytes apart, so offset (2*k)*8 addresses row k:
        @ d0..d7 are read from rows 0..7 in natural order.
        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'

        @ Even part, same computation as in the row pass
        ldr r3, [r11, #FIX_0_541196100_ID]
        add r1, r2, r6
        ldr r5, [r11, #FIX_M_1_847759065_ID]
        mul r1, r3, r1                      @ r1 = z1
        ldr r3, [r11, #FIX_0_765366865_ID]
        mla r6, r5, r6, r1                  @ r6 = tmp2
        add r5, r0, r4                      @ r5 = tmp0
        mla r2, r3, r2, r1                  @ r2 = tmp3
        sub r3, r0, r4                      @ r3 = tmp1

        add r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
        add r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb r6, r6, r3, lsl #13             @ r6 = tmp12

        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr r9, r1, r3
        orr r10, r5, r7
        orrs r10, r9, r10
        beq empty_odd_column

        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part, same computation as in the row pass
        add r0, r3, r5                  @ r0 = 'z2'
        add r2, r1, r7                  @ r2 = 'z1'
        add r4, r3, r7                  @ r4 = 'z3'
        add r6, r1, r5                  @ r6 = 'z4'
        ldr r9, [r11, #FIX_1_175875602_ID]
        add r8, r4, r6                  @ r8 = z3 + z4
        ldr r10, [r11, #FIX_M_0_899976223_ID]
        mul r8, r9, r8                  @ r8 = 'z5'
        ldr r9, [r11, #FIX_M_2_562915447_ID]
        mul r2, r10, r2                 @ r2 = 'z1'
        ldr r10, [r11, #FIX_M_1_961570560_ID]
        mul r0, r9, r0                  @ r0 = 'z2'
        ldr r9, [r11, #FIX_M_0_390180644_ID]
        mla r4, r10, r4, r8             @ r4 = 'z3'
        ldr r10, [r11, #FIX_0_298631336_ID]
        mla r6, r9, r6, r8              @ r6 = 'z4'
        ldr r9, [r11, #FIX_2_053119869_ID]
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
        ldr r10, [r11, #FIX_3_072711026_ID]
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
        ldr r9, [r11, #FIX_1_501321110_ID]
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
        add r7, r7, r4                  @ r7 = tmp0
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
        add r5, r5, r6                  @ r5 = tmp1
        add r3, r3, r4                  @ r3 = tmp2
        add r1, r1, r6                  @ r1 = tmp3

        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Each output below is DESCALE(x, 18) = (x + (1 << 17)) >> 18

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add r8, r0, r1
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub r8, r0, r1
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add r8, r4, r3
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub r8, r4, r3
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add r8, r6, r5
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub r8, r6, r5
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add r8, r2, r7
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub r8, r2, r7
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 8*8)]

        @ End of column loop
        add lr, lr, #2                  @ next column (one item to the right)
        subs r12, r12, #1
        bne column_loop
        beq the_end                     @ Z is set here, so this always branches

empty_odd_column:
        @ d1, d3, d5 and d7 are all '0', so tmp0 to tmp3 are '0' as well and
        @ each even-part value is simply descaled and stored to the two rows
        @ that would have received (value + tmpN) and (value - tmpN).

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add r0, r0, #(1<<17)
        mov r0, r0, asr #18
        strh r0, [lr, #( 0*8)]
        strh r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add r4, r4, #(1<<17)
        mov r4, r4, asr #18
        strh r4, [lr, #( 2*8)]
        strh r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add r6, r6, #(1<<17)
        mov r6, r6, asr #18
        strh r6, [lr, #( 4*8)]
        strh r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add r2, r2, #(1<<17)
        mov r2, r2, asr #18
        strh r2, [lr, #( 6*8)]
        strh r2, [lr, #( 8*8)]

        @ End of column loop
        add lr, lr, #2
        subs r12, r12, #1
        bne column_loop

the_end:
        @ The end: restore the saved registers, the pushed lr goes straight into pc
        pop {r4 - r11, pc}
endfunc

/*
 * Constant table read by ff_j_rev_dct_arm through a base register plus one
 * of the FIX_*_ID displacements.  One 32-bit word per entry; the order of
 * the entries must match the FIX_*_ID byte offsets (entry k lives at
 * offset 4*k).
 */
const const_array
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF
endconst