1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Optimization of some functions from mpegvideo.c for armv5te
3cabdff1aSopenharmony_ci * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * This file is part of FFmpeg.
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
11cabdff1aSopenharmony_ci *
12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15cabdff1aSopenharmony_ci * Lesser General Public License for more details.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20cabdff1aSopenharmony_ci */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "config.h"
23cabdff1aSopenharmony_ci#include "libavutil/arm/asm.S"
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci/*
26cabdff1aSopenharmony_ci * Special optimized version of dct_unquantize_h263_helper_c. It
27cabdff1aSopenharmony_ci * requires the block to be at least 8-byte aligned, and may process
28cabdff1aSopenharmony_ci * more elements than requested.  But it is guaranteed to never
29cabdff1aSopenharmony_ci * process more than 64 elements provided that the count argument is <= 64,
30cabdff1aSopenharmony_ci * so it is safe. This function is optimized for a common distribution
31cabdff1aSopenharmony_ci * of values for nCoeffs (they are mostly a multiple of 8 plus one or
32cabdff1aSopenharmony_ci * two extra elements). So this function processes data 8 elements
33cabdff1aSopenharmony_ci * per loop iteration and contains optional processing of 2 more
34cabdff1aSopenharmony_ci * elements at the end.
35cabdff1aSopenharmony_ci *
36cabdff1aSopenharmony_ci * The inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
37cabdff1aSopenharmony_ci */
38cabdff1aSopenharmony_ci
/*
 * dequant_t: dequantize the coefficient held in the TOP halfword of \src.
 *
 *   \dst  receives top16(\src) * bottom16(\mul) + sign * \add when the
 *         coefficient is non-zero; it is not written when it is zero
 *   \src  register whose upper 16 bits hold the signed coefficient
 *   \mul  multiplier; smlatb reads only its bottom halfword
 *   \add  value added for positive and subtracted for negative coefficients
 *   \tmp  scratch; ends up as +\add, -\add, or 0 for a zero coefficient
 *
 * Relies on ip holding 0 (the function in this file sets it before use), so
 * the rsbs is effectively "tmp = src >> 16 (arithmetic)" with flags set.
 * addgt/rsblt carry no S suffix, so all three conditions below test the
 * flags produced by that single rsbs.
 */
39cabdff1aSopenharmony_ci.macro  dequant_t       dst, src, mul, add, tmp
40cabdff1aSopenharmony_ci        rsbs            \tmp, ip, \src, asr #16 /* tmp = sign-extended top half - ip; sets flags */
41cabdff1aSopenharmony_ci        it              gt /* If-Then prefix for the conditional instruction that follows */
42cabdff1aSopenharmony_ci        addgt           \tmp, \add, #0 /* coefficient > 0: tmp = +add */
43cabdff1aSopenharmony_ci        it              lt
44cabdff1aSopenharmony_ci        rsblt           \tmp, \add, #0 /* coefficient < 0: tmp = 0 - add */
45cabdff1aSopenharmony_ci        it              ne
46cabdff1aSopenharmony_ci        smlatbne        \dst, \src, \mul, \tmp /* non-zero: dst = top(src) * bottom(mul) + tmp */
47cabdff1aSopenharmony_ci.endm
48cabdff1aSopenharmony_ci
/*
 * dequant_b: dequantize the coefficient held in the BOTTOM halfword of \src.
 *
 *   \dst  receives bottom16(\src) * bottom16(\mul) + sign * \add when the
 *         coefficient is non-zero; it is not written when it is zero
 *   \src  register whose lower 16 bits hold the signed coefficient
 *   \mul  multiplier; smlabb reads only its bottom halfword
 *   \add  value added for positive and subtracted for negative coefficients
 *   \tmp  scratch; ends up as +\add, -\add, or (src << 16) == 0 for zero
 *
 * Relies on ip holding 0 (the function in this file sets it before use).
 * Shifting left by 16 moves the bottom halfword into the sign position, so
 * the flags from rsbs describe the sign/zero state of that halfword alone.
 * When invoked with \dst == \src (as the function in this file does) a zero
 * coefficient leaves a zero bottom halfword in \dst, which is what a
 * subsequent halfword store writes out.
 */
49cabdff1aSopenharmony_ci.macro  dequant_b       dst, src, mul, add, tmp
50cabdff1aSopenharmony_ci        rsbs            \tmp, ip, \src, lsl #16 /* tmp = (src << 16) - ip; sets flags */
51cabdff1aSopenharmony_ci        it              gt /* If-Then prefix for the conditional instruction that follows */
52cabdff1aSopenharmony_ci        addgt           \tmp, \add, #0 /* coefficient > 0: tmp = +add */
53cabdff1aSopenharmony_ci        it              lt
54cabdff1aSopenharmony_ci        rsblt           \tmp, \add, #0 /* coefficient < 0: tmp = 0 - add */
55cabdff1aSopenharmony_ci        it              ne
56cabdff1aSopenharmony_ci        smlabbne        \dst, \src, \mul, \tmp /* non-zero: dst = bottom(src) * bottom(mul) + tmp */
57cabdff1aSopenharmony_ci.endm
58cabdff1aSopenharmony_ci
/*
 * ff_dct_unquantize_h263_armv5te
 *
 * In-place dequantization of an array of signed 16-bit coefficients: each
 * non-zero value c becomes c * mul + add (c > 0) or c * mul - add (c < 0);
 * zero values stay zero.
 *
 * Register use as seen in the code below (argument names are not visible in
 * this file; r1/r2 are presumably the qmul/qadd of the C helper - confirm
 * against the prototype):
 *   r0  pointer to the coefficients, advanced as results are stored
 *   r1  multiplier (only its bottom halfword is used by the smla* forms)
 *   r2  addend magnitude
 *   r3  element count; kept biased by -2 while the main loop runs
 *   ip  constant 0 required by the dequant_t / dequant_b macros
 *   r4-r7   packed input pairs, r8/r9/lr scratch and results
 *
 * Structure: a main loop handling 8 elements per iteration, followed by an
 * optional tail that always handles exactly 2 elements. A count <= 2 on
 * entry goes directly to the tail, so two elements are processed even when
 * fewer were requested.
 *
 * The store order (bottom-halfword result first, top-halfword result second)
 * assumes the lower-addressed coefficient sits in the bottom half of each
 * loaded word, i.e. a little-endian layout.
 *
 * `function` / `endfunc` are macros that are not defined in this file
 * (presumably provided by the included libavutil/arm/asm.S).
 */
59cabdff1aSopenharmony_cifunction ff_dct_unquantize_h263_armv5te, export=1
60cabdff1aSopenharmony_ci        push            {r4-r9,lr} /* lr is saved so it can serve as a scratch register */
61cabdff1aSopenharmony_ci        mov             ip, #0 /* zero operand consumed by the rsbs inside both macros */
62cabdff1aSopenharmony_ci        subs            r3, r3, #2 /* r3 = count - 2 */
63cabdff1aSopenharmony_ci        ble             2f /* count <= 2: only the two-element tail runs */
64cabdff1aSopenharmony_ci        ldrd            r4, r5, [r0, #0] /* preload elements 0..3, two per register */
65cabdff1aSopenharmony_ci1:
66cabdff1aSopenharmony_ci        ldrd            r6, r7, [r0, #8] /* elements 4..7 of this iteration */
67cabdff1aSopenharmony_ci
68cabdff1aSopenharmony_ci        dequant_t       r9, r4, r1, r2, r9 /* top halves first: the dequant_b calls below overwrite r4/r5 */
69cabdff1aSopenharmony_ci        dequant_t       lr, r5, r1, r2, lr /* dst == tmp, so a zero coefficient leaves 0 in the result */
70cabdff1aSopenharmony_ci        dequant_b       r4, r4, r1, r2, r8 /* bottom halves, computed in place */
71cabdff1aSopenharmony_ci        dequant_b       r5, r5, r1, r2, r8
72cabdff1aSopenharmony_ci
73cabdff1aSopenharmony_ci        strh            r4, [r0], #2 /* element 0 (bottom of first word); r0 += 2 after each store */
74cabdff1aSopenharmony_ci        strh            r9, [r0], #2 /* element 1 (top of first word) */
75cabdff1aSopenharmony_ci        strh            r5, [r0], #2 /* element 2 */
76cabdff1aSopenharmony_ci        strh            lr, [r0], #2 /* element 3; r0 now points at the data loaded into r6/r7 */
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_ci        dequant_t       r9, r6, r1, r2, r9 /* same sequence for the second group of four */
79cabdff1aSopenharmony_ci        dequant_t       lr, r7, r1, r2, lr
80cabdff1aSopenharmony_ci        dequant_b       r6, r6, r1, r2, r8
81cabdff1aSopenharmony_ci        dequant_b       r7, r7, r1, r2, r8
82cabdff1aSopenharmony_ci
83cabdff1aSopenharmony_ci        strh            r6, [r0], #2 /* element 4 */
84cabdff1aSopenharmony_ci        strh            r9, [r0], #2 /* element 5 */
85cabdff1aSopenharmony_ci        strh            r7, [r0], #2 /* element 6 */
86cabdff1aSopenharmony_ci        strh            lr, [r0], #2 /* element 7; r0 has advanced 16 bytes in this iteration */
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ci        subs            r3, r3, #8 /* eight elements consumed */
89cabdff1aSopenharmony_ci        it              gt
90cabdff1aSopenharmony_ci        ldrdgt          r4, r5, [r0, #0] /* load data early to avoid load/use pipeline stall */
91cabdff1aSopenharmony_ci        bgt             1b /* more than 2 elements still pending: another full iteration */
92cabdff1aSopenharmony_ci
93cabdff1aSopenharmony_ci        adds            r3, r3, #2 /* undo the -2 bias: r3 = elements still requested */
94cabdff1aSopenharmony_ci        it              le
95cabdff1aSopenharmony_ci        pople           {r4-r9,pc} /* nothing left: restore registers and return */
96cabdff1aSopenharmony_ci2:
97cabdff1aSopenharmony_ci        ldrsh           r9, [r0, #0] /* tail: two sign-extended coefficients */
98cabdff1aSopenharmony_ci        ldrsh           lr, [r0, #2]
99cabdff1aSopenharmony_ci        mov             r8, r2 /* assume positive: addend = +r2 */
100cabdff1aSopenharmony_ci        cmp             r9, #0
101cabdff1aSopenharmony_ci        it              lt
102cabdff1aSopenharmony_ci        rsblt           r8, r2, #0 /* negative coefficient: addend = -r2 */
103cabdff1aSopenharmony_ci        it              ne
104cabdff1aSopenharmony_ci        smlabbne        r9, r9, r1, r8 /* non-zero: r9 = r9 * bottom(r1) + addend; zero stays zero */
105cabdff1aSopenharmony_ci        mov             r8, r2 /* same steps for the second tail coefficient */
106cabdff1aSopenharmony_ci        cmp             lr, #0
107cabdff1aSopenharmony_ci        it              lt
108cabdff1aSopenharmony_ci        rsblt           r8, r2, #0
109cabdff1aSopenharmony_ci        it              ne
110cabdff1aSopenharmony_ci        smlabbne        lr, lr, r1, r8
111cabdff1aSopenharmony_ci        strh            r9, [r0], #2 /* write both tail results back in place */
112cabdff1aSopenharmony_ci        strh            lr, [r0], #2
113cabdff1aSopenharmony_ci        pop             {r4-r9,pc} /* restore callee-saved registers and return via pc */
114cabdff1aSopenharmony_ciendfunc
115