/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Symbolic register names; presumably this is where v0, t0.., a0.. and AT
   come from (the header is not part of this file -- confirm). */
#include "regdef.h"

/* Some nicer register names: extra temporaries beyond t0-t9.
   td is the assembler temporary, which is why ".set noat" is used below. */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value.
   In pix_abs16x16_mvi_asm, a4 holds the line counter, a3 the line size and
   v0 the running result, so only te is usable as a temporary there;
   tf, tg and th would destroy live values. */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat               /* we use AT (td) ourselves */
        .set noreorder          /* keep the hand-scheduled instruction order */
        .arch pca56             /* target with the MVI extension (perr) */
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Returns the sum of absolute byte differences between two blocks that are
 * 16 bytes wide and h lines high.
 *
 * NOTE(review): the prototype above is reconstructed from the registers the
 * code actually reads; confirm the exact C types against the declaration
 * used by the callers.
 *
 * Arguments, as used by the code:
 *   a0  first argument, never read (overwritten as scratch in $aligned)
 *   a1  pix1; read with ldq, so it is expected to be 8-byte aligned
 *   a2  pix2; may have any alignment, which is tested once on entry
 *   a3  line_size; added to both pointers after every line.  It has to be
 *       a multiple of 8: the alignment of pix2 is only tested on entry, and
 *       the ext* instructions take their byte offset from the pointer after
 *       it has already been advanced.
 *   a4  h; has to be a positive multiple of 2 ($unaligned) or 4 ($aligned),
 *       because the loops only stop when the counter reaches exactly zero.
 * Result:
 *   v0  accumulated error
 * Clobbers:
 *   t0-t9, ta-td (t10-t12, AT), a0, a1, a2, a4, a5
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        /* Leaf routine: no stack frame, nothing saved. */
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0       # t0 = pix2 & 7
        clr     v0              # result = 0
        beq     t0, $aligned    # pix2 is 8-byte aligned: use plain loads
        .align 4
$unaligned:
        /* Two lines per iteration.  Each 16-byte line of pix2 is fetched as
           three aligned quadwords and shifted/merged into two quadwords.

           Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* calc line 0
           (a2 has already been advanced; only its low bits are used as the
           byte offset, which is why line_size must be a multiple of 8) */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4,  2, a4      # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* Four lines per iteration.  All loads are issued first and the
           perr/addq pairs afterwards; t0 and t1 are recycled as the
           per-line error registers once their pixel data has been used.
           a0 and te (a5) serve as additional temporaries for line 3. */

        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4,  4, a4      # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm