/*
 * Alpha optimized IDCT-related routines
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * These functions are scheduled for pca56. They should work
 * reasonably on ev6, though.
 */

#include "regdef.h"

/*
 * Assembler setup for the hand-scheduled routines in this file:
 *   .set noat       - the assembler must not use the AT register behind
 *                     our back when expanding macro instructions.
 *   .set noreorder  - keep instructions in exactly the written order; the
 *                     code below is manually scheduled.
 *   .arch pca56     - target CPU selection; the routines use the MVI
 *                     instructions maxsw4/minsw4/pkwb/unpkbw.
 * Register aliases beyond the usual t0-t9/a0-a2 (ta, tb, te, tf, tg, th)
 * are not defined in this file; presumably they come from regdef.h.
 */
        .set noat
        .set noreorder
        .arch pca56
        .text

/************************************************************************
 * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
 *                                 ptrdiff_t line_size)
 *
 * Clamps 64 signed 16-bit values read from block to the range 0..255
 * and stores them as 8 rows of 8 bytes, the rows being line_size bytes
 * apart starting at pixels.
 *
 * In:    a0 = block, a1 = pixels, a2 = line_size
 * Out:   nothing (void); a0 and a1 are advanced past the processed data
 * Uses:  t0-t3 (data), t8 (clamp mask), t9 (row counter),
 *        ta (address of the second row of the current pair)
 * Stack: none (leaf routine, zero-size frame)
 *
 * NOTE(review): block is read with ldq and pixels written with stl, so
 * block is assumed 8-byte aligned and pixels/line_size 4-byte aligned;
 * confirm against the callers.
 *
 * The instruction order is a manual schedule; do not reorder casually.
 */
        .align 6
        .globl put_pixels_clamped_mvi_asm
        .ent put_pixels_clamped_mvi_asm
put_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t8, -1
        lda     t9, 8           # loop counter
        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff

        /* Each iteration handles two rows: 4 quadwords = 16 shorts. */
        .align 4
1:      ldq     t0,  0(a0)
        ldq     t1,  8(a0)
        ldq     t2, 16(a0)
        ldq     t3, 24(a0)

        /* Lower clamp: per 16-bit lane max(x, 0). */
        maxsw4  t0, zero, t0
        subq    t9, 2, t9
        maxsw4  t1, zero, t1
        lda     a0, 32(a0)

        maxsw4  t2, zero, t2
        addq    a1, a2, ta
        maxsw4  t3, zero, t3
        /* Upper clamp: per 16-bit lane min(x, 255). */
        minsw4  t0, t8, t0

        minsw4  t1, t8, t1
        minsw4  t2, t8, t2
        minsw4  t3, t8, t3
        /* Pack the four clamped words into four bytes. */
        pkwb    t0, t0

        pkwb    t1, t1
        pkwb    t2, t2
        pkwb    t3, t3
        stl     t0, 0(a1)

        stl     t1, 4(a1)
        addq    ta, a2, a1
        stl     t2, 0(ta)
        stl     t3, 4(ta)

        bne     t9, 1b
        ret
        .end put_pixels_clamped_mvi_asm

/************************************************************************
 * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
 *                                 ptrdiff_t line_size)
 *
 * For 8 rows of 8 bytes (rows line_size bytes apart, starting at
 * pixels): adds the corresponding signed 16-bit value from block to each
 * byte, clamps the sum to 0..255 and writes it back in place.
 *
 * In:    a0 = block, a1 = pixels, a2 = line_size
 * Out:   nothing (void); a0 and a1 are advanced past the processed data
 * Uses:  t0-t9, ta, tb (data and temporaries),
 *        te (address of the second row of the current pair),
 *        tf (0x00ff00ff00ff00ff clamp mask),
 *        tg (0x8000800080008000 per-lane sign-bit mask),
 *        th (row counter)
 * Stack: none (leaf routine, zero-size frame)
 *
 * The four 16-bit lanes of a quadword are added with a single 64-bit
 * addq.  To keep carries from crossing lane boundaries, the sign bit of
 * every short is saved (and) and cleared (bic) first; after the add the
 * saved bits are xor-ed back in, which yields the per-lane sum modulo
 * 2^16.
 *
 * The trailing "q n" comments give the quadword (0-3) and the step:
 *   0 unpack pixel bytes to words   5 clamp below at 0
 *   1 save sign bits of the shorts  6 clamp above at 255
 *   2 clear sign bits               7 pack words to bytes
 *   3 add pixels                    8 store
 *   4 restore sign bits
 *
 * NOTE(review): block is read with ldq and pixels accessed with ldl/stl,
 * so block is assumed 8-byte aligned and pixels/line_size 4-byte aligned;
 * confirm against the callers.
 *
 * The instruction order is a manual schedule; do not reorder casually.
 */
        .align 6
        .globl add_pixels_clamped_mvi_asm
        .ent add_pixels_clamped_mvi_asm
add_pixels_clamped_mvi_asm:
        .frame sp, 0, ra
        .prologue 0

        lda     t1, -1
        lda     th, 8
        zap     t1, 0x33, tg
        nop

        srl     tg, 1, t0
        xor     tg, t0, tg      # 0x8000800080008000
        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff

        /* Each iteration handles two rows: 4 quadwords = 16 shorts. */
        .align 4
1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
        ldl     t4, 4(a1)       # pix1
        addq    a1, a2, te      # pixels += line_size
        ldq     t0, 0(a0)       # shorts0

        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
        ldl     ta, 4(te)       # pix3
        ldq     t3, 8(a0)       # shorts1
        ldq     t6, 16(a0)      # shorts2

        ldq     t9, 24(a0)      # shorts3
        unpkbw  t1, t1          # 0 0 (quarter/op no.)
        and     t0, tg, t2      # 0 1
        unpkbw  t4, t4          # 1 0

        bic     t0, tg, t0      # 0 2
        unpkbw  t7, t7          # 2 0
        and     t3, tg, t5      # 1 1
        addq    t0, t1, t0      # 0 3

        xor     t0, t2, t0      # 0 4
        unpkbw  ta, ta          # 3 0
        and     t6, tg, t8      # 2 1
        maxsw4  t0, zero, t0    # 0 5

        bic     t3, tg, t3      # 1 2
        bic     t6, tg, t6      # 2 2
        minsw4  t0, tf, t0      # 0 6
        addq    t3, t4, t3      # 1 3

        pkwb    t0, t0          # 0 7
        xor     t3, t5, t3      # 1 4
        maxsw4  t3, zero, t3    # 1 5
        addq    t6, t7, t6      # 2 3

        xor     t6, t8, t6      # 2 4
        and     t9, tg, tb      # 3 1
        minsw4  t3, tf, t3      # 1 6
        bic     t9, tg, t9      # 3 2

        maxsw4  t6, zero, t6    # 2 5
        addq    t9, ta, t9      # 3 3
        stl     t0, 0(a1)       # 0 8
        minsw4  t6, tf, t6      # 2 6

        xor     t9, tb, t9      # 3 4
        maxsw4  t9, zero, t9    # 3 5
        lda     a0, 32(a0)      # block += 16;
        pkwb    t3, t3          # 1 7

        minsw4  t9, tf, t9      # 3 6
        subq    th, 2, th       # two rows done
        pkwb    t6, t6          # 2 7
        pkwb    t9, t9          # 3 7

        stl     t3, 4(a1)       # 1 8
        addq    te, a2, a1      # pixels += line_size
        stl     t6, 0(te)       # 2 8
        stl     t9, 4(te)       # 3 8

        bne     th, 1b
        ret
        .end add_pixels_clamped_mvi_asm