1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Simple IDCT (Alpha optimized)
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * based upon some outcommented C code from mpeg2dec (idct_mmx.c
7cabdff1aSopenharmony_ci * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * Alpha optimizations by Måns Rullgård <mans@mansr.com>
10cabdff1aSopenharmony_ci *                     and Falk Hueffner <falk@debian.org>
11cabdff1aSopenharmony_ci *
12cabdff1aSopenharmony_ci * This file is part of FFmpeg.
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
15cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
16cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
17cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
18cabdff1aSopenharmony_ci *
19cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
20cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
21cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22cabdff1aSopenharmony_ci * Lesser General Public License for more details.
23cabdff1aSopenharmony_ci *
24cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
25cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
26cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27cabdff1aSopenharmony_ci */
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ci#include "idctdsp_alpha.h"
30cabdff1aSopenharmony_ci#include "asm.h"
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_ci// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
33cabdff1aSopenharmony_ci// W4 is actually exactly 16384, but using 16383 works around
34cabdff1aSopenharmony_ci// accumulating rounding errors for some encoders
/* Fixed-point cosine coefficients W1..W7, indexed by i in the formula above. */
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6  8867
#define W7  4520
/* Right shift applied to every output of the row pass; 1 << (ROW_SHIFT - 1)
 * is added beforehand as the rounding bias. */
#define ROW_SHIFT 11
/* Right shift applied to every output of the column pass; the matching
 * bias (1 << (COL_SHIFT - 1)) / W4 is added to the DC input instead. */
#define COL_SHIFT 20
44cabdff1aSopenharmony_ci
45cabdff1aSopenharmony_ci/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
46cabdff1aSopenharmony_cistatic inline int idct_row(int16_t *row)
47cabdff1aSopenharmony_ci{
48cabdff1aSopenharmony_ci    int a0, a1, a2, a3, b0, b1, b2, b3, t;
49cabdff1aSopenharmony_ci    uint64_t l, r, t2;
50cabdff1aSopenharmony_ci    l = ldq(row);
51cabdff1aSopenharmony_ci    r = ldq(row + 4);
52cabdff1aSopenharmony_ci
53cabdff1aSopenharmony_ci    if (l == 0 && r == 0)
54cabdff1aSopenharmony_ci        return 0;
55cabdff1aSopenharmony_ci
56cabdff1aSopenharmony_ci    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
57cabdff1aSopenharmony_ci
58cabdff1aSopenharmony_ci    if (((l & ~0xffffUL) | r) == 0) {
59cabdff1aSopenharmony_ci        a0 >>= ROW_SHIFT;
60cabdff1aSopenharmony_ci        t2 = (uint16_t) a0;
61cabdff1aSopenharmony_ci        t2 |= t2 << 16;
62cabdff1aSopenharmony_ci        t2 |= t2 << 32;
63cabdff1aSopenharmony_ci
64cabdff1aSopenharmony_ci        stq(t2, row);
65cabdff1aSopenharmony_ci        stq(t2, row + 4);
66cabdff1aSopenharmony_ci        return 1;
67cabdff1aSopenharmony_ci    }
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_ci    a1 = a0;
70cabdff1aSopenharmony_ci    a2 = a0;
71cabdff1aSopenharmony_ci    a3 = a0;
72cabdff1aSopenharmony_ci
73cabdff1aSopenharmony_ci    t = extwl(l, 4);            /* row[2] */
74cabdff1aSopenharmony_ci    if (t != 0) {
75cabdff1aSopenharmony_ci        t = sextw(t);
76cabdff1aSopenharmony_ci        a0 += W2 * t;
77cabdff1aSopenharmony_ci        a1 += W6 * t;
78cabdff1aSopenharmony_ci        a2 -= W6 * t;
79cabdff1aSopenharmony_ci        a3 -= W2 * t;
80cabdff1aSopenharmony_ci    }
81cabdff1aSopenharmony_ci
82cabdff1aSopenharmony_ci    t = extwl(r, 0);            /* row[4] */
83cabdff1aSopenharmony_ci    if (t != 0) {
84cabdff1aSopenharmony_ci        t = sextw(t);
85cabdff1aSopenharmony_ci        a0 += W4 * t;
86cabdff1aSopenharmony_ci        a1 -= W4 * t;
87cabdff1aSopenharmony_ci        a2 -= W4 * t;
88cabdff1aSopenharmony_ci        a3 += W4 * t;
89cabdff1aSopenharmony_ci    }
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci    t = extwl(r, 4);            /* row[6] */
92cabdff1aSopenharmony_ci    if (t != 0) {
93cabdff1aSopenharmony_ci        t = sextw(t);
94cabdff1aSopenharmony_ci        a0 += W6 * t;
95cabdff1aSopenharmony_ci        a1 -= W2 * t;
96cabdff1aSopenharmony_ci        a2 += W2 * t;
97cabdff1aSopenharmony_ci        a3 -= W6 * t;
98cabdff1aSopenharmony_ci    }
99cabdff1aSopenharmony_ci
100cabdff1aSopenharmony_ci    t = extwl(l, 2);            /* row[1] */
101cabdff1aSopenharmony_ci    if (t != 0) {
102cabdff1aSopenharmony_ci        t = sextw(t);
103cabdff1aSopenharmony_ci        b0 = W1 * t;
104cabdff1aSopenharmony_ci        b1 = W3 * t;
105cabdff1aSopenharmony_ci        b2 = W5 * t;
106cabdff1aSopenharmony_ci        b3 = W7 * t;
107cabdff1aSopenharmony_ci    } else {
108cabdff1aSopenharmony_ci        b0 = 0;
109cabdff1aSopenharmony_ci        b1 = 0;
110cabdff1aSopenharmony_ci        b2 = 0;
111cabdff1aSopenharmony_ci        b3 = 0;
112cabdff1aSopenharmony_ci    }
113cabdff1aSopenharmony_ci
114cabdff1aSopenharmony_ci    t = extwl(l, 6);            /* row[3] */
115cabdff1aSopenharmony_ci    if (t) {
116cabdff1aSopenharmony_ci        t = sextw(t);
117cabdff1aSopenharmony_ci        b0 += W3 * t;
118cabdff1aSopenharmony_ci        b1 -= W7 * t;
119cabdff1aSopenharmony_ci        b2 -= W1 * t;
120cabdff1aSopenharmony_ci        b3 -= W5 * t;
121cabdff1aSopenharmony_ci    }
122cabdff1aSopenharmony_ci
123cabdff1aSopenharmony_ci
124cabdff1aSopenharmony_ci    t = extwl(r, 2);            /* row[5] */
125cabdff1aSopenharmony_ci    if (t) {
126cabdff1aSopenharmony_ci        t = sextw(t);
127cabdff1aSopenharmony_ci        b0 += W5 * t;
128cabdff1aSopenharmony_ci        b1 -= W1 * t;
129cabdff1aSopenharmony_ci        b2 += W7 * t;
130cabdff1aSopenharmony_ci        b3 += W3 * t;
131cabdff1aSopenharmony_ci    }
132cabdff1aSopenharmony_ci
133cabdff1aSopenharmony_ci    t = extwl(r, 6);            /* row[7] */
134cabdff1aSopenharmony_ci    if (t) {
135cabdff1aSopenharmony_ci        t = sextw(t);
136cabdff1aSopenharmony_ci        b0 += W7 * t;
137cabdff1aSopenharmony_ci        b1 -= W5 * t;
138cabdff1aSopenharmony_ci        b2 += W3 * t;
139cabdff1aSopenharmony_ci        b3 -= W1 * t;
140cabdff1aSopenharmony_ci    }
141cabdff1aSopenharmony_ci
142cabdff1aSopenharmony_ci    row[0] = (a0 + b0) >> ROW_SHIFT;
143cabdff1aSopenharmony_ci    row[1] = (a1 + b1) >> ROW_SHIFT;
144cabdff1aSopenharmony_ci    row[2] = (a2 + b2) >> ROW_SHIFT;
145cabdff1aSopenharmony_ci    row[3] = (a3 + b3) >> ROW_SHIFT;
146cabdff1aSopenharmony_ci    row[4] = (a3 - b3) >> ROW_SHIFT;
147cabdff1aSopenharmony_ci    row[5] = (a2 - b2) >> ROW_SHIFT;
148cabdff1aSopenharmony_ci    row[6] = (a1 - b1) >> ROW_SHIFT;
149cabdff1aSopenharmony_ci    row[7] = (a0 - b0) >> ROW_SHIFT;
150cabdff1aSopenharmony_ci
151cabdff1aSopenharmony_ci    return 2;
152cabdff1aSopenharmony_ci}
153cabdff1aSopenharmony_ci
154cabdff1aSopenharmony_cistatic inline void idct_col(int16_t *col)
155cabdff1aSopenharmony_ci{
156cabdff1aSopenharmony_ci    int a0, a1, a2, a3, b0, b1, b2, b3;
157cabdff1aSopenharmony_ci
158cabdff1aSopenharmony_ci    col[0] += (1 << (COL_SHIFT - 1)) / W4;
159cabdff1aSopenharmony_ci
160cabdff1aSopenharmony_ci    a0 = W4 * col[8 * 0];
161cabdff1aSopenharmony_ci    a1 = W4 * col[8 * 0];
162cabdff1aSopenharmony_ci    a2 = W4 * col[8 * 0];
163cabdff1aSopenharmony_ci    a3 = W4 * col[8 * 0];
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_ci    if (col[8 * 2]) {
166cabdff1aSopenharmony_ci        a0 += W2 * col[8 * 2];
167cabdff1aSopenharmony_ci        a1 += W6 * col[8 * 2];
168cabdff1aSopenharmony_ci        a2 -= W6 * col[8 * 2];
169cabdff1aSopenharmony_ci        a3 -= W2 * col[8 * 2];
170cabdff1aSopenharmony_ci    }
171cabdff1aSopenharmony_ci
172cabdff1aSopenharmony_ci    if (col[8 * 4]) {
173cabdff1aSopenharmony_ci        a0 += W4 * col[8 * 4];
174cabdff1aSopenharmony_ci        a1 -= W4 * col[8 * 4];
175cabdff1aSopenharmony_ci        a2 -= W4 * col[8 * 4];
176cabdff1aSopenharmony_ci        a3 += W4 * col[8 * 4];
177cabdff1aSopenharmony_ci    }
178cabdff1aSopenharmony_ci
179cabdff1aSopenharmony_ci    if (col[8 * 6]) {
180cabdff1aSopenharmony_ci        a0 += W6 * col[8 * 6];
181cabdff1aSopenharmony_ci        a1 -= W2 * col[8 * 6];
182cabdff1aSopenharmony_ci        a2 += W2 * col[8 * 6];
183cabdff1aSopenharmony_ci        a3 -= W6 * col[8 * 6];
184cabdff1aSopenharmony_ci    }
185cabdff1aSopenharmony_ci
186cabdff1aSopenharmony_ci    if (col[8 * 1]) {
187cabdff1aSopenharmony_ci        b0 = W1 * col[8 * 1];
188cabdff1aSopenharmony_ci        b1 = W3 * col[8 * 1];
189cabdff1aSopenharmony_ci        b2 = W5 * col[8 * 1];
190cabdff1aSopenharmony_ci        b3 = W7 * col[8 * 1];
191cabdff1aSopenharmony_ci    } else {
192cabdff1aSopenharmony_ci        b0 = 0;
193cabdff1aSopenharmony_ci        b1 = 0;
194cabdff1aSopenharmony_ci        b2 = 0;
195cabdff1aSopenharmony_ci        b3 = 0;
196cabdff1aSopenharmony_ci    }
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_ci    if (col[8 * 3]) {
199cabdff1aSopenharmony_ci        b0 += W3 * col[8 * 3];
200cabdff1aSopenharmony_ci        b1 -= W7 * col[8 * 3];
201cabdff1aSopenharmony_ci        b2 -= W1 * col[8 * 3];
202cabdff1aSopenharmony_ci        b3 -= W5 * col[8 * 3];
203cabdff1aSopenharmony_ci    }
204cabdff1aSopenharmony_ci
205cabdff1aSopenharmony_ci    if (col[8 * 5]) {
206cabdff1aSopenharmony_ci        b0 += W5 * col[8 * 5];
207cabdff1aSopenharmony_ci        b1 -= W1 * col[8 * 5];
208cabdff1aSopenharmony_ci        b2 += W7 * col[8 * 5];
209cabdff1aSopenharmony_ci        b3 += W3 * col[8 * 5];
210cabdff1aSopenharmony_ci    }
211cabdff1aSopenharmony_ci
212cabdff1aSopenharmony_ci    if (col[8 * 7]) {
213cabdff1aSopenharmony_ci        b0 += W7 * col[8 * 7];
214cabdff1aSopenharmony_ci        b1 -= W5 * col[8 * 7];
215cabdff1aSopenharmony_ci        b2 += W3 * col[8 * 7];
216cabdff1aSopenharmony_ci        b3 -= W1 * col[8 * 7];
217cabdff1aSopenharmony_ci    }
218cabdff1aSopenharmony_ci
219cabdff1aSopenharmony_ci    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
220cabdff1aSopenharmony_ci    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
221cabdff1aSopenharmony_ci    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
222cabdff1aSopenharmony_ci    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
223cabdff1aSopenharmony_ci    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
224cabdff1aSopenharmony_ci    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
225cabdff1aSopenharmony_ci    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
226cabdff1aSopenharmony_ci    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
227cabdff1aSopenharmony_ci}
228cabdff1aSopenharmony_ci
229cabdff1aSopenharmony_ci/* If all rows but the first one are zero after row transformation,
230cabdff1aSopenharmony_ci   all rows will be identical after column transformation.  */
231cabdff1aSopenharmony_cistatic inline void idct_col2(int16_t *col)
232cabdff1aSopenharmony_ci{
233cabdff1aSopenharmony_ci    int i;
234cabdff1aSopenharmony_ci    uint64_t l, r;
235cabdff1aSopenharmony_ci
236cabdff1aSopenharmony_ci    for (i = 0; i < 8; ++i) {
237cabdff1aSopenharmony_ci        int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
238cabdff1aSopenharmony_ci
239cabdff1aSopenharmony_ci        a0 *= W4;
240cabdff1aSopenharmony_ci        col[i] = a0 >> COL_SHIFT;
241cabdff1aSopenharmony_ci    }
242cabdff1aSopenharmony_ci
243cabdff1aSopenharmony_ci    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
244cabdff1aSopenharmony_ci    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
245cabdff1aSopenharmony_ci    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
246cabdff1aSopenharmony_ci    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
247cabdff1aSopenharmony_ci    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
248cabdff1aSopenharmony_ci    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
249cabdff1aSopenharmony_ci    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
250cabdff1aSopenharmony_ci    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
251cabdff1aSopenharmony_ci}
252cabdff1aSopenharmony_ci
/**
 * In-place 2-D inverse DCT of an 8x8 block of coefficients.
 *
 * All eight rows are transformed first. The sparseness codes returned by
 * idct_row() then select the cheapest column pass that is still valid.
 *
 * @param block  64 coefficients in row-major order (8 per row);
 *               overwritten with the result
 */
void ff_simple_idct_axp(int16_t *block)
{
    int only_first_row = 1;     /* rows 1..7 were entirely zero */
    int rows_flat      = 1;     /* no row needed the full row transform */
    int n;

    for (n = 0; n < 8; n++) {
        const int kind = idct_row(block + 8 * n);

        if (kind == 2)
            rows_flat = 0;
        if (kind > 0 && n > 0)
            only_first_row = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!rows_flat) {
        /* General case: transform each of the eight columns. */
        for (n = 0; n < 8; n++)
            idct_col(block + n);
        return;
    }

    /* Every row holds a single repeated value, so all columns are equal:
     * transform column 0 only and spread each result across its row.
     * Two rows are handled per iteration. */
    idct_col(block);
    for (n = 0; n < 4; n++) {
        uint64_t upper = (uint16_t) block[0];
        uint64_t lower = (uint16_t) block[8];

        upper |= upper << 16;
        upper |= upper << 32;
        lower |= lower << 16;
        lower |= lower << 32;

        stq(upper, block + 0 * 4);
        stq(upper, block + 1 * 4);
        stq(lower, block + 2 * 4);
        stq(lower, block + 3 * 4);

        block += 4 * 4;
    }
}
292cabdff1aSopenharmony_ci
/**
 * Inverse-transform a block in place and pass the result on to
 * put_pixels_clamped_axp_p().
 *
 * @param dest       forwarded unchanged to put_pixels_clamped_axp_p();
 *                   presumably the destination pixel buffer (TODO confirm)
 * @param line_size  forwarded unchanged; presumably the stride of dest
 * @param block      8x8 coefficient block, overwritten by the IDCT
 */
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* The transform has to finish before the samples are handed on. */
    ff_simple_idct_axp(block);

    put_pixels_clamped_axp_p(block, dest, line_size);
}
298cabdff1aSopenharmony_ci
/**
 * Inverse-transform a block in place and pass the result on to
 * add_pixels_clamped_axp_p().
 *
 * @param dest       forwarded unchanged to add_pixels_clamped_axp_p();
 *                   presumably the pixel buffer the result is added to
 *                   (TODO confirm)
 * @param line_size  forwarded unchanged; presumably the stride of dest
 * @param block      8x8 coefficient block, overwritten by the IDCT
 */
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* The transform has to finish before the samples are handed on. */
    ff_simple_idct_axp(block);

    add_pixels_clamped_axp_p(block, dest, line_size);
}
304