/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22cabdff1aSopenharmony_ci
/**
 * @file
 * Simple IDCT in C.
 */
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
29cabdff1aSopenharmony_ci#include "mathops.h"
30cabdff1aSopenharmony_ci#include "simple_idct.h"
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_ci#define IN_IDCT_DEPTH 16
33cabdff1aSopenharmony_ci
34cabdff1aSopenharmony_ci#define BIT_DEPTH 8
35cabdff1aSopenharmony_ci#include "simple_idct_template.c"
36cabdff1aSopenharmony_ci#undef BIT_DEPTH
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_ci#define BIT_DEPTH 10
39cabdff1aSopenharmony_ci#include "simple_idct_template.c"
40cabdff1aSopenharmony_ci
41cabdff1aSopenharmony_ci#define EXTRA_SHIFT  2
42cabdff1aSopenharmony_ci#include "simple_idct_template.c"
43cabdff1aSopenharmony_ci
44cabdff1aSopenharmony_ci#undef EXTRA_SHIFT
45cabdff1aSopenharmony_ci#undef BIT_DEPTH
46cabdff1aSopenharmony_ci
47cabdff1aSopenharmony_ci#define BIT_DEPTH 12
48cabdff1aSopenharmony_ci#include "simple_idct_template.c"
49cabdff1aSopenharmony_ci#undef BIT_DEPTH
50cabdff1aSopenharmony_ci#undef IN_IDCT_DEPTH
51cabdff1aSopenharmony_ci
52cabdff1aSopenharmony_ci#define IN_IDCT_DEPTH 32
53cabdff1aSopenharmony_ci#define BIT_DEPTH 10
54cabdff1aSopenharmony_ci#include "simple_idct_template.c"
55cabdff1aSopenharmony_ci#undef BIT_DEPTH
56cabdff1aSopenharmony_ci#undef IN_IDCT_DEPTH
57cabdff1aSopenharmony_ci
/* 2x4x8 idct */

/* Number of fractional bits in the column coefficients below. */
#define CN_SHIFT 12
/* Convert a real coefficient to CN_SHIFT-bit fixed point, adding 0.5 before
   the truncating cast (round to nearest for the positive values used here). */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824) /* ~ cos(pi/8) / sqrt(2) */
#define C2 C_FIX(0.2705980501) /* ~ sin(pi/8) / sqrt(2) */

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_cistatic inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
70cabdff1aSopenharmony_ci{
71cabdff1aSopenharmony_ci    int c0, c1, c2, c3, a0, a1, a2, a3;
72cabdff1aSopenharmony_ci
73cabdff1aSopenharmony_ci    a0 = col[8*0];
74cabdff1aSopenharmony_ci    a1 = col[8*2];
75cabdff1aSopenharmony_ci    a2 = col[8*4];
76cabdff1aSopenharmony_ci    a3 = col[8*6];
77cabdff1aSopenharmony_ci    c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
78cabdff1aSopenharmony_ci    c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
79cabdff1aSopenharmony_ci    c1 = a1 * C1 + a3 * C2;
80cabdff1aSopenharmony_ci    c3 = a1 * C2 - a3 * C1;
81cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT);
82cabdff1aSopenharmony_ci    dest += line_size;
83cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT);
84cabdff1aSopenharmony_ci    dest += line_size;
85cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT);
86cabdff1aSopenharmony_ci    dest += line_size;
87cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT);
88cabdff1aSopenharmony_ci}
89cabdff1aSopenharmony_ci
/*
 * In-place butterfly on elements k and 8 + k of the array addressed by a
 * variable named `ptr` that must exist in the enclosing scope:
 *     ptr[k]     = old ptr[k] + old ptr[8 + k]
 *     ptr[8 + k] = old ptr[k] - old ptr[8 + k]
 * The sums are formed in int and then stored back into the element type.
 * k is evaluated several times, so it must be free of side effects.
 * Wrapped in do { } while (0) so that BF(k); is a single statement.
 */
#define BF(k) \
do {\
    const int bf_a = ptr[(k)];\
    const int bf_b = ptr[8 + (k)];\
    ptr[(k)] = bf_a + bf_b;\
    ptr[8 + (k)] = bf_a - bf_b;\
} while (0)
98cabdff1aSopenharmony_ci
/**
 * 2x4x8 IDCT of one 8x8 coefficient block, overwriting an 8x8 pixel area.
 *
 * Steps, all performed on @p block in place:
 *  1. butterfly every pair of adjacent rows (2n, 2n + 1) into their sum
 *     (stored in row 2n) and difference (stored in row 2n + 1);
 *  2. run the 8-point row transform on all eight rows;
 *  3. for every column, run a 4-point column transform over the even rows
 *     and write it to output lines 0, 2, 4, 6, and another over the odd rows
 *     written to output lines 1, 3, 5, 7.
 *
 * Notes kept from the original author: only used by the DV codec; the input
 * must be interlaced; 128 is added to the pixels before clamping to avoid a
 * systematic error (a 1024*sqrt(2) offset would be needed otherwise).
 * NOTE(review): no such addition is visible in this function -- presumably
 * it is applied by the caller; confirm.
 * XXX (original author): a 1.0/sqrt(2) normalization should probably be
 * needed to compensate the extra butterfly stage - written without access to
 * the full DV specification.
 *
 * @param dest      top-left output pixel
 * @param line_size distance in bytes between output lines
 * @param block     64 coefficients, rows of 8; clobbered
 */
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int i, k;
    int16_t *ptr;

    /* butterfly: BF() operates on the variable named ptr */
    ptr = block;
    for (i = 0; i < 4; i++) {
        for (k = 0; k < 8; k++) {
            BF(k);
        }
        ptr += 2 * 8;
    }

    /* IDCT8 on each line */
    for (i = 0; i < 8; i++) {
        idctRowCondDC_int16_8bit(block + i * 8, 0);
    }

    /* IDCT4 and store: even rows feed the even output lines, odd rows the
       odd output lines */
    for (i = 0; i < 8; i++) {
        idct4col_put(dest + i, 2 * line_size, block + i);
        idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
    }
}
135cabdff1aSopenharmony_ci
/* 8x4 & 4x8 WMV2 IDCT */
/* Drop the 2x4x8 definitions; the coefficients below carry an additional
   factor of sqrt(2). */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
#define CN_SHIFT 12
/* Scale by sqrt(2), convert to CN_SHIFT-bit fixed point, adding 0.5 before
   the truncating cast. */
#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824) /* ~ cos(pi/8) */
#define C2 C_FIX(0.2705980501) /* ~ sin(pi/8) */
#define C3 C_FIX(0.5)          /* ~ 1 / sqrt(2) */
#define C_SHIFT (4+1+12)
148cabdff1aSopenharmony_cistatic inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
149cabdff1aSopenharmony_ci{
150cabdff1aSopenharmony_ci    int c0, c1, c2, c3, a0, a1, a2, a3;
151cabdff1aSopenharmony_ci
152cabdff1aSopenharmony_ci    a0 = col[8*0];
153cabdff1aSopenharmony_ci    a1 = col[8*1];
154cabdff1aSopenharmony_ci    a2 = col[8*2];
155cabdff1aSopenharmony_ci    a3 = col[8*3];
156cabdff1aSopenharmony_ci    c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
157cabdff1aSopenharmony_ci    c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
158cabdff1aSopenharmony_ci    c1 = a1 * C1 + a3 * C2;
159cabdff1aSopenharmony_ci    c3 = a1 * C2 - a3 * C1;
160cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT));
161cabdff1aSopenharmony_ci    dest += line_size;
162cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT));
163cabdff1aSopenharmony_ci    dest += line_size;
164cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT));
165cabdff1aSopenharmony_ci    dest += line_size;
166cabdff1aSopenharmony_ci    dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT));
167cabdff1aSopenharmony_ci}
168cabdff1aSopenharmony_ci
/* Row coefficients: scaled by sqrt(2) and stored with RN_SHIFT fractional
   bits; results are shifted down by R_SHIFT only. */
#define RN_SHIFT 15
#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
#define R_SHIFT 11
/**
 * In-place 4-point IDCT on the first four coefficients of a row.
 *
 * Only row[0..3] are read and written; any further elements are untouched.
 *
 * @param row row to transform
 */
static inline void idct4row(int16_t *row)
{
    const int a0 = row[0];
    const int a1 = row[1];
    const int a2 = row[2];
    const int a3 = row[3];
    /* Each term fits in an int, but the sum or difference of two of them can
       exceed INT_MAX, so they are held as unsigned where wrap-around is well
       defined.  The final shift is therefore a logical one; the result is
       then narrowed to int16_t on assignment. */
    const unsigned even_sum  = (a0 + a2) * R3 + (1 << (R_SHIFT - 1));
    const unsigned even_diff = (a0 - a2) * R3 + (1 << (R_SHIFT - 1));
    const unsigned odd_a     = a1 * R1 + a3 * R2;
    const unsigned odd_b     = a1 * R2 - a3 * R1;

    row[0] = (even_sum  + odd_a) >> R_SHIFT;
    row[1] = (even_diff + odd_b) >> R_SHIFT;
    row[2] = (even_diff - odd_b) >> R_SHIFT;
    row[3] = (even_sum  - odd_a) >> R_SHIFT;
}
193cabdff1aSopenharmony_ci
/**
 * IDCT of a block 8 coefficients wide and 4 rows high, added to the
 * destination pixels.
 *
 * The first four rows of @p block (row stride 8) are transformed in place
 * with the 8-point row transform; each of the 8 columns then goes through
 * the 4-point column transform, which adds its output to @p dest.
 *
 * @param dest      top-left pixel of the area to update
 * @param line_size distance in bytes between pixel lines
 * @param block     coefficients, rows of 8; the first 4 rows are clobbered
 */
void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int i;

    /* IDCT8 on each line */
    for (i = 0; i < 4; i++) {
        idctRowCondDC_int16_8bit(block + i * 8, 0);
    }

    /* IDCT4 and store */
    for (i = 0; i < 8; i++) {
        idct4col_add(dest + i, line_size, block + i);
    }
}
208cabdff1aSopenharmony_ci
/**
 * IDCT of a block 4 coefficients wide and 8 rows high, added to the
 * destination pixels.
 *
 * The first four coefficients of each of the 8 rows (row stride 8) are
 * transformed in place with the 4-point row transform; each of the first
 * 4 columns is then handed to idctSparseColAdd_int16_8bit() together with
 * the matching destination column.
 *
 * @param dest      top-left pixel of the area to update
 * @param line_size distance in bytes between pixel lines
 * @param block     coefficients, rows of 8; modified in place
 */
void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int i;

    /* IDCT4 on each line */
    for (i = 0; i < 8; i++) {
        idct4row(block + i * 8);
    }

    /* IDCT8 and store */
    for (i = 0; i < 4; i++) {
        idctSparseColAdd_int16_8bit(dest + i, line_size, block + i);
    }
}
223cabdff1aSopenharmony_ci
/**
 * IDCT of a 4x4 block, added to the destination pixels.
 *
 * The first four coefficients of each of the first 4 rows (row stride 8)
 * are transformed in place with the 4-point row transform; each of the
 * first 4 columns then goes through the 4-point column transform, which
 * adds its output to @p dest.
 *
 * @param dest      top-left pixel of the area to update
 * @param line_size distance in bytes between pixel lines
 * @param block     coefficients, rows of 8; modified in place
 */
void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int i;

    /* IDCT4 on each line */
    for (i = 0; i < 4; i++) {
        idct4row(block + i * 8);
    }

    /* IDCT4 and store */
    for (i = 0; i < 4; i++) {
        idct4col_add(dest + i, line_size, block + i);
    }
}
238cabdff1aSopenharmony_ci
/**
 * Scale an 8x8 coefficient block by a matrix and inverse-transform it in
 * place, using the "extrashift_10" template functions.
 *
 * @param block 64 coefficients, rows of 8; overwritten with the result
 * @param qmat  64 multipliers, one per coefficient
 */
void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
{
    int i;

    /* element-wise multiply; the product is stored back as int16_t */
    for (i = 0; i < 64; i++) {
        block[i] *= qmat[i];
    }

    /* row pass; the second argument presumably is the extra shift -- confirm
       against simple_idct_template.c */
    for (i = 0; i < 8; i++) {
        idctRowCondDC_extrashift_10(block + i * 8, 2);
    }

    /* column pass; 8192 is added to the top element of each column first
       (presumably a bias that becomes a constant output offset -- confirm) */
    for (i = 0; i < 8; i++) {
        block[i] += 8192;
        idctSparseCol_extrashift_10(block + i);
    }
}
254cabdff1aSopenharmony_ci
/**
 * Scale an 8x8 coefficient block by a matrix and inverse-transform it in
 * place, using the "int16_12bit" template functions.
 *
 * @param block 64 coefficients, rows of 8; overwritten with the result
 * @param qmat  64 multipliers, one per coefficient
 */
void ff_prores_idct_12(int16_t *block, const int16_t *qmat)
{
    int i;

    /* element-wise multiply; the product is stored back as int16_t */
    for (i = 0; i < 64; i++) {
        block[i] *= qmat[i];
    }

    /* row pass; the second argument presumably is the extra shift -- confirm
       against simple_idct_template.c */
    for (i = 0; i < 8; i++) {
        idctRowCondDC_int16_12bit(block + i * 8, 0);
    }

    /* column pass; 8192 is added to the top element of each column first
       (presumably a bias that becomes a constant output offset -- confirm) */
    for (i = 0; i < 8; i++) {
        block[i] += 8192;
        idctSparseCol_int16_12bit(block + i);
    }
}
270