xref: /third_party/ffmpeg/libavcodec/tests/dct.c (revision cabdff1a)
1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * (c) 2001 Fabrice Bellard
3cabdff1aSopenharmony_ci *     2007 Marc Hoffman <marc.hoffman@analog.com>
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * This file is part of FFmpeg.
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
11cabdff1aSopenharmony_ci *
12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15cabdff1aSopenharmony_ci * Lesser General Public License for more details.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20cabdff1aSopenharmony_ci */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci/**
23cabdff1aSopenharmony_ci * @file
24cabdff1aSopenharmony_ci * DCT test (c) 2001 Fabrice Bellard
25cabdff1aSopenharmony_ci * Started from sample code by Juan J. Sierralta P.
26cabdff1aSopenharmony_ci */
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_ci#include "config.h"
29cabdff1aSopenharmony_ci#include "config_components.h"
30cabdff1aSopenharmony_ci#include <stdlib.h>
31cabdff1aSopenharmony_ci#include <stdio.h>
32cabdff1aSopenharmony_ci#include <string.h>
33cabdff1aSopenharmony_ci#if HAVE_UNISTD_H
34cabdff1aSopenharmony_ci#include <unistd.h>
35cabdff1aSopenharmony_ci#endif
36cabdff1aSopenharmony_ci#include <math.h>
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
39cabdff1aSopenharmony_ci#include "libavutil/common.h"
40cabdff1aSopenharmony_ci#include "libavutil/internal.h"
41cabdff1aSopenharmony_ci#include "libavutil/lfg.h"
42cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
43cabdff1aSopenharmony_ci#include "libavutil/time.h"
44cabdff1aSopenharmony_ci
45cabdff1aSopenharmony_ci#include "libavcodec/dct.h"
46cabdff1aSopenharmony_ci#include "libavcodec/idctdsp.h"
47cabdff1aSopenharmony_ci#include "libavcodec/simple_idct.h"
48cabdff1aSopenharmony_ci#include "libavcodec/xvididct.h"
49cabdff1aSopenharmony_ci#include "libavcodec/aandcttab.h"
50cabdff1aSopenharmony_ci#include "libavcodec/faandct.h"
51cabdff1aSopenharmony_ci#include "libavcodec/faanidct.h"
52cabdff1aSopenharmony_ci#include "libavcodec/dctref.h"
53cabdff1aSopenharmony_ci
/**
 * Description of one DCT/IDCT implementation exercised by this test.
 */
struct algo {
    const char *name;                     /* label printed in reports; also compared by name in dct_error() */
    void (*func)(int16_t *block);         /* transform under test, called in place on a 64-entry block */
    enum idct_permutation_type perm_type; /* input layout: permute() is applied with this before func runs */
    int cpu_flag;                         /* arch tables only: main() runs the entry only if all these bits are in av_get_cpu_flags() */
    int nonspec;                          /* non-zero: exceeding the accuracy limits is not reported as a failure */
};
61cabdff1aSopenharmony_ci
62cabdff1aSopenharmony_cistatic const struct algo fdct_tab[] = {
63cabdff1aSopenharmony_ci    { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
64cabdff1aSopenharmony_ci    { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
65cabdff1aSopenharmony_ci    { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
66cabdff1aSopenharmony_ci#if CONFIG_FAANDCT
67cabdff1aSopenharmony_ci    { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
68cabdff1aSopenharmony_ci#endif /* CONFIG_FAANDCT */
69cabdff1aSopenharmony_ci};
70cabdff1aSopenharmony_ci
71cabdff1aSopenharmony_cistatic void ff_prores_idct_wrap(int16_t *dst){
72cabdff1aSopenharmony_ci    LOCAL_ALIGNED(16, int16_t, qmat, [64]);
73cabdff1aSopenharmony_ci    int i;
74cabdff1aSopenharmony_ci
75cabdff1aSopenharmony_ci    for(i=0; i<64; i++){
76cabdff1aSopenharmony_ci        qmat[i]=4;
77cabdff1aSopenharmony_ci    }
78cabdff1aSopenharmony_ci    ff_prores_idct_10(dst, qmat);
79cabdff1aSopenharmony_ci    for(i=0; i<64; i++) {
80cabdff1aSopenharmony_ci         dst[i] -= 512;
81cabdff1aSopenharmony_ci    }
82cabdff1aSopenharmony_ci}
83cabdff1aSopenharmony_ci
84cabdff1aSopenharmony_cistatic const struct algo idct_tab[] = {
85cabdff1aSopenharmony_ci    { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
86cabdff1aSopenharmony_ci    { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
87cabdff1aSopenharmony_ci    { "SIMPLE-C",    ff_simple_idct_int16_8bit,     FF_IDCT_PERM_NONE },
88cabdff1aSopenharmony_ci    { "SIMPLE-C10",  ff_simple_idct_int16_10bit,    FF_IDCT_PERM_NONE },
89cabdff1aSopenharmony_ci    { "SIMPLE-C12",  ff_simple_idct_int16_12bit,    FF_IDCT_PERM_NONE, 0, 1 },
90cabdff1aSopenharmony_ci    { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
91cabdff1aSopenharmony_ci#if CONFIG_FAANIDCT
92cabdff1aSopenharmony_ci    { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
93cabdff1aSopenharmony_ci#endif /* CONFIG_FAANIDCT */
94cabdff1aSopenharmony_ci#if CONFIG_MPEG4_DECODER
95cabdff1aSopenharmony_ci    { "XVID",        ff_xvid_idct,         FF_IDCT_PERM_NONE, 0, 1 },
96cabdff1aSopenharmony_ci#endif /* CONFIG_MPEG4_DECODER */
97cabdff1aSopenharmony_ci};
98cabdff1aSopenharmony_ci
/*
 * Architecture-specific implementation tables.
 *
 * Each included file is expected to define fdct_tab_arch[] and
 * idct_tab_arch[], since main() uses both unconditionally and walks them
 * until it reaches an entry whose .name is NULL.
 * NOTE(review): x86/dct.c presumably also supplies permute_x86(), which
 * permute() calls under ARCH_X86 -- confirm.
 */
#if ARCH_AARCH64
#include "aarch64/dct.c"
#elif ARCH_ARM
#include "arm/dct.c"
#elif ARCH_PPC
#include "ppc/dct.c"
#elif ARCH_X86
#include "x86/dct.c"
#else
/* No arch-specific code: the tables hold only the all-zero terminator entry. */
static const struct algo fdct_tab_arch[] = { { 0 } };
static const struct algo idct_tab_arch[] = { { 0 } };
#endif
111cabdff1aSopenharmony_ci
/* Fixed-point precision used by dct_error() when rescaling the "IJG-AAN-INT" output. */
#define AANSCALE_BITS 12

/* Number of random blocks processed by each accuracy measurement. */
#define NB_ITS 20000
/* Number of transforms per timing batch; batches repeat until one second has elapsed. */
#define NB_ITS_SPEED 50000

/* Work buffers shared by the tests: block is handed to the implementation under test. */
DECLARE_ALIGNED(16, static int16_t, block)[64];
/* block1 holds the generated input and, in dct_error(), the reference result. */
DECLARE_ALIGNED(8,  static int16_t, block1)[64];
119cabdff1aSopenharmony_ci
120cabdff1aSopenharmony_cistatic void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
121cabdff1aSopenharmony_ci{
122cabdff1aSopenharmony_ci    int i, j;
123cabdff1aSopenharmony_ci
124cabdff1aSopenharmony_ci    memset(block, 0, 64 * sizeof(*block));
125cabdff1aSopenharmony_ci
126cabdff1aSopenharmony_ci    switch (test) {
127cabdff1aSopenharmony_ci    case 0:
128cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
129cabdff1aSopenharmony_ci            block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
130cabdff1aSopenharmony_ci        if (is_idct) {
131cabdff1aSopenharmony_ci            ff_ref_fdct(block);
132cabdff1aSopenharmony_ci            for (i = 0; i < 64; i++)
133cabdff1aSopenharmony_ci                block[i] >>= 3;
134cabdff1aSopenharmony_ci        }
135cabdff1aSopenharmony_ci        break;
136cabdff1aSopenharmony_ci    case 1:
137cabdff1aSopenharmony_ci        j = av_lfg_get(prng) % 10 + 1;
138cabdff1aSopenharmony_ci        for (i = 0; i < j; i++) {
139cabdff1aSopenharmony_ci            int idx = av_lfg_get(prng) % 64;
140cabdff1aSopenharmony_ci            block[idx] = av_lfg_get(prng) % (2*vals) -vals;
141cabdff1aSopenharmony_ci        }
142cabdff1aSopenharmony_ci        break;
143cabdff1aSopenharmony_ci    case 2:
144cabdff1aSopenharmony_ci        block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
145cabdff1aSopenharmony_ci        block[63] = (block[0] & 1) ^ 1;
146cabdff1aSopenharmony_ci        break;
147cabdff1aSopenharmony_ci    }
148cabdff1aSopenharmony_ci}
149cabdff1aSopenharmony_ci
150cabdff1aSopenharmony_cistatic void permute(int16_t dst[64], const int16_t src[64],
151cabdff1aSopenharmony_ci                    enum idct_permutation_type perm_type)
152cabdff1aSopenharmony_ci{
153cabdff1aSopenharmony_ci    int i;
154cabdff1aSopenharmony_ci
155cabdff1aSopenharmony_ci#if ARCH_X86
156cabdff1aSopenharmony_ci    if (permute_x86(dst, src, perm_type))
157cabdff1aSopenharmony_ci        return;
158cabdff1aSopenharmony_ci#endif
159cabdff1aSopenharmony_ci
160cabdff1aSopenharmony_ci    switch (perm_type) {
161cabdff1aSopenharmony_ci    case FF_IDCT_PERM_LIBMPEG2:
162cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
163cabdff1aSopenharmony_ci            dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
164cabdff1aSopenharmony_ci        break;
165cabdff1aSopenharmony_ci    case FF_IDCT_PERM_PARTTRANS:
166cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
167cabdff1aSopenharmony_ci            dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
168cabdff1aSopenharmony_ci        break;
169cabdff1aSopenharmony_ci    case FF_IDCT_PERM_TRANSPOSE:
170cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
171cabdff1aSopenharmony_ci            dst[(i>>3) | ((i<<3)&0x38)] = src[i];
172cabdff1aSopenharmony_ci        break;
173cabdff1aSopenharmony_ci    default:
174cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
175cabdff1aSopenharmony_ci            dst[i] = src[i];
176cabdff1aSopenharmony_ci        break;
177cabdff1aSopenharmony_ci    }
178cabdff1aSopenharmony_ci}
179cabdff1aSopenharmony_ci
180cabdff1aSopenharmony_cistatic int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
181cabdff1aSopenharmony_ci{
182cabdff1aSopenharmony_ci    void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
183cabdff1aSopenharmony_ci    int it, i, scale;
184cabdff1aSopenharmony_ci    int err_inf, v;
185cabdff1aSopenharmony_ci    int64_t err2, ti, ti1, it1, err_sum = 0;
186cabdff1aSopenharmony_ci    int64_t sysErr[64], sysErrMax = 0;
187cabdff1aSopenharmony_ci    int64_t err2_matrix[64], err2_max = 0;
188cabdff1aSopenharmony_ci    int maxout = 0;
189cabdff1aSopenharmony_ci    int blockSumErrMax = 0, blockSumErr;
190cabdff1aSopenharmony_ci    AVLFG prng;
191cabdff1aSopenharmony_ci    const int vals=1<<bits;
192cabdff1aSopenharmony_ci    double omse, ome;
193cabdff1aSopenharmony_ci    int spec_err;
194cabdff1aSopenharmony_ci
195cabdff1aSopenharmony_ci    av_lfg_init(&prng, 1);
196cabdff1aSopenharmony_ci
197cabdff1aSopenharmony_ci    err_inf = 0;
198cabdff1aSopenharmony_ci    err2 = 0;
199cabdff1aSopenharmony_ci    for (i = 0; i < 64; i++)
200cabdff1aSopenharmony_ci        err2_matrix[i] = sysErr[i] = 0;
201cabdff1aSopenharmony_ci    for (it = 0; it < NB_ITS; it++) {
202cabdff1aSopenharmony_ci        init_block(block1, test, is_idct, &prng, vals);
203cabdff1aSopenharmony_ci        permute(block, block1, dct->perm_type);
204cabdff1aSopenharmony_ci
205cabdff1aSopenharmony_ci        dct->func(block);
206cabdff1aSopenharmony_ci        emms_c();
207cabdff1aSopenharmony_ci
208cabdff1aSopenharmony_ci        if (!strcmp(dct->name, "IJG-AAN-INT")) {
209cabdff1aSopenharmony_ci            for (i = 0; i < 64; i++) {
210cabdff1aSopenharmony_ci                scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
211cabdff1aSopenharmony_ci                block[i] = (block[i] * scale) >> AANSCALE_BITS;
212cabdff1aSopenharmony_ci            }
213cabdff1aSopenharmony_ci        }
214cabdff1aSopenharmony_ci
215cabdff1aSopenharmony_ci        ref(block1);
216cabdff1aSopenharmony_ci        if (!strcmp(dct->name, "PR-SSE2"))
217cabdff1aSopenharmony_ci            for (i = 0; i < 64; i++)
218cabdff1aSopenharmony_ci                block1[i] = av_clip(block1[i], 4-512, 1019-512);
219cabdff1aSopenharmony_ci
220cabdff1aSopenharmony_ci        blockSumErr = 0;
221cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++) {
222cabdff1aSopenharmony_ci            int err = block[i] - block1[i];
223cabdff1aSopenharmony_ci            err_sum += err;
224cabdff1aSopenharmony_ci            v = abs(err);
225cabdff1aSopenharmony_ci            if (v > err_inf)
226cabdff1aSopenharmony_ci                err_inf = v;
227cabdff1aSopenharmony_ci            err2_matrix[i] += v * v;
228cabdff1aSopenharmony_ci            err2 += v * v;
229cabdff1aSopenharmony_ci            sysErr[i] += block[i] - block1[i];
230cabdff1aSopenharmony_ci            blockSumErr += v;
231cabdff1aSopenharmony_ci            if (abs(block[i]) > maxout)
232cabdff1aSopenharmony_ci                maxout = abs(block[i]);
233cabdff1aSopenharmony_ci        }
234cabdff1aSopenharmony_ci        if (blockSumErrMax < blockSumErr)
235cabdff1aSopenharmony_ci            blockSumErrMax = blockSumErr;
236cabdff1aSopenharmony_ci    }
237cabdff1aSopenharmony_ci    for (i = 0; i < 64; i++) {
238cabdff1aSopenharmony_ci        sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
239cabdff1aSopenharmony_ci        err2_max  = FFMAX(err2_max , FFABS(err2_matrix[i]));
240cabdff1aSopenharmony_ci    }
241cabdff1aSopenharmony_ci
242cabdff1aSopenharmony_ci    for (i = 0; i < 64; i++) {
243cabdff1aSopenharmony_ci        if (i % 8 == 0)
244cabdff1aSopenharmony_ci            printf("\n");
245cabdff1aSopenharmony_ci        printf("%7d ", (int) sysErr[i]);
246cabdff1aSopenharmony_ci    }
247cabdff1aSopenharmony_ci    printf("\n");
248cabdff1aSopenharmony_ci
249cabdff1aSopenharmony_ci    omse = (double) err2 / NB_ITS / 64;
250cabdff1aSopenharmony_ci    ome  = (double) err_sum / NB_ITS / 64;
251cabdff1aSopenharmony_ci
252cabdff1aSopenharmony_ci    spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
253cabdff1aSopenharmony_ci    if (test < 2)
254cabdff1aSopenharmony_ci        spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015);
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci    printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
257cabdff1aSopenharmony_ci           is_idct ? "IDCT" : "DCT", dct->name, err_inf,
258cabdff1aSopenharmony_ci           omse, ome, (double) sysErrMax / NB_ITS,
259cabdff1aSopenharmony_ci           maxout, blockSumErrMax);
260cabdff1aSopenharmony_ci
261cabdff1aSopenharmony_ci    if (spec_err && !dct->nonspec) {
262cabdff1aSopenharmony_ci        printf("Failed!\n");
263cabdff1aSopenharmony_ci        return 1;
264cabdff1aSopenharmony_ci    }
265cabdff1aSopenharmony_ci
266cabdff1aSopenharmony_ci    if (!speed)
267cabdff1aSopenharmony_ci        return 0;
268cabdff1aSopenharmony_ci
269cabdff1aSopenharmony_ci    /* speed test */
270cabdff1aSopenharmony_ci
271cabdff1aSopenharmony_ci    init_block(block, test, is_idct, &prng, vals);
272cabdff1aSopenharmony_ci    permute(block1, block, dct->perm_type);
273cabdff1aSopenharmony_ci
274cabdff1aSopenharmony_ci    ti = av_gettime_relative();
275cabdff1aSopenharmony_ci    it1 = 0;
276cabdff1aSopenharmony_ci    do {
277cabdff1aSopenharmony_ci        for (it = 0; it < NB_ITS_SPEED; it++) {
278cabdff1aSopenharmony_ci            memcpy(block, block1, sizeof(block));
279cabdff1aSopenharmony_ci            dct->func(block);
280cabdff1aSopenharmony_ci        }
281cabdff1aSopenharmony_ci        emms_c();
282cabdff1aSopenharmony_ci        it1 += NB_ITS_SPEED;
283cabdff1aSopenharmony_ci        ti1 = av_gettime_relative() - ti;
284cabdff1aSopenharmony_ci    } while (ti1 < 1000000);
285cabdff1aSopenharmony_ci
286cabdff1aSopenharmony_ci    printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
287cabdff1aSopenharmony_ci           (double) it1 * 1000.0 / (double) ti1);
288cabdff1aSopenharmony_ci
289cabdff1aSopenharmony_ci    return 0;
290cabdff1aSopenharmony_ci}
291cabdff1aSopenharmony_ci
/* 8x8 pixel outputs for the IDCT248 test: img_dest receives the implementation
 * under test, img_dest1 the idct248_ref() result it is compared against. */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
294cabdff1aSopenharmony_ci
295cabdff1aSopenharmony_cistatic void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
296cabdff1aSopenharmony_ci{
297cabdff1aSopenharmony_ci    static int init;
298cabdff1aSopenharmony_ci    static double c8[8][8];
299cabdff1aSopenharmony_ci    static double c4[4][4];
300cabdff1aSopenharmony_ci    double block1[64], block2[64], block3[64];
301cabdff1aSopenharmony_ci    double s, sum, v;
302cabdff1aSopenharmony_ci    int i, j, k;
303cabdff1aSopenharmony_ci
304cabdff1aSopenharmony_ci    if (!init) {
305cabdff1aSopenharmony_ci        init = 1;
306cabdff1aSopenharmony_ci
307cabdff1aSopenharmony_ci        for (i = 0; i < 8; i++) {
308cabdff1aSopenharmony_ci            sum = 0;
309cabdff1aSopenharmony_ci            for (j = 0; j < 8; j++) {
310cabdff1aSopenharmony_ci                s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
311cabdff1aSopenharmony_ci                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
312cabdff1aSopenharmony_ci                sum += c8[i][j] * c8[i][j];
313cabdff1aSopenharmony_ci            }
314cabdff1aSopenharmony_ci        }
315cabdff1aSopenharmony_ci
316cabdff1aSopenharmony_ci        for (i = 0; i < 4; i++) {
317cabdff1aSopenharmony_ci            sum = 0;
318cabdff1aSopenharmony_ci            for (j = 0; j < 4; j++) {
319cabdff1aSopenharmony_ci                s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
320cabdff1aSopenharmony_ci                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
321cabdff1aSopenharmony_ci                sum += c4[i][j] * c4[i][j];
322cabdff1aSopenharmony_ci            }
323cabdff1aSopenharmony_ci        }
324cabdff1aSopenharmony_ci    }
325cabdff1aSopenharmony_ci
326cabdff1aSopenharmony_ci    /* butterfly */
327cabdff1aSopenharmony_ci    s = 0.5 * sqrt(2.0);
328cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
329cabdff1aSopenharmony_ci        for (j = 0; j < 8; j++) {
330cabdff1aSopenharmony_ci            block1[8 * (2 * i) + j] =
331cabdff1aSopenharmony_ci                (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
332cabdff1aSopenharmony_ci            block1[8 * (2 * i + 1) + j] =
333cabdff1aSopenharmony_ci                (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
334cabdff1aSopenharmony_ci        }
335cabdff1aSopenharmony_ci    }
336cabdff1aSopenharmony_ci
337cabdff1aSopenharmony_ci    /* idct8 on lines */
338cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
339cabdff1aSopenharmony_ci        for (j = 0; j < 8; j++) {
340cabdff1aSopenharmony_ci            sum = 0;
341cabdff1aSopenharmony_ci            for (k = 0; k < 8; k++)
342cabdff1aSopenharmony_ci                sum += c8[k][j] * block1[8 * i + k];
343cabdff1aSopenharmony_ci            block2[8 * i + j] = sum;
344cabdff1aSopenharmony_ci        }
345cabdff1aSopenharmony_ci    }
346cabdff1aSopenharmony_ci
347cabdff1aSopenharmony_ci    /* idct4 */
348cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
349cabdff1aSopenharmony_ci        for (j = 0; j < 4; j++) {
350cabdff1aSopenharmony_ci            /* top */
351cabdff1aSopenharmony_ci            sum = 0;
352cabdff1aSopenharmony_ci            for (k = 0; k < 4; k++)
353cabdff1aSopenharmony_ci                sum += c4[k][j] * block2[8 * (2 * k) + i];
354cabdff1aSopenharmony_ci            block3[8 * (2 * j) + i] = sum;
355cabdff1aSopenharmony_ci
356cabdff1aSopenharmony_ci            /* bottom */
357cabdff1aSopenharmony_ci            sum = 0;
358cabdff1aSopenharmony_ci            for (k = 0; k < 4; k++)
359cabdff1aSopenharmony_ci                sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
360cabdff1aSopenharmony_ci            block3[8 * (2 * j + 1) + i] = sum;
361cabdff1aSopenharmony_ci        }
362cabdff1aSopenharmony_ci    }
363cabdff1aSopenharmony_ci
364cabdff1aSopenharmony_ci    /* clamp and store the result */
365cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
366cabdff1aSopenharmony_ci        for (j = 0; j < 8; j++) {
367cabdff1aSopenharmony_ci            v = block3[8 * i + j];
368cabdff1aSopenharmony_ci            if      (v < 0)   v = 0;
369cabdff1aSopenharmony_ci            else if (v > 255) v = 255;
370cabdff1aSopenharmony_ci            dest[i * linesize + j] = (int) rint(v);
371cabdff1aSopenharmony_ci        }
372cabdff1aSopenharmony_ci    }
373cabdff1aSopenharmony_ci}
374cabdff1aSopenharmony_ci
375cabdff1aSopenharmony_cistatic void idct248_error(const char *name,
376cabdff1aSopenharmony_ci                          void (*idct248_put)(uint8_t *dest,
377cabdff1aSopenharmony_ci                                              ptrdiff_t line_size,
378cabdff1aSopenharmony_ci                                              int16_t *block),
379cabdff1aSopenharmony_ci                          int speed)
380cabdff1aSopenharmony_ci{
381cabdff1aSopenharmony_ci    int it, i, it1, ti, ti1, err_max, v;
382cabdff1aSopenharmony_ci    AVLFG prng;
383cabdff1aSopenharmony_ci
384cabdff1aSopenharmony_ci    av_lfg_init(&prng, 1);
385cabdff1aSopenharmony_ci
386cabdff1aSopenharmony_ci    /* just one test to see if code is correct (precision is less
387cabdff1aSopenharmony_ci       important here) */
388cabdff1aSopenharmony_ci    err_max = 0;
389cabdff1aSopenharmony_ci    for (it = 0; it < NB_ITS; it++) {
390cabdff1aSopenharmony_ci        /* XXX: use forward transform to generate values */
391cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
392cabdff1aSopenharmony_ci            block1[i] = av_lfg_get(&prng) % 256 - 128;
393cabdff1aSopenharmony_ci        block1[0] += 1024;
394cabdff1aSopenharmony_ci
395cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
396cabdff1aSopenharmony_ci            block[i] = block1[i];
397cabdff1aSopenharmony_ci        idct248_ref(img_dest1, 8, block);
398cabdff1aSopenharmony_ci
399cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++)
400cabdff1aSopenharmony_ci            block[i] = block1[i];
401cabdff1aSopenharmony_ci        idct248_put(img_dest, 8, block);
402cabdff1aSopenharmony_ci
403cabdff1aSopenharmony_ci        for (i = 0; i < 64; i++) {
404cabdff1aSopenharmony_ci            v = abs((int) img_dest[i] - (int) img_dest1[i]);
405cabdff1aSopenharmony_ci            if (v == 255)
406cabdff1aSopenharmony_ci                printf("%d %d\n", img_dest[i], img_dest1[i]);
407cabdff1aSopenharmony_ci            if (v > err_max)
408cabdff1aSopenharmony_ci                err_max = v;
409cabdff1aSopenharmony_ci        }
410cabdff1aSopenharmony_ci#if 0
411cabdff1aSopenharmony_ci        printf("ref=\n");
412cabdff1aSopenharmony_ci        for(i=0;i<8;i++) {
413cabdff1aSopenharmony_ci            int j;
414cabdff1aSopenharmony_ci            for(j=0;j<8;j++) {
415cabdff1aSopenharmony_ci                printf(" %3d", img_dest1[i*8+j]);
416cabdff1aSopenharmony_ci            }
417cabdff1aSopenharmony_ci            printf("\n");
418cabdff1aSopenharmony_ci        }
419cabdff1aSopenharmony_ci
420cabdff1aSopenharmony_ci        printf("out=\n");
421cabdff1aSopenharmony_ci        for(i=0;i<8;i++) {
422cabdff1aSopenharmony_ci            int j;
423cabdff1aSopenharmony_ci            for(j=0;j<8;j++) {
424cabdff1aSopenharmony_ci                printf(" %3d", img_dest[i*8+j]);
425cabdff1aSopenharmony_ci            }
426cabdff1aSopenharmony_ci            printf("\n");
427cabdff1aSopenharmony_ci        }
428cabdff1aSopenharmony_ci#endif
429cabdff1aSopenharmony_ci    }
430cabdff1aSopenharmony_ci    printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
431cabdff1aSopenharmony_ci
432cabdff1aSopenharmony_ci    if (!speed)
433cabdff1aSopenharmony_ci        return;
434cabdff1aSopenharmony_ci
435cabdff1aSopenharmony_ci    ti = av_gettime_relative();
436cabdff1aSopenharmony_ci    it1 = 0;
437cabdff1aSopenharmony_ci    do {
438cabdff1aSopenharmony_ci        for (it = 0; it < NB_ITS_SPEED; it++) {
439cabdff1aSopenharmony_ci            for (i = 0; i < 64; i++)
440cabdff1aSopenharmony_ci                block[i] = block1[i];
441cabdff1aSopenharmony_ci            idct248_put(img_dest, 8, block);
442cabdff1aSopenharmony_ci        }
443cabdff1aSopenharmony_ci        emms_c();
444cabdff1aSopenharmony_ci        it1 += NB_ITS_SPEED;
445cabdff1aSopenharmony_ci        ti1 = av_gettime_relative() - ti;
446cabdff1aSopenharmony_ci    } while (ti1 < 1000000);
447cabdff1aSopenharmony_ci
448cabdff1aSopenharmony_ci    printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
449cabdff1aSopenharmony_ci           (double) it1 * 1000.0 / (double) ti1);
450cabdff1aSopenharmony_ci}
451cabdff1aSopenharmony_ci
/* Write the command-line usage summary to stdout. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from MPEG-4 std\n"
        "bits        Number of time domain bits to use, 8 is default\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n"
        "-t          speed test\n";

    fputs(usage, stdout);
}
463cabdff1aSopenharmony_ci
464cabdff1aSopenharmony_ci#if !HAVE_GETOPT
465cabdff1aSopenharmony_ci#include "compat/getopt.c"
466cabdff1aSopenharmony_ci#endif
467cabdff1aSopenharmony_ci
468cabdff1aSopenharmony_ciint main(int argc, char **argv)
469cabdff1aSopenharmony_ci{
470cabdff1aSopenharmony_ci    int test_idct = 0, test_248_dct = 0;
471cabdff1aSopenharmony_ci    int c, i;
472cabdff1aSopenharmony_ci    int test = 1;
473cabdff1aSopenharmony_ci    int speed = 0;
474cabdff1aSopenharmony_ci    int err = 0;
475cabdff1aSopenharmony_ci    int bits=8;
476cabdff1aSopenharmony_ci
477cabdff1aSopenharmony_ci    ff_ref_dct_init();
478cabdff1aSopenharmony_ci
479cabdff1aSopenharmony_ci    for (;;) {
480cabdff1aSopenharmony_ci        c = getopt(argc, argv, "ih4t");
481cabdff1aSopenharmony_ci        if (c == -1)
482cabdff1aSopenharmony_ci            break;
483cabdff1aSopenharmony_ci        switch (c) {
484cabdff1aSopenharmony_ci        case 'i':
485cabdff1aSopenharmony_ci            test_idct = 1;
486cabdff1aSopenharmony_ci            break;
487cabdff1aSopenharmony_ci        case '4':
488cabdff1aSopenharmony_ci            test_248_dct = 1;
489cabdff1aSopenharmony_ci            break;
490cabdff1aSopenharmony_ci        case 't':
491cabdff1aSopenharmony_ci            speed = 1;
492cabdff1aSopenharmony_ci            break;
493cabdff1aSopenharmony_ci        default:
494cabdff1aSopenharmony_ci        case 'h':
495cabdff1aSopenharmony_ci            help();
496cabdff1aSopenharmony_ci            return 0;
497cabdff1aSopenharmony_ci        }
498cabdff1aSopenharmony_ci    }
499cabdff1aSopenharmony_ci
500cabdff1aSopenharmony_ci    if (optind < argc)
501cabdff1aSopenharmony_ci        test = atoi(argv[optind]);
502cabdff1aSopenharmony_ci    if(optind+1 < argc) bits= atoi(argv[optind+1]);
503cabdff1aSopenharmony_ci
504cabdff1aSopenharmony_ci    printf("ffmpeg DCT/IDCT test\n");
505cabdff1aSopenharmony_ci
506cabdff1aSopenharmony_ci    if (test_248_dct) {
507cabdff1aSopenharmony_ci        idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
508cabdff1aSopenharmony_ci    } else {
509cabdff1aSopenharmony_ci        const int cpu_flags = av_get_cpu_flags();
510cabdff1aSopenharmony_ci        if (test_idct) {
511cabdff1aSopenharmony_ci            for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++)
512cabdff1aSopenharmony_ci                err |= dct_error(&idct_tab[i], test, test_idct, speed, bits);
513cabdff1aSopenharmony_ci
514cabdff1aSopenharmony_ci            for (i = 0; idct_tab_arch[i].name; i++)
515cabdff1aSopenharmony_ci                if (!(~cpu_flags & idct_tab_arch[i].cpu_flag))
516cabdff1aSopenharmony_ci                    err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
517cabdff1aSopenharmony_ci        }
518cabdff1aSopenharmony_ci#if CONFIG_FDCTDSP
519cabdff1aSopenharmony_ci        else {
520cabdff1aSopenharmony_ci            for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++)
521cabdff1aSopenharmony_ci                err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits);
522cabdff1aSopenharmony_ci
523cabdff1aSopenharmony_ci            for (i = 0; fdct_tab_arch[i].name; i++)
524cabdff1aSopenharmony_ci                if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag))
525cabdff1aSopenharmony_ci                    err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
526cabdff1aSopenharmony_ci        }
527cabdff1aSopenharmony_ci#endif /* CONFIG_FDCTDSP */
528cabdff1aSopenharmony_ci    }
529cabdff1aSopenharmony_ci
530cabdff1aSopenharmony_ci    if (err)
531cabdff1aSopenharmony_ci        printf("Error: %d.\n", err);
532cabdff1aSopenharmony_ci
533cabdff1aSopenharmony_ci    return !!err;
534cabdff1aSopenharmony_ci}
535