1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2022 Ben Avison
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or modify
7cabdff1aSopenharmony_ci * it under the terms of the GNU General Public License as published by
8cabdff1aSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or
9cabdff1aSopenharmony_ci * (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14cabdff1aSopenharmony_ci * GNU General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU General Public License along
17cabdff1aSopenharmony_ci * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18cabdff1aSopenharmony_ci * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include <string.h>
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci#include "checkasm.h"
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h"
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "libavutil/common.h"
28cabdff1aSopenharmony_ci#include "libavutil/internal.h"
29cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
30cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
31cabdff1aSopenharmony_ci
/* Builders for entries of the table-driven tests below. Each expands to one
 * initialiser for a `test` element: the VC1DSPContext member's name as a
 * string and its byte offset in the context; the sized form also stores the
 * transform dimensions. The trailing comma lets entries be written back to
 * back inside an array initialiser. */
#define VC1DSP_TEST(func) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func) },
#define VC1DSP_SIZED_TEST(func, w, h) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func), .width = (w), .height = (h) },
34cabdff1aSopenharmony_ci
/* One entry of a table-driven test: which VC1DSPContext member to exercise
 * and, for the sized inverse transforms, the block dimensions to feed it. */
typedef struct {
    const char *name;   /* member name, appended to "vc1dsp." for reporting */
    size_t offset;      /* byte offset of the function pointer in VC1DSPContext */
    int width, height;  /* transform size; zero when built by VC1DSP_TEST */
} test;
41cabdff1aSopenharmony_ci
/* Matrix of floats with its dimensions stored ahead of the data. Elements
 * are laid out row by row: (row j, column i) lives at d[j * width + i]. */
typedef struct matrix {
    size_t width, height;
    float d[];
} matrix;
47cabdff1aSopenharmony_ci
48cabdff1aSopenharmony_cistatic const matrix T8 = { 8, 8, {
49cabdff1aSopenharmony_ci        12,  12,  12,  12,  12,  12,  12,  12,
50cabdff1aSopenharmony_ci        16,  15,   9,   4,  -4,  -9, -15, -16,
51cabdff1aSopenharmony_ci        16,   6,  -6, -16, -16,  -6,   6,  16,
52cabdff1aSopenharmony_ci        15,  -4, -16,  -9,   9,  16,   4, -15,
53cabdff1aSopenharmony_ci        12, -12, -12,  12,  12, -12, -12,  12,
54cabdff1aSopenharmony_ci         9, -16,   4,  15, -15,  -4,  16,  -9,
55cabdff1aSopenharmony_ci         6, -16,  16,  -6,  -6,  16, -16,   6,
56cabdff1aSopenharmony_ci         4,  -9,  15, -16,  16, -15,   9,  -4
57cabdff1aSopenharmony_ci} };
58cabdff1aSopenharmony_ci
59cabdff1aSopenharmony_cistatic const matrix T4 = { 4, 4, {
60cabdff1aSopenharmony_ci        17,  17,  17,  17,
61cabdff1aSopenharmony_ci        22,  10, -10, -22,
62cabdff1aSopenharmony_ci        17, -17, -17,  17,
63cabdff1aSopenharmony_ci        10, -22,  22, -10
64cabdff1aSopenharmony_ci} };
65cabdff1aSopenharmony_ci
66cabdff1aSopenharmony_cistatic const matrix T8t = { 8, 8, {
67cabdff1aSopenharmony_ci        12,  16,  16,  15,  12,   9,   6,   4,
68cabdff1aSopenharmony_ci        12,  15,   6,  -4, -12, -16, -16,  -9,
69cabdff1aSopenharmony_ci        12,   9,  -6, -16, -12,   4,  16,  15,
70cabdff1aSopenharmony_ci        12,   4, -16,  -9,  12,  15,  -6, -16,
71cabdff1aSopenharmony_ci        12,  -4, -16,   9,  12, -15,  -6,  16,
72cabdff1aSopenharmony_ci        12,  -9,  -6,  16, -12,  -4,  16, -15,
73cabdff1aSopenharmony_ci        12, -15,   6,   4, -12,  16, -16,   9,
74cabdff1aSopenharmony_ci        12, -16,  16, -15,  12,  -9,   6,  -4
75cabdff1aSopenharmony_ci} };
76cabdff1aSopenharmony_ci
77cabdff1aSopenharmony_cistatic const matrix T4t = { 4, 4, {
78cabdff1aSopenharmony_ci        17,  22,  17,  10,
79cabdff1aSopenharmony_ci        17,  10, -17, -22,
80cabdff1aSopenharmony_ci        17, -10, -17,  22,
81cabdff1aSopenharmony_ci        17, -22,  17, -10
82cabdff1aSopenharmony_ci} };
83cabdff1aSopenharmony_ci
84cabdff1aSopenharmony_cistatic matrix *new_matrix(size_t width, size_t height)
85cabdff1aSopenharmony_ci{
86cabdff1aSopenharmony_ci    matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float));
87cabdff1aSopenharmony_ci    if (out == NULL) {
88cabdff1aSopenharmony_ci        fprintf(stderr, "Memory allocation failure\n");
89cabdff1aSopenharmony_ci        exit(EXIT_FAILURE);
90cabdff1aSopenharmony_ci    }
91cabdff1aSopenharmony_ci    out->width = width;
92cabdff1aSopenharmony_ci    out->height = height;
93cabdff1aSopenharmony_ci    return out;
94cabdff1aSopenharmony_ci}
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_cistatic matrix *multiply(const matrix *a, const matrix *b)
97cabdff1aSopenharmony_ci{
98cabdff1aSopenharmony_ci    matrix *out;
99cabdff1aSopenharmony_ci    if (a->width != b->height) {
100cabdff1aSopenharmony_ci        fprintf(stderr, "Incompatible multiplication\n");
101cabdff1aSopenharmony_ci        exit(EXIT_FAILURE);
102cabdff1aSopenharmony_ci    }
103cabdff1aSopenharmony_ci    out = new_matrix(b->width, a->height);
104cabdff1aSopenharmony_ci    for (int j = 0; j < out->height; ++j)
105cabdff1aSopenharmony_ci        for (int i = 0; i < out->width; ++i) {
106cabdff1aSopenharmony_ci            float sum = 0;
107cabdff1aSopenharmony_ci            for (int k = 0; k < a->width; ++k)
108cabdff1aSopenharmony_ci                sum += a->d[j * a->width + k] * b->d[k * b->width + i];
109cabdff1aSopenharmony_ci            out->d[j * out->width + i] = sum;
110cabdff1aSopenharmony_ci        }
111cabdff1aSopenharmony_ci    return out;
112cabdff1aSopenharmony_ci}
113cabdff1aSopenharmony_ci
114cabdff1aSopenharmony_cistatic void normalise(matrix *a)
115cabdff1aSopenharmony_ci{
116cabdff1aSopenharmony_ci    for (int j = 0; j < a->height; ++j)
117cabdff1aSopenharmony_ci        for (int i = 0; i < a->width; ++i) {
118cabdff1aSopenharmony_ci            float *p = a->d + j * a->width + i;
119cabdff1aSopenharmony_ci            *p *= 64;
120cabdff1aSopenharmony_ci            if (a->height == 4)
121cabdff1aSopenharmony_ci                *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];
122cabdff1aSopenharmony_ci            else
123cabdff1aSopenharmony_ci                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];
124cabdff1aSopenharmony_ci            if (a->width == 4)
125cabdff1aSopenharmony_ci                *p /= (const unsigned[]) { 289, 292, 289, 292 } [i];
126cabdff1aSopenharmony_ci            else
127cabdff1aSopenharmony_ci                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i];
128cabdff1aSopenharmony_ci        }
129cabdff1aSopenharmony_ci}
130cabdff1aSopenharmony_ci
131cabdff1aSopenharmony_cistatic void divide_and_round_nearest(matrix *a, float by)
132cabdff1aSopenharmony_ci{
133cabdff1aSopenharmony_ci    for (int j = 0; j < a->height; ++j)
134cabdff1aSopenharmony_ci        for (int i = 0; i < a->width; ++i) {
135cabdff1aSopenharmony_ci            float *p = a->d + j * a->width + i;
136cabdff1aSopenharmony_ci            *p = rintf(*p / by);
137cabdff1aSopenharmony_ci        }
138cabdff1aSopenharmony_ci}
139cabdff1aSopenharmony_ci
140cabdff1aSopenharmony_cistatic void tweak(matrix *a)
141cabdff1aSopenharmony_ci{
142cabdff1aSopenharmony_ci    for (int j = 4; j < a->height; ++j)
143cabdff1aSopenharmony_ci        for (int i = 0; i < a->width; ++i) {
144cabdff1aSopenharmony_ci            float *p = a->d + j * a->width + i;
145cabdff1aSopenharmony_ci            *p += 1;
146cabdff1aSopenharmony_ci        }
147cabdff1aSopenharmony_ci}
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_ci/* The VC-1 spec places restrictions on the values permitted at three
150cabdff1aSopenharmony_ci * different stages:
151cabdff1aSopenharmony_ci * - D: the input coefficients in frequency domain
152cabdff1aSopenharmony_ci * - E: the intermediate coefficients, inverse-transformed only horizontally
153cabdff1aSopenharmony_ci * - R: the fully inverse-transformed coefficients
154cabdff1aSopenharmony_ci *
155cabdff1aSopenharmony_ci * To fully cater for the ranges specified requires various intermediate
156cabdff1aSopenharmony_ci * values to be held to 17-bit precision; yet these conditions do not appear
157cabdff1aSopenharmony_ci * to be utilised in real-world streams. At least some assembly
158cabdff1aSopenharmony_ci * implementations have chosen to restrict these values to 16-bit precision,
159cabdff1aSopenharmony_ci * to accelerate the decoding of real-world streams at the cost of strict
160cabdff1aSopenharmony_ci * adherence to the spec. To avoid our test marking these as failures,
161cabdff1aSopenharmony_ci * reduce our random inputs.
162cabdff1aSopenharmony_ci */
163cabdff1aSopenharmony_ci#define ATTENUATION 4
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_cistatic matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height)
166cabdff1aSopenharmony_ci{
167cabdff1aSopenharmony_ci    matrix *raw, *tmp, *D, *E, *R;
168cabdff1aSopenharmony_ci    raw = new_matrix(width, height);
169cabdff1aSopenharmony_ci    for (int i = 0; i < width * height; ++i)
170cabdff1aSopenharmony_ci        raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION;
171cabdff1aSopenharmony_ci    tmp = multiply(height == 8 ? &T8 : &T4, raw);
172cabdff1aSopenharmony_ci    D = multiply(tmp, width == 8 ? &T8t : &T4t);
173cabdff1aSopenharmony_ci    normalise(D);
174cabdff1aSopenharmony_ci    divide_and_round_nearest(D, 1);
175cabdff1aSopenharmony_ci    for (int i = 0; i < width * height; ++i) {
176cabdff1aSopenharmony_ci        if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) {
177cabdff1aSopenharmony_ci            /* Rare, so simply try again */
178cabdff1aSopenharmony_ci            av_free(raw);
179cabdff1aSopenharmony_ci            av_free(tmp);
180cabdff1aSopenharmony_ci            av_free(D);
181cabdff1aSopenharmony_ci            return generate_inverse_quantized_transform_coefficients(width, height);
182cabdff1aSopenharmony_ci        }
183cabdff1aSopenharmony_ci    }
184cabdff1aSopenharmony_ci    E = multiply(D, width == 8 ? &T8 : &T4);
185cabdff1aSopenharmony_ci    divide_and_round_nearest(E, 8);
186cabdff1aSopenharmony_ci    for (int i = 0; i < width * height; ++i)
187cabdff1aSopenharmony_ci        if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) {
188cabdff1aSopenharmony_ci            /* Rare, so simply try again */
189cabdff1aSopenharmony_ci            av_free(raw);
190cabdff1aSopenharmony_ci            av_free(tmp);
191cabdff1aSopenharmony_ci            av_free(D);
192cabdff1aSopenharmony_ci            av_free(E);
193cabdff1aSopenharmony_ci            return generate_inverse_quantized_transform_coefficients(width, height);
194cabdff1aSopenharmony_ci        }
195cabdff1aSopenharmony_ci    R = multiply(height == 8 ? &T8t : &T4t, E);
196cabdff1aSopenharmony_ci    tweak(R);
197cabdff1aSopenharmony_ci    divide_and_round_nearest(R, 128);
198cabdff1aSopenharmony_ci    for (int i = 0; i < width * height; ++i)
199cabdff1aSopenharmony_ci        if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) {
200cabdff1aSopenharmony_ci            /* Rare, so simply try again */
201cabdff1aSopenharmony_ci            av_free(raw);
202cabdff1aSopenharmony_ci            av_free(tmp);
203cabdff1aSopenharmony_ci            av_free(D);
204cabdff1aSopenharmony_ci            av_free(E);
205cabdff1aSopenharmony_ci            av_free(R);
206cabdff1aSopenharmony_ci            return generate_inverse_quantized_transform_coefficients(width, height);
207cabdff1aSopenharmony_ci        }
208cabdff1aSopenharmony_ci    av_free(raw);
209cabdff1aSopenharmony_ci    av_free(tmp);
210cabdff1aSopenharmony_ci    av_free(E);
211cabdff1aSopenharmony_ci    av_free(R);
212cabdff1aSopenharmony_ci    return D;
213cabdff1aSopenharmony_ci}
214cabdff1aSopenharmony_ci
/* Fill the first `size` 16-bit elements of the buffer pair name##0 and
 * name##1 with random values, writing the same value to both buffers so
 * that reference and tested code start from identical data. `size` is
 * parenthesised so that any expression (e.g. a conditional) can be passed;
 * it is evaluated once per iteration. */
#define RANDOMIZE_BUFFER16(name, size)        \
    do {                                      \
        int i;                                \
        for (i = 0; i < (size); ++i) {        \
            uint16_t r = rnd();               \
            AV_WN16A(name##0 + i, r);         \
            AV_WN16A(name##1 + i, r);         \
        }                                     \
    } while (0)
224cabdff1aSopenharmony_ci
/* Fill the first `size` bytes of the buffer pair name##0 and name##1 with
 * random values, writing the same value to both buffers so that reference
 * and tested code start from identical data. `size` is parenthesised so
 * that any expression (e.g. a conditional) can be passed; it is evaluated
 * once per iteration. */
#define RANDOMIZE_BUFFER8(name, size)         \
    do {                                      \
        int i;                                \
        for (i = 0; i < (size); ++i) {        \
            uint8_t r = rnd();                \
            name##0[i] = r;                   \
            name##1[i] = r;                   \
        }                                     \
    } while (0)
234cabdff1aSopenharmony_ci
/* Fill the first `size` bytes of the buffer pair name##0 and name##1 with
 * identical random values clustered around 0x80. Each byte is 0x80 plus a
 * random deviation: a value in 0x80..0xFF shifted right by a random 0..8
 * bits and given a random sign, so small deviations are the most likely.
 * The sum is truncated to 8 bits when stored. */
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)          \
    do {                                                    \
        const int total = (size);                           \
        for (int k = 0; k < total; ++k) {                   \
            int dev = (rnd() & 0x7F) | 0x80;                \
            dev >>= rnd() % 9;                              \
            dev = (rnd() & 1) ? -dev : dev;                 \
            name##0[k] = (uint8_t) (0x80 + dev);            \
            name##1[k] = name##0[k];                        \
        }                                                   \
    } while (0)
247cabdff1aSopenharmony_ci
248cabdff1aSopenharmony_cistatic void check_inv_trans_inplace(void)
249cabdff1aSopenharmony_ci{
250cabdff1aSopenharmony_ci    /* Inverse transform input coefficients are stored in a 16-bit buffer
251cabdff1aSopenharmony_ci     * with row stride of 8 coefficients irrespective of transform size.
252cabdff1aSopenharmony_ci     * vc1_inv_trans_8x8 differs from the others in two ways: coefficients
253cabdff1aSopenharmony_ci     * are stored in column-major order, and the outputs are written back
254cabdff1aSopenharmony_ci     * to the input buffer, so we oversize it slightly to catch overruns. */
255cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]);
256cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]);
257cabdff1aSopenharmony_ci
258cabdff1aSopenharmony_ci    VC1DSPContext h;
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci    ff_vc1dsp_init(&h);
261cabdff1aSopenharmony_ci
262cabdff1aSopenharmony_ci    if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) {
263cabdff1aSopenharmony_ci        matrix *coeffs;
264cabdff1aSopenharmony_ci        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *);
265cabdff1aSopenharmony_ci        RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8);
266cabdff1aSopenharmony_ci        coeffs = generate_inverse_quantized_transform_coefficients(8, 8);
267cabdff1aSopenharmony_ci        for (int j = 0; j < 8; ++j)
268cabdff1aSopenharmony_ci            for (int i = 0; i < 8; ++i) {
269cabdff1aSopenharmony_ci                int idx = 8 + i * 8 + j;
270cabdff1aSopenharmony_ci                inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i];
271cabdff1aSopenharmony_ci            }
272cabdff1aSopenharmony_ci        call_ref(inv_trans_in0 + 8);
273cabdff1aSopenharmony_ci        call_new(inv_trans_in1 + 8);
274cabdff1aSopenharmony_ci        if (memcmp(inv_trans_in0,  inv_trans_in1,  10 * 8 * sizeof (int16_t)))
275cabdff1aSopenharmony_ci            fail();
276cabdff1aSopenharmony_ci        bench_new(inv_trans_in1 + 8);
277cabdff1aSopenharmony_ci        av_free(coeffs);
278cabdff1aSopenharmony_ci    }
279cabdff1aSopenharmony_ci}
280cabdff1aSopenharmony_ci
281cabdff1aSopenharmony_cistatic void check_inv_trans_adding(void)
282cabdff1aSopenharmony_ci{
283cabdff1aSopenharmony_ci    /* Inverse transform input coefficients are stored in a 16-bit buffer
284cabdff1aSopenharmony_ci     * with row stride of 8 coefficients irrespective of transform size. */
285cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]);
286cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]);
287cabdff1aSopenharmony_ci
288cabdff1aSopenharmony_ci    /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and
289cabdff1aSopenharmony_ci     * added with saturation to an array of unsigned 8-bit values. Oversize
290cabdff1aSopenharmony_ci     * this by 8 samples left and right and one row above and below. */
291cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]);
292cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]);
293cabdff1aSopenharmony_ci
294cabdff1aSopenharmony_ci    VC1DSPContext h;
295cabdff1aSopenharmony_ci
296cabdff1aSopenharmony_ci    const test tests[] = {
297cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4)
298cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8)
299cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4)
300cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8)
301cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4)
302cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8)
303cabdff1aSopenharmony_ci        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4)
304cabdff1aSopenharmony_ci    };
305cabdff1aSopenharmony_ci
306cabdff1aSopenharmony_ci    ff_vc1dsp_init(&h);
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_ci    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
309cabdff1aSopenharmony_ci        void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset);
310cabdff1aSopenharmony_ci        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
311cabdff1aSopenharmony_ci            matrix *coeffs;
312cabdff1aSopenharmony_ci            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *);
313cabdff1aSopenharmony_ci            RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8);
314cabdff1aSopenharmony_ci            RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24);
315cabdff1aSopenharmony_ci            coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height);
316cabdff1aSopenharmony_ci            for (int j = 0; j < tests[t].height; ++j)
317cabdff1aSopenharmony_ci                for (int i = 0; i < tests[t].width; ++i) {
318cabdff1aSopenharmony_ci                    int idx = j * 8 + i;
319cabdff1aSopenharmony_ci                    inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i];
320cabdff1aSopenharmony_ci                }
321cabdff1aSopenharmony_ci            call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);
322cabdff1aSopenharmony_ci            call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
323cabdff1aSopenharmony_ci            if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24))
324cabdff1aSopenharmony_ci                fail();
325cabdff1aSopenharmony_ci            bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8);
326cabdff1aSopenharmony_ci            av_free(coeffs);
327cabdff1aSopenharmony_ci        }
328cabdff1aSopenharmony_ci    }
329cabdff1aSopenharmony_ci}
330cabdff1aSopenharmony_ci
331cabdff1aSopenharmony_cistatic void check_loop_filter(void)
332cabdff1aSopenharmony_ci{
333cabdff1aSopenharmony_ci    /* Deblocking filter buffers are big enough to hold a 16x16 block,
334cabdff1aSopenharmony_ci     * plus 16 columns left and 4 rows above to hold filter inputs
335cabdff1aSopenharmony_ci     * (depending on whether v or h neighbouring block edge, oversized
336cabdff1aSopenharmony_ci     * horizontally to maintain 16-byte alignment) plus 16 columns and
337cabdff1aSopenharmony_ci     * 4 rows below to catch write overflows */
338cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]);
339cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]);
340cabdff1aSopenharmony_ci
341cabdff1aSopenharmony_ci    VC1DSPContext h;
342cabdff1aSopenharmony_ci
343cabdff1aSopenharmony_ci    const test tests[] = {
344cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_v_loop_filter4)
345cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_h_loop_filter4)
346cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_v_loop_filter8)
347cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_h_loop_filter8)
348cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_v_loop_filter16)
349cabdff1aSopenharmony_ci        VC1DSP_TEST(vc1_h_loop_filter16)
350cabdff1aSopenharmony_ci    };
351cabdff1aSopenharmony_ci
352cabdff1aSopenharmony_ci    ff_vc1dsp_init(&h);
353cabdff1aSopenharmony_ci
354cabdff1aSopenharmony_ci    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
355cabdff1aSopenharmony_ci        void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);
356cabdff1aSopenharmony_ci        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
357cabdff1aSopenharmony_ci        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
358cabdff1aSopenharmony_ci            for (int count = 1000; count > 0; --count) {
359cabdff1aSopenharmony_ci                int pq = rnd() % 31 + 1;
360cabdff1aSopenharmony_ci                RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48);
361cabdff1aSopenharmony_ci                call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);
362cabdff1aSopenharmony_ci                call_new(filter_buf1 + 4 * 48 + 16, 48, pq);
363cabdff1aSopenharmony_ci                if (memcmp(filter_buf0, filter_buf1, 24 * 48))
364cabdff1aSopenharmony_ci                    fail();
365cabdff1aSopenharmony_ci            }
366cabdff1aSopenharmony_ci        }
367cabdff1aSopenharmony_ci        for (int j = 0; j < 24; ++j)
368cabdff1aSopenharmony_ci            for (int i = 0; i < 48; ++i)
369cabdff1aSopenharmony_ci                filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4);
370cabdff1aSopenharmony_ci        if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name))
371cabdff1aSopenharmony_ci            bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);
372cabdff1aSopenharmony_ci        if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name))
373cabdff1aSopenharmony_ci            bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);
374cabdff1aSopenharmony_ci    }
375cabdff1aSopenharmony_ci}
376cabdff1aSopenharmony_ci
/* Run 100 randomised comparisons of the unescape function on the current
 * contents of escaped0/escaped1. Each round picks random source and
 * destination offsets in 0..7 and a length of 2^(3..10) minus 0..7, refills
 * both destination buffers with identical random bytes, and fails if the
 * returned lengths or the full destination buffers differ.
 * Expects escaped0/1, unescaped0/1, len0, len1, escaped_offset,
 * unescaped_offset, escaped_len and UNESCAPE_BUF_SIZE in the expanding
 * scope. */
#define TEST_UNESCAPE                                                             \
    do {                                                                          \
        for (int iter = 0; iter < 100; ++iter) {                                  \
            escaped_offset   = rnd() & 7;                                         \
            unescaped_offset = rnd() & 7;                                         \
            escaped_len = (1u << ((rnd() % 8) + 3)) - (rnd() & 7);                \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                      \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len,               \
                            unescaped0 + unescaped_offset);                       \
            len1 = call_new(escaped1 + escaped_offset, escaped_len,               \
                            unescaped1 + unescaped_offset);                       \
            if (len0 != len1 ||                                                   \
                memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                \
                fail();                                                           \
        }                                                                         \
    } while (0)
390cabdff1aSopenharmony_ci
391cabdff1aSopenharmony_cistatic void check_unescape(void)
392cabdff1aSopenharmony_ci{
393cabdff1aSopenharmony_ci    /* This appears to be a typical length of buffer in use */
394cabdff1aSopenharmony_ci#define LOG2_UNESCAPE_BUF_SIZE 17
395cabdff1aSopenharmony_ci#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)
396cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]);
397cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]);
398cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]);
399cabdff1aSopenharmony_ci    LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]);
400cabdff1aSopenharmony_ci
401cabdff1aSopenharmony_ci    VC1DSPContext h;
402cabdff1aSopenharmony_ci
403cabdff1aSopenharmony_ci    ff_vc1dsp_init(&h);
404cabdff1aSopenharmony_ci
405cabdff1aSopenharmony_ci    if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) {
406cabdff1aSopenharmony_ci        int len0, len1, escaped_offset, unescaped_offset, escaped_len;
407cabdff1aSopenharmony_ci        declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *);
408cabdff1aSopenharmony_ci
409cabdff1aSopenharmony_ci        /* Test data which consists of escapes sequences packed as tightly as possible */
410cabdff1aSopenharmony_ci        for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x)
411cabdff1aSopenharmony_ci            escaped1[x] = escaped0[x] = 3 * (x % 3 == 0);
412cabdff1aSopenharmony_ci        TEST_UNESCAPE;
413cabdff1aSopenharmony_ci
414cabdff1aSopenharmony_ci        /* Test random data */
415cabdff1aSopenharmony_ci        RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE);
416cabdff1aSopenharmony_ci        TEST_UNESCAPE;
417cabdff1aSopenharmony_ci
418cabdff1aSopenharmony_ci        /* Test data with escape sequences at random intervals */
419cabdff1aSopenharmony_ci        for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) {
420cabdff1aSopenharmony_ci            int gap, gap_msb;
421cabdff1aSopenharmony_ci            escaped1[x+0] = escaped0[x+0] = 0;
422cabdff1aSopenharmony_ci            escaped1[x+1] = escaped0[x+1] = 0;
423cabdff1aSopenharmony_ci            escaped1[x+2] = escaped0[x+2] = 3;
424cabdff1aSopenharmony_ci            escaped1[x+3] = escaped0[x+3] = rnd() & 3;
425cabdff1aSopenharmony_ci            gap_msb = 2u << (rnd() % 8);
426cabdff1aSopenharmony_ci            gap = (rnd() &~ -gap_msb) | gap_msb;
427cabdff1aSopenharmony_ci            x += gap;
428cabdff1aSopenharmony_ci        }
429cabdff1aSopenharmony_ci        TEST_UNESCAPE;
430cabdff1aSopenharmony_ci
431cabdff1aSopenharmony_ci        /* Test data which is known to contain no escape sequences */
432cabdff1aSopenharmony_ci        memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE);
433cabdff1aSopenharmony_ci        memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE);
434cabdff1aSopenharmony_ci        TEST_UNESCAPE;
435cabdff1aSopenharmony_ci
436cabdff1aSopenharmony_ci        /* Benchmark the no-escape-sequences case */
437cabdff1aSopenharmony_ci        bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1);
438cabdff1aSopenharmony_ci    }
439cabdff1aSopenharmony_ci}
440cabdff1aSopenharmony_ci
/* Entry point for the VC-1 DSP checks. Runs each group of checks and
 * reports it under its own heading. */
void checkasm_check_vc1dsp(void)
{
    /* The in-place and the adding inverse transforms share one report. */
    check_inv_trans_inplace();
    check_inv_trans_adding();
    report("inv_trans");

    /* Loop (deblocking) filters */
    check_loop_filter();
    report("loop_filter");

    /* Bitstream unescaping */
    check_unescape();
    report("unescape_buffer");
}
453