/*
 * Copyright (c) 2022 Ben Avison
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>

#include "checkasm.h"

#include "libavcodec/vc1dsp.h"

#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) },
#define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height },

typedef struct {
    const char *name;
    size_t offset;
    int width;
    int height;
} test;

typedef struct matrix {
    size_t width;
    size_t height;
    float d[];
} matrix;

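/* The 8-point and 4-point VC-1 transform matrices and their transposes.
 * These are used below to synthesise frequency-domain coefficient blocks
 * whose intermediate and final inverse-transform values stay within the
 * ranges the spec permits. */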
static const matrix T8 = { 8, 8, {
        12,  12,  12,  12,  12,  12,  12,  12,
        16,  15,   9,   4,  -4,  -9, -15, -16,
        16,   6,  -6, -16, -16,  -6,   6,  16,
        15,  -4, -16,  -9,   9,  16,   4, -15,
        12, -12, -12,  12,  12, -12, -12,  12,
         9, -16,   4,  15, -15,  -4,  16,  -9,
         6, -16,  16,  -6,  -6,  16, -16,   6,
         4,  -9,  15, -16,  16, -15,   9,  -4
} };

static const matrix T4 = { 4, 4, {
        17,  17,  17,  17,
        22,  10, -10, -22,
        17, -17, -17,  17,
        10, -22,  22, -10
} };

static const matrix T8t = { 8, 8, {
        12,  16,  16,  15,  12,   9,   6,   4,
        12,  15,   6,  -4, -12, -16, -16,  -9,
        12,   9,  -6, -16, -12,   4,  16,  15,
        12,   4, -16,  -9,  12,  15,  -6, -16,
        12,  -4, -16,   9,  12, -15,  -6,  16,
        12,  -9,  -6,  16, -12,  -4,  16, -15,
        12, -15,   6,   4, -12,  16, -16,   9,
        12, -16,  16, -15,  12,  -9,   6,  -4
} };

static const matrix T4t = { 4, 4, {
        17,  22,  17,  10,
        17,  10, -17, -22,
        17, -10, -17,  22,
        17, -22,  17, -10
} };

static matrix *new_matrix(size_t width, size_t height)
{
    matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float));
    if (out == NULL) {
        fprintf(stderr, "Memory allocation failure\n");
        exit(EXIT_FAILURE);
    }
    out->width = width;
    out->height = height;
    return out;
}

static matrix *multiply(const matrix *a, const matrix *b)
{
    matrix *out;
    if (a->width != b->height) {
        fprintf(stderr, "Incompatible multiplication\n");
        exit(EXIT_FAILURE);
    }
    out = new_matrix(b->width, a->height);
    for (int j = 0; j < out->height; ++j)
        for (int i = 0; i < out->width; ++i) {
            float sum = 0;
            for (int k = 0; k < a->width; ++k)
                sum += a->d[j * a->width + k] * b->d[k * b->width + i];
            out->d[j * out->width + i] = sum;
        }
    return out;
}

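/* Rescale a forward-transformed block into the range expected at the input
 * of the codec's inverse transform. The divisors are one quarter of the
 * squared norms of the transform basis rows: 4 * {288, 289, 292} =
 * {1152, 1156, 1168} for the 8-point basis and 4 * {289, 292} =
 * {1156, 1168} for the 4-point basis. Together with the factor of 64 this
 * approximately cancels the gain the basis introduces in each direction. */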
static void normalise(matrix *a)
{
    for (int j = 0; j < a->height; ++j)
        for (int i = 0; i < a->width; ++i) {
            float *p = a->d + j * a->width + i;
            *p *= 64;
            if (a->height == 4)
                *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];
            else
                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];
            if (a->width == 4)
                *p /= (const unsigned[]) { 289, 292, 289, 292 } [i];
            else
                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i];
        }
}

static void divide_and_round_nearest(matrix *a, float by)
{
    for (int j = 0; j < a->height; ++j)
        for (int i = 0; i < a->width; ++i) {
            float *p = a->d + j * a->width + i;
            *p = rintf(*p / by);
        }
}

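/* Model the asymmetric rounding of the codec's inverse transform: its second
 * (vertical) pass adds an extra 1 to output rows 4 and above before the final
 * shift, so apply the same bias to the simulated result before rounding. */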
static void tweak(matrix *a)
{
    for (int j = 4; j < a->height; ++j)
        for (int i = 0; i < a->width; ++i) {
            float *p = a->d + j * a->width + i;
            *p += 1;
        }
}

/* The VC-1 spec places restrictions on the values permitted at three
 * different stages:
 * - D: the input coefficients in the frequency domain
 * - E: the intermediate coefficients, inverse-transformed only horizontally
 * - R: the fully inverse-transformed coefficients
 *
 * Fully catering for the specified ranges requires various intermediate
 * values to be maintained at 17-bit precision, yet these extremes do not
 * appear to occur in real-world streams. At least some assembly
 * implementations have chosen to restrict these values to 16-bit precision,
 * accelerating the decoding of real-world streams at the cost of strict
 * adherence to the spec. To avoid the test marking such implementations as
 * failures, attenuate the random inputs.
 */
#define ATTENUATION 4

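/* Produce a block of frequency-domain coefficients that a conforming decoder
 * could legitimately be asked to inverse-transform: start from random
 * spatial-domain values, forward-transform and normalise them into candidate
 * coefficients D, then simulate both inverse-transform passes (E and R) and
 * start over in the rare case that any stage exceeds its (attenuated) range:
 * D in [-2048/ATTENUATION, 2048/ATTENUATION - 1],
 * E in [-4096/ATTENUATION, 4096/ATTENUATION - 1] and
 * R in [-512/ATTENUATION, 512/ATTENUATION - 1]. */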
static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height)
{
    matrix *raw, *tmp, *D, *E, *R;
    raw = new_matrix(width, height);
    for (int i = 0; i < width * height; ++i)
        raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION;
    tmp = multiply(height == 8 ? &T8 : &T4, raw);
    D = multiply(tmp, width == 8 ? &T8t : &T4t);
    normalise(D);
    divide_and_round_nearest(D, 1);
    for (int i = 0; i < width * height; ++i) {
        if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) {
            /* Rare, so simply try again */
            av_free(raw);
            av_free(tmp);
            av_free(D);
            return generate_inverse_quantized_transform_coefficients(width, height);
        }
    }
    E = multiply(D, width == 8 ? &T8 : &T4);
    divide_and_round_nearest(E, 8);
    for (int i = 0; i < width * height; ++i)
        if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) {
            /* Rare, so simply try again */
            av_free(raw);
            av_free(tmp);
            av_free(D);
            av_free(E);
            return generate_inverse_quantized_transform_coefficients(width, height);
        }
    R = multiply(height == 8 ? &T8t : &T4t, E);
    tweak(R);
    divide_and_round_nearest(R, 128);
    for (int i = 0; i < width * height; ++i)
        if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) {
            /* Rare, so simply try again */
            av_free(raw);
            av_free(tmp);
            av_free(D);
            av_free(E);
            av_free(R);
            return generate_inverse_quantized_transform_coefficients(width, height);
        }
    av_free(raw);
    av_free(tmp);
    av_free(E);
    av_free(R);
    return D;
}

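/* Each test buffer exists in two identical copies, suffixed 0 and 1: the
 * reference implementation operates on copy 0, the implementation under test
 * on copy 1, and the full buffers (including guard areas) are compared with
 * memcmp() afterwards. */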
#define RANDOMIZE_BUFFER16(name, size)        \
    do {                                      \
        int i;                                \
        for (i = 0; i < size; ++i) {          \
            uint16_t r = rnd();               \
            AV_WN16A(name##0 + i, r);         \
            AV_WN16A(name##1 + i, r);         \
        }                                     \
    } while (0)

#define RANDOMIZE_BUFFER8(name, size)         \
    do {                                      \
        int i;                                \
        for (i = 0; i < size; ++i) {          \
            uint8_t r = rnd();                \
            name##0[i] = r;                   \
            name##1[i] = r;                   \
        }                                     \
    } while (0)

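/* Fill both copies of a byte buffer with values clustered around 0x80: the
 * deviation is an 8-bit value with its top bit set, shifted right by 0 to 8
 * bit positions and given a random sign, so its magnitude is roughly
 * geometrically distributed. This keeps most neighbouring pixels close to
 * each other, which is intended to exercise the loop filter's filter/no-filter
 * decision thresholds more often than uniform random data would. */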
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)  \
    do {                                            \
        uint8_t *p##0 = name##0, *p##1 = name##1;   \
        int i = (size);                             \
        while (i-- > 0) {                           \
            int x = 0x80 | (rnd() & 0x7F);          \
            x >>= rnd() % 9;                        \
            if (rnd() & 1)                          \
                x = -x;                             \
            *p##1++ = *p##0++ = 0x80 + x;           \
        }                                           \
    } while (0)

static void check_inv_trans_inplace(void)
{
    /* Inverse transform input coefficients are stored in a 16-bit buffer
     * with row stride of 8 coefficients irrespective of transform size.
     * vc1_inv_trans_8x8 differs from the others in two ways: coefficients
     * are stored in column-major order, and the outputs are written back
     * to the input buffer, so we oversize it slightly to catch overruns. */
    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]);
    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]);

    VC1DSPContext h;

    ff_vc1dsp_init(&h);

    if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) {
        matrix *coeffs;
        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *);
        RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8);
        coeffs = generate_inverse_quantized_transform_coefficients(8, 8);
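        /* Copy the coefficients in transposed (column-major) order, leaving
         * the first and last rows of the oversized buffer as guard bands. */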
        for (int j = 0; j < 8; ++j)
            for (int i = 0; i < 8; ++i) {
                int idx = 8 + i * 8 + j;
                inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i];
            }
        call_ref(inv_trans_in0 + 8);
        call_new(inv_trans_in1 + 8);
        if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t)))
            fail();
        bench_new(inv_trans_in1 + 8);
        av_free(coeffs);
    }
}

static void check_inv_trans_adding(void)
{
    /* Inverse transform input coefficients are stored in a 16-bit buffer
     * with row stride of 8 coefficients irrespective of transform size. */
    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]);
    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]);

    /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and
     * added with saturation to an array of unsigned 8-bit values. Oversize
     * this by 8 samples left and right and one row above and below. */
    LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]);
    LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]);

    VC1DSPContext h;

    const test tests[] = {
        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4)
        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8)
        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4)
        VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8)
        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4)
        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8)
        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4)
    };

    ff_vc1dsp_init(&h);

    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
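        /* The test table stores offsetof() values, so recover each DSP
         * function pointer from the context generically. */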
        void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset);
        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
            matrix *coeffs;
            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *);
            RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8);
            RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24);
            coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height);
            for (int j = 0; j < tests[t].height; ++j)
                for (int i = 0; i < tests[t].width; ++i) {
                    int idx = j * 8 + i;
                    inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i];
                }
            call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);
            call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
            if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24))
                fail();
            bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
            av_free(coeffs);
        }
    }
}

static void check_loop_filter(void)
{
    /* Deblocking filter buffers are big enough to hold a 16x16 block, plus
     * 16 columns to the left and 4 rows above to hold the filter inputs
     * (whichever of the vertical or horizontal block-edge filters is used;
     * oversized horizontally to maintain 16-byte alignment), plus 16 columns
     * to the right and 4 rows below to catch write overflows. */
    LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]);
    LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]);

    VC1DSPContext h;

    const test tests[] = {
        VC1DSP_TEST(vc1_v_loop_filter4)
        VC1DSP_TEST(vc1_h_loop_filter4)
        VC1DSP_TEST(vc1_v_loop_filter8)
        VC1DSP_TEST(vc1_h_loop_filter8)
        VC1DSP_TEST(vc1_v_loop_filter16)
        VC1DSP_TEST(vc1_h_loop_filter16)
    };

    ff_vc1dsp_init(&h);

    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
        void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);
        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
            for (int count = 1000; count > 0; --count) {
                int pq = rnd() % 31 + 1;
                RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48);
                call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);
                call_new(filter_buf1 + 4 * 48 + 16, 48, pq);
                if (memcmp(filter_buf0, filter_buf1, 24 * 48))
                    fail();
            }
        }
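        /* For benchmarking, construct a deterministic input in filter_buf1:
         * a flat 0x60 background with the 16x16 block region set to 0xA0,
         * giving a fixed step across every filtered edge. With pq = 1 the
         * filter condition should fail everywhere (an easy early-out,
         * labelled "bestcase"), while pq = 31 should take the full filtering
         * path ("worstcase"). */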
        for (int j = 0; j < 24; ++j)
            for (int i = 0; i < 48; ++i)
                filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4);
        if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name))
            bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);
        if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name))
            bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);
    }
}

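/* Run 100 trials with random alignments of the escaped and unescaped buffers
 * and a random escaped length, checking both the returned unescaped length
 * and the entire output buffer (including untouched bytes) against the
 * reference implementation. */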
#define TEST_UNESCAPE                                                                               \
    do {                                                                                            \
        for (int count = 100; count > 0; --count) {                                                 \
            escaped_offset = rnd() & 7;                                                             \
            unescaped_offset = rnd() & 7;                                                           \
            escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7);                                    \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
                fail();                                                                             \
        }                                                                                           \
    } while (0)

static void check_unescape(void)
{
    /* This appears to be a typical length of buffer in use */
#define LOG2_UNESCAPE_BUF_SIZE 17
#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)
    LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]);
    LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]);
    LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]);
    LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]);

    VC1DSPContext h;

    ff_vc1dsp_init(&h);

    if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) {
        int len0, len1, escaped_offset, unescaped_offset, escaped_len;
        declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *);

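        /* vc1_unescape_buffer removes the escape bytes that VC-1 inserts to
         * prevent start-code emulation: a 0x03 following two zero bytes, and
         * itself followed by a byte value no greater than 3, is dropped from
         * the output. The data sets below target the interesting cases. */
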
        /* Test data which consists of escape sequences packed as tightly as possible */
        for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x)
            escaped1[x] = escaped0[x] = 3 * (x % 3 == 0);
        TEST_UNESCAPE;

        /* Test random data */
        RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE);
        TEST_UNESCAPE;

        /* Test data with escape sequences at random intervals */
        for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) {
            int gap, gap_msb;
            escaped1[x+0] = escaped0[x+0] = 0;
            escaped1[x+1] = escaped0[x+1] = 0;
            escaped1[x+2] = escaped0[x+2] = 3;
            escaped1[x+3] = escaped0[x+3] = rnd() & 3;
            gap_msb = 2u << (rnd() % 8);
            gap = (rnd() &~ -gap_msb) | gap_msb;
            x += gap;
        }
        TEST_UNESCAPE;

        /* Test data which is known to contain no escape sequences */
        memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE);
        memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE);
        TEST_UNESCAPE;

        /* Benchmark the no-escape-sequences case */
        bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1);
    }
}

void checkasm_check_vc1dsp(void)
{
    check_inv_trans_inplace();
    check_inv_trans_adding();
    report("inv_trans");

    check_loop_filter();
    report("loop_filter");

    check_unescape();
    report("unescape_buffer");
}