1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2022 Ben Avison 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or modify 7cabdff1aSopenharmony_ci * it under the terms of the GNU General Public License as published by 8cabdff1aSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or 9cabdff1aSopenharmony_ci * (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14cabdff1aSopenharmony_ci * GNU General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU General Public License along 17cabdff1aSopenharmony_ci * with FFmpeg; if not, write to the Free Software Foundation, Inc., 18cabdff1aSopenharmony_ci * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include <string.h> 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "checkasm.h" 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include "libavutil/common.h" 28cabdff1aSopenharmony_ci#include "libavutil/internal.h" 29cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 30cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) }, 33cabdff1aSopenharmony_ci#define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height }, 34cabdff1aSopenharmony_ci 35cabdff1aSopenharmony_citypedef struct { 36cabdff1aSopenharmony_ci const char *name; 37cabdff1aSopenharmony_ci size_t offset; 38cabdff1aSopenharmony_ci int width; 39cabdff1aSopenharmony_ci int height; 40cabdff1aSopenharmony_ci} test; 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_citypedef struct matrix { 43cabdff1aSopenharmony_ci size_t width; 44cabdff1aSopenharmony_ci size_t height; 45cabdff1aSopenharmony_ci float d[]; 46cabdff1aSopenharmony_ci} matrix; 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_cistatic const matrix T8 = { 8, 8, { 49cabdff1aSopenharmony_ci 12, 12, 12, 12, 12, 12, 12, 12, 50cabdff1aSopenharmony_ci 16, 15, 9, 4, -4, -9, -15, -16, 51cabdff1aSopenharmony_ci 16, 6, -6, -16, -16, -6, 6, 16, 52cabdff1aSopenharmony_ci 15, -4, -16, -9, 9, 16, 4, -15, 53cabdff1aSopenharmony_ci 12, -12, -12, 12, 12, -12, -12, 12, 54cabdff1aSopenharmony_ci 9, -16, 4, 15, -15, -4, 16, -9, 55cabdff1aSopenharmony_ci 6, -16, 16, -6, -6, 16, -16, 6, 56cabdff1aSopenharmony_ci 4, -9, 15, -16, 16, -15, 9, -4 57cabdff1aSopenharmony_ci} }; 58cabdff1aSopenharmony_ci 59cabdff1aSopenharmony_cistatic const matrix T4 = { 4, 4, { 60cabdff1aSopenharmony_ci 17, 17, 17, 17, 61cabdff1aSopenharmony_ci 22, 10, -10, -22, 62cabdff1aSopenharmony_ci 17, -17, -17, 17, 63cabdff1aSopenharmony_ci 10, -22, 22, -10 64cabdff1aSopenharmony_ci} }; 65cabdff1aSopenharmony_ci 66cabdff1aSopenharmony_cistatic const matrix T8t = { 8, 8, { 67cabdff1aSopenharmony_ci 12, 16, 16, 15, 12, 9, 6, 4, 68cabdff1aSopenharmony_ci 12, 15, 6, -4, -12, -16, -16, -9, 69cabdff1aSopenharmony_ci 12, 9, -6, -16, -12, 4, 16, 15, 70cabdff1aSopenharmony_ci 12, 4, -16, -9, 12, 15, -6, -16, 71cabdff1aSopenharmony_ci 12, -4, -16, 9, 12, -15, -6, 16, 72cabdff1aSopenharmony_ci 12, -9, -6, 16, -12, -4, 16, -15, 73cabdff1aSopenharmony_ci 12, -15, 6, 4, -12, 16, -16, 9, 74cabdff1aSopenharmony_ci 12, -16, 16, -15, 12, -9, 6, -4 75cabdff1aSopenharmony_ci} }; 76cabdff1aSopenharmony_ci 77cabdff1aSopenharmony_cistatic const matrix T4t = { 4, 4, { 78cabdff1aSopenharmony_ci 17, 22, 17, 10, 79cabdff1aSopenharmony_ci 17, 10, -17, -22, 80cabdff1aSopenharmony_ci 17, -10, -17, 22, 81cabdff1aSopenharmony_ci 17, -22, 17, -10 82cabdff1aSopenharmony_ci} }; 83cabdff1aSopenharmony_ci 84cabdff1aSopenharmony_cistatic matrix *new_matrix(size_t width, size_t height) 85cabdff1aSopenharmony_ci{ 86cabdff1aSopenharmony_ci matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float)); 87cabdff1aSopenharmony_ci if (out == NULL) { 88cabdff1aSopenharmony_ci fprintf(stderr, "Memory allocation failure\n"); 89cabdff1aSopenharmony_ci exit(EXIT_FAILURE); 90cabdff1aSopenharmony_ci } 91cabdff1aSopenharmony_ci out->width = width; 92cabdff1aSopenharmony_ci out->height = height; 93cabdff1aSopenharmony_ci return out; 94cabdff1aSopenharmony_ci} 95cabdff1aSopenharmony_ci 96cabdff1aSopenharmony_cistatic matrix *multiply(const matrix *a, const matrix *b) 97cabdff1aSopenharmony_ci{ 98cabdff1aSopenharmony_ci matrix *out; 99cabdff1aSopenharmony_ci if (a->width != b->height) { 100cabdff1aSopenharmony_ci fprintf(stderr, "Incompatible multiplication\n"); 101cabdff1aSopenharmony_ci exit(EXIT_FAILURE); 102cabdff1aSopenharmony_ci } 103cabdff1aSopenharmony_ci out = new_matrix(b->width, a->height); 104cabdff1aSopenharmony_ci for (int j = 0; j < out->height; ++j) 105cabdff1aSopenharmony_ci for (int i = 0; i < out->width; ++i) { 106cabdff1aSopenharmony_ci float sum = 0; 107cabdff1aSopenharmony_ci for (int k = 0; k < a->width; ++k) 108cabdff1aSopenharmony_ci sum += a->d[j * a->width + k] * b->d[k * b->width + i]; 109cabdff1aSopenharmony_ci out->d[j * out->width + i] = sum; 110cabdff1aSopenharmony_ci } 111cabdff1aSopenharmony_ci return out; 112cabdff1aSopenharmony_ci} 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_cistatic void normalise(matrix *a) 115cabdff1aSopenharmony_ci{ 116cabdff1aSopenharmony_ci for (int j = 0; j < a->height; ++j) 117cabdff1aSopenharmony_ci for (int i = 0; i < a->width; ++i) { 118cabdff1aSopenharmony_ci float *p = a->d + j * a->width + i; 119cabdff1aSopenharmony_ci *p *= 64; 120cabdff1aSopenharmony_ci if (a->height == 4) 121cabdff1aSopenharmony_ci *p /= (const unsigned[]) { 289, 292, 289, 292 } [j]; 122cabdff1aSopenharmony_ci else 123cabdff1aSopenharmony_ci *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j]; 124cabdff1aSopenharmony_ci if (a->width == 4) 125cabdff1aSopenharmony_ci *p /= (const unsigned[]) { 289, 292, 289, 292 } [i]; 126cabdff1aSopenharmony_ci else 127cabdff1aSopenharmony_ci *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i]; 128cabdff1aSopenharmony_ci } 129cabdff1aSopenharmony_ci} 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_cistatic void divide_and_round_nearest(matrix *a, float by) 132cabdff1aSopenharmony_ci{ 133cabdff1aSopenharmony_ci for (int j = 0; j < a->height; ++j) 134cabdff1aSopenharmony_ci for (int i = 0; i < a->width; ++i) { 135cabdff1aSopenharmony_ci float *p = a->d + j * a->width + i; 136cabdff1aSopenharmony_ci *p = rintf(*p / by); 137cabdff1aSopenharmony_ci } 138cabdff1aSopenharmony_ci} 139cabdff1aSopenharmony_ci 140cabdff1aSopenharmony_cistatic void tweak(matrix *a) 141cabdff1aSopenharmony_ci{ 142cabdff1aSopenharmony_ci for (int j = 4; j < a->height; ++j) 143cabdff1aSopenharmony_ci for (int i = 0; i < a->width; ++i) { 144cabdff1aSopenharmony_ci float *p = a->d + j * a->width + i; 145cabdff1aSopenharmony_ci *p += 1; 146cabdff1aSopenharmony_ci } 147cabdff1aSopenharmony_ci} 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci/* The VC-1 spec places restrictions on the values permitted at three 150cabdff1aSopenharmony_ci * different stages: 151cabdff1aSopenharmony_ci * - D: the input coefficients in frequency domain 152cabdff1aSopenharmony_ci * - E: the intermediate coefficients, inverse-transformed only horizontally 153cabdff1aSopenharmony_ci * - R: the fully inverse-transformed coefficients 154cabdff1aSopenharmony_ci * 155cabdff1aSopenharmony_ci * To fully cater for the ranges specified requires various intermediate 156cabdff1aSopenharmony_ci * values to be held to 17-bit precision; yet these conditions do not appear 157cabdff1aSopenharmony_ci * to be utilised in real-world streams. At least some assembly 158cabdff1aSopenharmony_ci * implementations have chosen to restrict these values to 16-bit precision, 159cabdff1aSopenharmony_ci * to accelerate the decoding of real-world streams at the cost of strict 160cabdff1aSopenharmony_ci * adherence to the spec. To avoid our test marking these as failures, 161cabdff1aSopenharmony_ci * reduce our random inputs. 162cabdff1aSopenharmony_ci */ 163cabdff1aSopenharmony_ci#define ATTENUATION 4 164cabdff1aSopenharmony_ci 165cabdff1aSopenharmony_cistatic matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height) 166cabdff1aSopenharmony_ci{ 167cabdff1aSopenharmony_ci matrix *raw, *tmp, *D, *E, *R; 168cabdff1aSopenharmony_ci raw = new_matrix(width, height); 169cabdff1aSopenharmony_ci for (int i = 0; i < width * height; ++i) 170cabdff1aSopenharmony_ci raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; 171cabdff1aSopenharmony_ci tmp = multiply(height == 8 ? &T8 : &T4, raw); 172cabdff1aSopenharmony_ci D = multiply(tmp, width == 8 ? &T8t : &T4t); 173cabdff1aSopenharmony_ci normalise(D); 174cabdff1aSopenharmony_ci divide_and_round_nearest(D, 1); 175cabdff1aSopenharmony_ci for (int i = 0; i < width * height; ++i) { 176cabdff1aSopenharmony_ci if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) { 177cabdff1aSopenharmony_ci /* Rare, so simply try again */ 178cabdff1aSopenharmony_ci av_free(raw); 179cabdff1aSopenharmony_ci av_free(tmp); 180cabdff1aSopenharmony_ci av_free(D); 181cabdff1aSopenharmony_ci return generate_inverse_quantized_transform_coefficients(width, height); 182cabdff1aSopenharmony_ci } 183cabdff1aSopenharmony_ci } 184cabdff1aSopenharmony_ci E = multiply(D, width == 8 ? &T8 : &T4); 185cabdff1aSopenharmony_ci divide_and_round_nearest(E, 8); 186cabdff1aSopenharmony_ci for (int i = 0; i < width * height; ++i) 187cabdff1aSopenharmony_ci if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) { 188cabdff1aSopenharmony_ci /* Rare, so simply try again */ 189cabdff1aSopenharmony_ci av_free(raw); 190cabdff1aSopenharmony_ci av_free(tmp); 191cabdff1aSopenharmony_ci av_free(D); 192cabdff1aSopenharmony_ci av_free(E); 193cabdff1aSopenharmony_ci return generate_inverse_quantized_transform_coefficients(width, height); 194cabdff1aSopenharmony_ci } 195cabdff1aSopenharmony_ci R = multiply(height == 8 ? &T8t : &T4t, E); 196cabdff1aSopenharmony_ci tweak(R); 197cabdff1aSopenharmony_ci divide_and_round_nearest(R, 128); 198cabdff1aSopenharmony_ci for (int i = 0; i < width * height; ++i) 199cabdff1aSopenharmony_ci if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) { 200cabdff1aSopenharmony_ci /* Rare, so simply try again */ 201cabdff1aSopenharmony_ci av_free(raw); 202cabdff1aSopenharmony_ci av_free(tmp); 203cabdff1aSopenharmony_ci av_free(D); 204cabdff1aSopenharmony_ci av_free(E); 205cabdff1aSopenharmony_ci av_free(R); 206cabdff1aSopenharmony_ci return generate_inverse_quantized_transform_coefficients(width, height); 207cabdff1aSopenharmony_ci } 208cabdff1aSopenharmony_ci av_free(raw); 209cabdff1aSopenharmony_ci av_free(tmp); 210cabdff1aSopenharmony_ci av_free(E); 211cabdff1aSopenharmony_ci av_free(R); 212cabdff1aSopenharmony_ci return D; 213cabdff1aSopenharmony_ci} 214cabdff1aSopenharmony_ci 215cabdff1aSopenharmony_ci#define RANDOMIZE_BUFFER16(name, size) \ 216cabdff1aSopenharmony_ci do { \ 217cabdff1aSopenharmony_ci int i; \ 218cabdff1aSopenharmony_ci for (i = 0; i < size; ++i) { \ 219cabdff1aSopenharmony_ci uint16_t r = rnd(); \ 220cabdff1aSopenharmony_ci AV_WN16A(name##0 + i, r); \ 221cabdff1aSopenharmony_ci AV_WN16A(name##1 + i, r); \ 222cabdff1aSopenharmony_ci } \ 223cabdff1aSopenharmony_ci } while (0) 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci#define RANDOMIZE_BUFFER8(name, size) \ 226cabdff1aSopenharmony_ci do { \ 227cabdff1aSopenharmony_ci int i; \ 228cabdff1aSopenharmony_ci for (i = 0; i < size; ++i) { \ 229cabdff1aSopenharmony_ci uint8_t r = rnd(); \ 230cabdff1aSopenharmony_ci name##0[i] = r; \ 231cabdff1aSopenharmony_ci name##1[i] = r; \ 232cabdff1aSopenharmony_ci } \ 233cabdff1aSopenharmony_ci } while (0) 234cabdff1aSopenharmony_ci 235cabdff1aSopenharmony_ci#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size) \ 236cabdff1aSopenharmony_ci do { \ 237cabdff1aSopenharmony_ci uint8_t *p##0 = name##0, *p##1 = name##1; \ 238cabdff1aSopenharmony_ci int i = (size); \ 239cabdff1aSopenharmony_ci while (i-- > 0) { \ 240cabdff1aSopenharmony_ci int x = 0x80 | (rnd() & 0x7F); \ 241cabdff1aSopenharmony_ci x >>= rnd() % 9; \ 242cabdff1aSopenharmony_ci if (rnd() & 1) \ 243cabdff1aSopenharmony_ci x = -x; \ 244cabdff1aSopenharmony_ci *p##1++ = *p##0++ = 0x80 + x; \ 245cabdff1aSopenharmony_ci } \ 246cabdff1aSopenharmony_ci } while (0) 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_cistatic void check_inv_trans_inplace(void) 249cabdff1aSopenharmony_ci{ 250cabdff1aSopenharmony_ci /* Inverse transform input coefficients are stored in a 16-bit buffer 251cabdff1aSopenharmony_ci * with row stride of 8 coefficients irrespective of transform size. 252cabdff1aSopenharmony_ci * vc1_inv_trans_8x8 differs from the others in two ways: coefficients 253cabdff1aSopenharmony_ci * are stored in column-major order, and the outputs are written back 254cabdff1aSopenharmony_ci * to the input buffer, so we oversize it slightly to catch overruns. */ 255cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]); 256cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]); 257cabdff1aSopenharmony_ci 258cabdff1aSopenharmony_ci VC1DSPContext h; 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci ff_vc1dsp_init(&h); 261cabdff1aSopenharmony_ci 262cabdff1aSopenharmony_ci if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) { 263cabdff1aSopenharmony_ci matrix *coeffs; 264cabdff1aSopenharmony_ci declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *); 265cabdff1aSopenharmony_ci RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8); 266cabdff1aSopenharmony_ci coeffs = generate_inverse_quantized_transform_coefficients(8, 8); 267cabdff1aSopenharmony_ci for (int j = 0; j < 8; ++j) 268cabdff1aSopenharmony_ci for (int i = 0; i < 8; ++i) { 269cabdff1aSopenharmony_ci int idx = 8 + i * 8 + j; 270cabdff1aSopenharmony_ci inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i]; 271cabdff1aSopenharmony_ci } 272cabdff1aSopenharmony_ci call_ref(inv_trans_in0 + 8); 273cabdff1aSopenharmony_ci call_new(inv_trans_in1 + 8); 274cabdff1aSopenharmony_ci if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t))) 275cabdff1aSopenharmony_ci fail(); 276cabdff1aSopenharmony_ci bench_new(inv_trans_in1 + 8); 277cabdff1aSopenharmony_ci av_free(coeffs); 278cabdff1aSopenharmony_ci } 279cabdff1aSopenharmony_ci} 280cabdff1aSopenharmony_ci 281cabdff1aSopenharmony_cistatic void check_inv_trans_adding(void) 282cabdff1aSopenharmony_ci{ 283cabdff1aSopenharmony_ci /* Inverse transform input coefficients are stored in a 16-bit buffer 284cabdff1aSopenharmony_ci * with row stride of 8 coefficients irrespective of transform size. */ 285cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]); 286cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]); 287cabdff1aSopenharmony_ci 288cabdff1aSopenharmony_ci /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and 289cabdff1aSopenharmony_ci * added with saturation to an array of unsigned 8-bit values. Oversize 290cabdff1aSopenharmony_ci * this by 8 samples left and right and one row above and below. */ 291cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]); 292cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]); 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_ci VC1DSPContext h; 295cabdff1aSopenharmony_ci 296cabdff1aSopenharmony_ci const test tests[] = { 297cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4) 298cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8) 299cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4) 300cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8) 301cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4) 302cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8) 303cabdff1aSopenharmony_ci VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4) 304cabdff1aSopenharmony_ci }; 305cabdff1aSopenharmony_ci 306cabdff1aSopenharmony_ci ff_vc1dsp_init(&h); 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_ci for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { 309cabdff1aSopenharmony_ci void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset); 310cabdff1aSopenharmony_ci if (check_func(func, "vc1dsp.%s", tests[t].name)) { 311cabdff1aSopenharmony_ci matrix *coeffs; 312cabdff1aSopenharmony_ci declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *); 313cabdff1aSopenharmony_ci RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8); 314cabdff1aSopenharmony_ci RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24); 315cabdff1aSopenharmony_ci coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height); 316cabdff1aSopenharmony_ci for (int j = 0; j < tests[t].height; ++j) 317cabdff1aSopenharmony_ci for (int i = 0; i < tests[t].width; ++i) { 318cabdff1aSopenharmony_ci int idx = j * 8 + i; 319cabdff1aSopenharmony_ci inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i]; 320cabdff1aSopenharmony_ci } 321cabdff1aSopenharmony_ci call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0); 322cabdff1aSopenharmony_ci call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); 323cabdff1aSopenharmony_ci if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) 324cabdff1aSopenharmony_ci fail(); 325cabdff1aSopenharmony_ci bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8); 326cabdff1aSopenharmony_ci av_free(coeffs); 327cabdff1aSopenharmony_ci } 328cabdff1aSopenharmony_ci } 329cabdff1aSopenharmony_ci} 330cabdff1aSopenharmony_ci 331cabdff1aSopenharmony_cistatic void check_loop_filter(void) 332cabdff1aSopenharmony_ci{ 333cabdff1aSopenharmony_ci /* Deblocking filter buffers are big enough to hold a 16x16 block, 334cabdff1aSopenharmony_ci * plus 16 columns left and 4 rows above to hold filter inputs 335cabdff1aSopenharmony_ci * (depending on whether v or h neighbouring block edge, oversized 336cabdff1aSopenharmony_ci * horizontally to maintain 16-byte alignment) plus 16 columns and 337cabdff1aSopenharmony_ci * 4 rows below to catch write overflows */ 338cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]); 339cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]); 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_ci VC1DSPContext h; 342cabdff1aSopenharmony_ci 343cabdff1aSopenharmony_ci const test tests[] = { 344cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_v_loop_filter4) 345cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_h_loop_filter4) 346cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_v_loop_filter8) 347cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_h_loop_filter8) 348cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_v_loop_filter16) 349cabdff1aSopenharmony_ci VC1DSP_TEST(vc1_h_loop_filter16) 350cabdff1aSopenharmony_ci }; 351cabdff1aSopenharmony_ci 352cabdff1aSopenharmony_ci ff_vc1dsp_init(&h); 353cabdff1aSopenharmony_ci 354cabdff1aSopenharmony_ci for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { 355cabdff1aSopenharmony_ci void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); 356cabdff1aSopenharmony_ci declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int); 357cabdff1aSopenharmony_ci if (check_func(func, "vc1dsp.%s", tests[t].name)) { 358cabdff1aSopenharmony_ci for (int count = 1000; count > 0; --count) { 359cabdff1aSopenharmony_ci int pq = rnd() % 31 + 1; 360cabdff1aSopenharmony_ci RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48); 361cabdff1aSopenharmony_ci call_ref(filter_buf0 + 4 * 48 + 16, 48, pq); 362cabdff1aSopenharmony_ci call_new(filter_buf1 + 4 * 48 + 16, 48, pq); 363cabdff1aSopenharmony_ci if (memcmp(filter_buf0, filter_buf1, 24 * 48)) 364cabdff1aSopenharmony_ci fail(); 365cabdff1aSopenharmony_ci } 366cabdff1aSopenharmony_ci } 367cabdff1aSopenharmony_ci for (int j = 0; j < 24; ++j) 368cabdff1aSopenharmony_ci for (int i = 0; i < 48; ++i) 369cabdff1aSopenharmony_ci filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4); 370cabdff1aSopenharmony_ci if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name)) 371cabdff1aSopenharmony_ci bench_new(filter_buf1 + 4 * 48 + 16, 48, 1); 372cabdff1aSopenharmony_ci if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name)) 373cabdff1aSopenharmony_ci bench_new(filter_buf1 + 4 * 48 + 16, 48, 31); 374cabdff1aSopenharmony_ci } 375cabdff1aSopenharmony_ci} 376cabdff1aSopenharmony_ci 377cabdff1aSopenharmony_ci#define TEST_UNESCAPE \ 378cabdff1aSopenharmony_ci do { \ 379cabdff1aSopenharmony_ci for (int count = 100; count > 0; --count) { \ 380cabdff1aSopenharmony_ci escaped_offset = rnd() & 7; \ 381cabdff1aSopenharmony_ci unescaped_offset = rnd() & 7; \ 382cabdff1aSopenharmony_ci escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \ 383cabdff1aSopenharmony_ci RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \ 384cabdff1aSopenharmony_ci len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \ 385cabdff1aSopenharmony_ci len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \ 386cabdff1aSopenharmony_ci if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \ 387cabdff1aSopenharmony_ci fail(); \ 388cabdff1aSopenharmony_ci } \ 389cabdff1aSopenharmony_ci } while (0) 390cabdff1aSopenharmony_ci 391cabdff1aSopenharmony_cistatic void check_unescape(void) 392cabdff1aSopenharmony_ci{ 393cabdff1aSopenharmony_ci /* This appears to be a typical length of buffer in use */ 394cabdff1aSopenharmony_ci#define LOG2_UNESCAPE_BUF_SIZE 17 395cabdff1aSopenharmony_ci#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) 396cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); 397cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); 398cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); 399cabdff1aSopenharmony_ci LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); 400cabdff1aSopenharmony_ci 401cabdff1aSopenharmony_ci VC1DSPContext h; 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_ci ff_vc1dsp_init(&h); 404cabdff1aSopenharmony_ci 405cabdff1aSopenharmony_ci if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { 406cabdff1aSopenharmony_ci int len0, len1, escaped_offset, unescaped_offset, escaped_len; 407cabdff1aSopenharmony_ci declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *); 408cabdff1aSopenharmony_ci 409cabdff1aSopenharmony_ci /* Test data which consists of escapes sequences packed as tightly as possible */ 410cabdff1aSopenharmony_ci for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) 411cabdff1aSopenharmony_ci escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); 412cabdff1aSopenharmony_ci TEST_UNESCAPE; 413cabdff1aSopenharmony_ci 414cabdff1aSopenharmony_ci /* Test random data */ 415cabdff1aSopenharmony_ci RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); 416cabdff1aSopenharmony_ci TEST_UNESCAPE; 417cabdff1aSopenharmony_ci 418cabdff1aSopenharmony_ci /* Test data with escape sequences at random intervals */ 419cabdff1aSopenharmony_ci for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { 420cabdff1aSopenharmony_ci int gap, gap_msb; 421cabdff1aSopenharmony_ci escaped1[x+0] = escaped0[x+0] = 0; 422cabdff1aSopenharmony_ci escaped1[x+1] = escaped0[x+1] = 0; 423cabdff1aSopenharmony_ci escaped1[x+2] = escaped0[x+2] = 3; 424cabdff1aSopenharmony_ci escaped1[x+3] = escaped0[x+3] = rnd() & 3; 425cabdff1aSopenharmony_ci gap_msb = 2u << (rnd() % 8); 426cabdff1aSopenharmony_ci gap = (rnd() &~ -gap_msb) | gap_msb; 427cabdff1aSopenharmony_ci x += gap; 428cabdff1aSopenharmony_ci } 429cabdff1aSopenharmony_ci TEST_UNESCAPE; 430cabdff1aSopenharmony_ci 431cabdff1aSopenharmony_ci /* Test data which is known to contain no escape sequences */ 432cabdff1aSopenharmony_ci memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); 433cabdff1aSopenharmony_ci memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); 434cabdff1aSopenharmony_ci TEST_UNESCAPE; 435cabdff1aSopenharmony_ci 436cabdff1aSopenharmony_ci /* Benchmark the no-escape-sequences case */ 437cabdff1aSopenharmony_ci bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); 438cabdff1aSopenharmony_ci } 439cabdff1aSopenharmony_ci} 440cabdff1aSopenharmony_ci 441cabdff1aSopenharmony_civoid checkasm_check_vc1dsp(void) 442cabdff1aSopenharmony_ci{ 443cabdff1aSopenharmony_ci check_inv_trans_inplace(); 444cabdff1aSopenharmony_ci check_inv_trans_adding(); 445cabdff1aSopenharmony_ci report("inv_trans"); 446cabdff1aSopenharmony_ci 447cabdff1aSopenharmony_ci check_loop_filter(); 448cabdff1aSopenharmony_ci report("loop_filter"); 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci check_unescape(); 451cabdff1aSopenharmony_ci report("unescape_buffer"); 452cabdff1aSopenharmony_ci} 453