1/* 2 * Copyright (c) 2015 Henrik Gramner 3 * Copyright (c) 2021 Josh Dekker 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License along 18 * with FFmpeg; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21 22#include <string.h> 23#include "checkasm.h" 24#include "libavcodec/hevcdsp.h" 25#include "libavutil/common.h" 26#include "libavutil/internal.h" 27#include "libavutil/intreadwrite.h" 28 29static const uint32_t pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff }; 30static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff }; 31static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 }; 32static const int weights[] = { 0, 128, 255, -1 }; 33static const int denoms[] = {0, 7, 12, -1 }; 34static const int offsets[] = {0, 255, -1 }; 35 36#define SIZEOF_PIXEL ((bit_depth + 7) / 8) 37#define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE)) 38 39#define randomize_buffers() \ 40 do { \ 41 uint32_t mask = pixel_mask[bit_depth - 8]; \ 42 int k; \ 43 for (k = 0; k < BUF_SIZE + SRC_EXTRA; k += 4) { \ 44 uint32_t r = rnd() & mask; \ 45 AV_WN32A(buf0 + k, r); \ 46 AV_WN32A(buf1 + k, r); \ 47 if (k >= BUF_SIZE) \ 48 continue; \ 49 r = rnd(); \ 50 AV_WN32A(dst0 + k, r); \ 51 AV_WN32A(dst1 + k, r); \ 52 } \ 53 } while (0) 54 55#define randomize_buffers_ref() \ 56 randomize_buffers(); \ 57 do { \ 58 uint32_t mask = pixel_mask16[bit_depth - 8]; \ 59 int k; \ 60 for (k = 0; k < BUF_SIZE; k += 2) { \ 61 uint32_t r = rnd() & mask; \ 62 AV_WN32A(ref0 + k, r); \ 63 AV_WN32A(ref1 + k, r); \ 64 } \ 65 } while (0) 66 67#define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */ 68#define src1 (buf1 + 2 * 4 * MAX_PB_SIZE) 69 70/* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */ 71#define SRC_EXTRA 8 72 73static void checkasm_check_hevc_qpel(void) 74{ 75 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]); 76 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]); 77 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 78 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 79 80 HEVCDSPContext h; 81 int size, bit_depth, i, j, row; 82 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride, 83 int height, intptr_t mx, intptr_t my, int width); 84 85 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 86 ff_hevc_dsp_init(&h, bit_depth); 87 88 for (i = 0; i < 2; i++) { 89 for (j = 0; j < 2; j++) { 90 for (size = 1; size < 10; size++) { 91 const char *type; 92 switch ((j << 1) | i) { 93 case 0: type = "pel_pixels"; break; // 0 0 94 case 1: type = "qpel_h"; break; // 0 1 95 case 2: type = "qpel_v"; break; // 1 0 96 case 3: type = "qpel_hv"; break; // 1 1 97 } 98 99 if (check_func(h.put_hevc_qpel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 100 int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1; 101 randomize_buffers(); 102 call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 103 call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 104 for (row = 0; row < size[sizes]; row++) { 105 if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL)) 106 fail(); 107 } 108 bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 109 } 110 } 111 } 112 } 113 } 114 report("qpel"); 115} 116 117static void checkasm_check_hevc_qpel_uni(void) 118{ 119 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]); 120 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]); 121 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 122 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 123 124 HEVCDSPContext h; 125 int size, bit_depth, i, j; 126 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 127 int height, intptr_t mx, intptr_t my, int width); 128 129 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 130 ff_hevc_dsp_init(&h, bit_depth); 131 132 for (i = 0; i < 2; i++) { 133 for (j = 0; j < 2; j++) { 134 for (size = 1; size < 10; size++) { 135 const char *type; 136 switch ((j << 1) | i) { 137 case 0: type = "pel_uni_pixels"; break; // 0 0 138 case 1: type = "qpel_uni_h"; break; // 0 1 139 case 2: type = "qpel_uni_v"; break; // 1 0 140 case 3: type = "qpel_uni_hv"; break; // 1 1 141 } 142 143 if (check_func(h.put_hevc_qpel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 144 randomize_buffers(); 145 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 146 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 147 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 148 fail(); 149 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 150 } 151 } 152 } 153 } 154 } 155 report("qpel_uni"); 156} 157 158static void checkasm_check_hevc_qpel_uni_w(void) 159{ 160 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]); 161 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]); 162 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 163 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 164 165 HEVCDSPContext h; 166 int size, bit_depth, i, j; 167 const int *denom, *wx, *ox; 168 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 169 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); 170 171 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 172 ff_hevc_dsp_init(&h, bit_depth); 173 174 for (i = 0; i < 2; i++) { 175 for (j = 0; j < 2; j++) { 176 for (size = 1; size < 10; size++) { 177 const char *type; 178 switch ((j << 1) | i) { 179 case 0: type = "pel_uni_w_pixels"; break; // 0 0 180 case 1: type = "qpel_uni_w_h"; break; // 0 1 181 case 2: type = "qpel_uni_w_v"; break; // 1 0 182 case 3: type = "qpel_uni_w_hv"; break; // 1 1 183 } 184 185 if (check_func(h.put_hevc_qpel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 186 for (denom = denoms; *denom >= 0; denom++) { 187 for (wx = weights; *wx >= 0; wx++) { 188 for (ox = offsets; *ox >= 0; ox++) { 189 randomize_buffers(); 190 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 191 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 192 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 193 fail(); 194 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 195 } 196 } 197 } 198 } 199 } 200 } 201 } 202 } 203 report("qpel_uni_w"); 204} 205 206static void checkasm_check_hevc_qpel_bi(void) 207{ 208 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]); 209 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]); 210 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 211 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 212 LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]); 213 LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]); 214 215 HEVCDSPContext h; 216 int size, bit_depth, i, j; 217 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 218 int16_t *src2, 219 int height, intptr_t mx, intptr_t my, int width); 220 221 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 222 ff_hevc_dsp_init(&h, bit_depth); 223 224 for (i = 0; i < 2; i++) { 225 for (j = 0; j < 2; j++) { 226 for (size = 1; size < 10; size++) { 227 const char *type; 228 switch ((j << 1) | i) { 229 case 0: type = "pel_bi_pixels"; break; // 0 0 230 case 1: type = "qpel_bi_h"; break; // 0 1 231 case 2: type = "qpel_bi_v"; break; // 1 0 232 case 3: type = "qpel_bi_hv"; break; // 1 1 233 } 234 235 if (check_func(h.put_hevc_qpel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 236 randomize_buffers_ref(); 237 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]); 238 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]); 239 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 240 fail(); 241 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]); 242 } 243 } 244 } 245 } 246 } 247 report("qpel_bi"); 248} 249 250static void checkasm_check_hevc_qpel_bi_w(void) 251{ 252 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]); 253 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]); 254 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 255 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 256 LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]); 257 LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]); 258 259 HEVCDSPContext h; 260 int size, bit_depth, i, j; 261 const int *denom, *wx, *ox; 262 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 263 int16_t *src2, 264 int height, int denom, int wx0, int wx1, 265 int ox0, int ox1, intptr_t mx, intptr_t my, int width); 266 267 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 268 ff_hevc_dsp_init(&h, bit_depth); 269 270 for (i = 0; i < 2; i++) { 271 for (j = 0; j < 2; j++) { 272 for (size = 1; size < 10; size++) { 273 const char *type; 274 switch ((j << 1) | i) { 275 case 0: type = "pel_bi_w_pixels"; break; // 0 0 276 case 1: type = "qpel_bi_w_h"; break; // 0 1 277 case 2: type = "qpel_bi_w_v"; break; // 1 0 278 case 3: type = "qpel_bi_w_hv"; break; // 1 1 279 } 280 281 if (check_func(h.put_hevc_qpel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 282 for (denom = denoms; *denom >= 0; denom++) { 283 for (wx = weights; *wx >= 0; wx++) { 284 for (ox = offsets; *ox >= 0; ox++) { 285 randomize_buffers_ref(); 286 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 287 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 288 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 289 fail(); 290 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 291 } 292 } 293 } 294 } 295 } 296 } 297 } 298 } 299 report("qpel_bi_w"); 300} 301 302#undef SRC_EXTRA 303#define SRC_EXTRA 0 304 305static void checkasm_check_hevc_epel(void) 306{ 307 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); 308 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); 309 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 310 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 311 312 HEVCDSPContext h; 313 int size, bit_depth, i, j, row; 314 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride, 315 int height, intptr_t mx, intptr_t my, int width); 316 317 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 318 ff_hevc_dsp_init(&h, bit_depth); 319 320 for (i = 0; i < 2; i++) { 321 for (j = 0; j < 2; j++) { 322 for (size = 1; size < 10; size++) { 323 const char *type; 324 switch ((j << 1) | i) { 325 case 0: type = "pel_pixels"; break; // 0 0 326 case 1: type = "epel_h"; break; // 0 1 327 case 2: type = "epel_v"; break; // 1 0 328 case 3: type = "epel_hv"; break; // 1 1 329 } 330 331 if (check_func(h.put_hevc_epel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 332 int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1; 333 randomize_buffers(); 334 call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 335 call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 336 for (row = 0; row < size[sizes]; row++) { 337 if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL)) 338 fail(); 339 } 340 bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 341 } 342 } 343 } 344 } 345 } 346 report("epel"); 347} 348 349static void checkasm_check_hevc_epel_uni(void) 350{ 351 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); 352 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); 353 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 354 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 355 356 HEVCDSPContext h; 357 int size, bit_depth, i, j; 358 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 359 int height, intptr_t mx, intptr_t my, int width); 360 361 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 362 ff_hevc_dsp_init(&h, bit_depth); 363 364 for (i = 0; i < 2; i++) { 365 for (j = 0; j < 2; j++) { 366 for (size = 1; size < 10; size++) { 367 const char *type; 368 switch ((j << 1) | i) { 369 case 0: type = "pel_uni_pixels"; break; // 0 0 370 case 1: type = "epel_uni_h"; break; // 0 1 371 case 2: type = "epel_uni_v"; break; // 1 0 372 case 3: type = "epel_uni_hv"; break; // 1 1 373 } 374 375 if (check_func(h.put_hevc_epel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 376 randomize_buffers(); 377 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 378 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 379 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 380 fail(); 381 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]); 382 } 383 } 384 } 385 } 386 } 387 report("epel_uni"); 388} 389 390static void checkasm_check_hevc_epel_uni_w(void) 391{ 392 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); 393 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); 394 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 395 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 396 397 HEVCDSPContext h; 398 int size, bit_depth, i, j; 399 const int *denom, *wx, *ox; 400 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 401 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); 402 403 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 404 ff_hevc_dsp_init(&h, bit_depth); 405 406 for (i = 0; i < 2; i++) { 407 for (j = 0; j < 2; j++) { 408 for (size = 1; size < 10; size++) { 409 const char *type; 410 switch ((j << 1) | i) { 411 case 0: type = "pel_uni_w_pixels"; break; // 0 0 412 case 1: type = "epel_uni_w_h"; break; // 0 1 413 case 2: type = "epel_uni_w_v"; break; // 1 0 414 case 3: type = "epel_uni_w_hv"; break; // 1 1 415 } 416 417 if (check_func(h.put_hevc_epel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 418 for (denom = denoms; *denom >= 0; denom++) { 419 for (wx = weights; *wx >= 0; wx++) { 420 for (ox = offsets; *ox >= 0; ox++) { 421 randomize_buffers(); 422 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 423 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 424 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 425 fail(); 426 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]); 427 } 428 } 429 } 430 } 431 } 432 } 433 } 434 } 435 report("epel_uni_w"); 436} 437 438static void checkasm_check_hevc_epel_bi(void) 439{ 440 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); 441 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); 442 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 443 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 444 LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]); 445 LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]); 446 447 HEVCDSPContext h; 448 int size, bit_depth, i, j; 449 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 450 int16_t *src2, 451 int height, intptr_t mx, intptr_t my, int width); 452 453 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 454 ff_hevc_dsp_init(&h, bit_depth); 455 456 for (i = 0; i < 2; i++) { 457 for (j = 0; j < 2; j++) { 458 for (size = 1; size < 10; size++) { 459 const char *type; 460 switch ((j << 1) | i) { 461 case 0: type = "pel_bi_pixels"; break; // 0 0 462 case 1: type = "epel_bi_h"; break; // 0 1 463 case 2: type = "epel_bi_v"; break; // 1 0 464 case 3: type = "epel_bi_hv"; break; // 1 1 465 } 466 467 if (check_func(h.put_hevc_epel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 468 randomize_buffers_ref(); 469 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]); 470 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]); 471 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 472 fail(); 473 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]); 474 } 475 } 476 } 477 } 478 } 479 report("epel_bi"); 480} 481 482static void checkasm_check_hevc_epel_bi_w(void) 483{ 484 LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); 485 LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); 486 LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); 487 LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); 488 LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]); 489 LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]); 490 491 HEVCDSPContext h; 492 int size, bit_depth, i, j; 493 const int *denom, *wx, *ox; 494 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, 495 int16_t *src2, 496 int height, int denom, int wx0, int wx1, 497 int ox0, int ox1, intptr_t mx, intptr_t my, int width); 498 499 for (bit_depth = 8; bit_depth <= 12; bit_depth++) { 500 ff_hevc_dsp_init(&h, bit_depth); 501 502 for (i = 0; i < 2; i++) { 503 for (j = 0; j < 2; j++) { 504 for (size = 1; size < 10; size++) { 505 const char *type; 506 switch ((j << 1) | i) { 507 case 0: type = "pel_bi_w_pixels"; break; // 0 0 508 case 1: type = "epel_bi_w_h"; break; // 0 1 509 case 2: type = "epel_bi_w_v"; break; // 1 0 510 case 3: type = "epel_bi_w_hv"; break; // 1 1 511 } 512 513 if (check_func(h.put_hevc_epel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) { 514 for (denom = denoms; *denom >= 0; denom++) { 515 for (wx = weights; *wx >= 0; wx++) { 516 for (ox = offsets; *ox >= 0; ox++) { 517 randomize_buffers_ref(); 518 call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 519 call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 520 if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL)) 521 fail(); 522 bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]); 523 } 524 } 525 } 526 } 527 } 528 } 529 } 530 } 531 report("epel_bi_w"); 532} 533 534void checkasm_check_hevc_pel(void) 535{ 536 checkasm_check_hevc_qpel(); 537 checkasm_check_hevc_qpel_uni(); 538 checkasm_check_hevc_qpel_uni_w(); 539 checkasm_check_hevc_qpel_bi(); 540 checkasm_check_hevc_qpel_bi_w(); 541 checkasm_check_hevc_epel(); 542 checkasm_check_hevc_epel_uni(); 543 checkasm_check_hevc_epel_uni_w(); 544 checkasm_check_hevc_epel_bi(); 545 checkasm_check_hevc_epel_bi_w(); 546} 547