1/*
2 * Copyright (c) 2015 Henrik Gramner
3 * Copyright (c) 2021 Josh Dekker
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <string.h>
23#include "checkasm.h"
24#include "libavcodec/hevcdsp.h"
25#include "libavutil/common.h"
26#include "libavutil/internal.h"
27#include "libavutil/intreadwrite.h"
28
/* Masks applied to each random 32-bit word written to the source buffers,
 * indexed by bit_depth - 8. */
static const uint32_t pixel_mask[] = {
    0xffffffff, /* bit_depth  8 */
    0x01ff01ff, /* bit_depth  9 */
    0x03ff03ff, /* bit_depth 10 */
    0x07ff07ff, /* bit_depth 11 */
    0x0fff0fff, /* bit_depth 12 */
};

/* Masks applied to each random 32-bit word written to the int16_t ref
 * buffers, indexed by bit_depth - 8. */
static const uint32_t pixel_mask16[] = {
    0x00ff00ff, /* bit_depth  8 */
    0x01ff01ff, /* bit_depth  9 */
    0x03ff03ff, /* bit_depth 10 */
    0x07ff07ff, /* bit_depth 11 */
    0x0fff0fff, /* bit_depth 12 */
};

/* Block dimensions by function-table index. Index 0 holds a placeholder;
 * the test loops in this file start at index 1. */
static const int sizes[] = {
    -1, 4, 6, 8, 12, 16, 24, 32, 48, 64,
};

/* Parameter sets for the weighted tests; each list ends with a negative
 * sentinel that stops the iteration. */
static const int weights[] = {
    0, 128, 255, -1,
};
static const int denoms[] = {
    0, 7, 12, -1,
};
static const int offsets[] = {
    0, 255, -1,
};
35
/* Bytes per pixel for the bit depth under test: 1 for bit_depth 8, 2 for
 * bit_depth 9..12. Expands to an expression that reads the local variable
 * bit_depth, so it is only usable where such a variable is in scope. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Size of the scratch buffers used by the tests:
 * 2 * 4 * MAX_PB_SIZE + 2 * MAX_PB_SIZE * MAX_PB_SIZE. The first term equals
 * the offset at which src0/src1 point into buf0/buf1. */
#define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))
38
/*
 * Fill the source and destination buffers with random data.
 *
 * buf0 and buf1 receive identical random words masked for the current
 * bit_depth over BUF_SIZE + SRC_EXTRA bytes; dst0 and dst1 receive identical
 * unmasked random words over BUF_SIZE bytes. Relies on bit_depth, buf0, buf1,
 * dst0 and dst1 being in scope at the point of use.
 */
#define randomize_buffers()                                      \
    do {                                                         \
        const uint32_t src_mask = pixel_mask[bit_depth - 8];     \
        int off;                                                 \
        for (off = 0; off < BUF_SIZE + SRC_EXTRA; off += 4) {    \
            const uint32_t pix = rnd() & src_mask;               \
            AV_WN32A(buf0 + off, pix);                           \
            AV_WN32A(buf1 + off, pix);                           \
            /* the destination buffers have no SRC_EXTRA tail */ \
            if (off < BUF_SIZE) {                                \
                const uint32_t fill = rnd();                     \
                AV_WN32A(dst0 + off, fill);                      \
                AV_WN32A(dst1 + off, fill);                      \
            }                                                    \
        }                                                        \
    } while (0)
54
/*
 * Randomize the source/destination buffers (via randomize_buffers()) and then
 * fill ref0 and ref1 with identical random words masked by pixel_mask16 for
 * the current bit_depth.
 *
 * ref0/ref1 are indexed in their own element type; each 32-bit store covers
 * two int16_t elements, hence the step of 2 over BUF_SIZE elements.
 *
 * The whole expansion is a single do { } while (0) statement so that the
 * macro stays one statement when used as the body of an unbraced if/else or
 * loop. Relies on bit_depth, buf0, buf1, dst0, dst1, ref0 and ref1 being in
 * scope at the point of use.
 */
#define randomize_buffers_ref()                              \
    do {                                                     \
        uint32_t ref_mask = pixel_mask16[bit_depth - 8];     \
        int n;                                               \
        randomize_buffers();                                 \
        for (n = 0; n < BUF_SIZE; n += 2) {                  \
            uint32_t v = rnd() & ref_mask;                   \
            AV_WN32A(ref0 + n, v);                           \
            AV_WN32A(ref1 + n, v);                           \
        }                                                    \
    } while (0)
66
/* Source pointers passed to the functions under test. They point
 * 2 * 4 * MAX_PB_SIZE bytes into buf0/buf1 so that reads at negative offsets
 * from the source pointer stay inside the buffers. */
#define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)

/* Number of extra bytes appended to buf0/buf1 (and randomized along with
 * them) by the tests that follow this definition. */
/* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */
#define SRC_EXTRA 8
72
73static void checkasm_check_hevc_qpel(void)
74{
75    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
76    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
77    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
78    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
79
80    HEVCDSPContext h;
81    int size, bit_depth, i, j, row;
82    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
83                                                                  int height, intptr_t mx, intptr_t my, int width);
84
85    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
86        ff_hevc_dsp_init(&h, bit_depth);
87
88        for (i = 0; i < 2; i++) {
89            for (j = 0; j < 2; j++) {
90                for (size = 1; size < 10; size++) {
91                    const char *type;
92                    switch ((j << 1) | i) {
93                    case 0: type = "pel_pixels"; break; // 0 0
94                    case 1: type = "qpel_h"; break; // 0 1
95                    case 2: type = "qpel_v"; break; // 1 0
96                    case 3: type = "qpel_hv"; break; // 1 1
97                    }
98
99                    if (check_func(h.put_hevc_qpel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
100                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
101                        randomize_buffers();
102                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
103                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
104                        for (row = 0; row < size[sizes]; row++) {
105                            if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
106                                fail();
107                        }
108                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
109                    }
110                }
111            }
112        }
113    }
114    report("qpel");
115}
116
117static void checkasm_check_hevc_qpel_uni(void)
118{
119    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
120    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
121    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
122    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
123
124    HEVCDSPContext h;
125    int size, bit_depth, i, j;
126    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
127                                                                  int height, intptr_t mx, intptr_t my, int width);
128
129    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
130        ff_hevc_dsp_init(&h, bit_depth);
131
132        for (i = 0; i < 2; i++) {
133            for (j = 0; j < 2; j++) {
134                for (size = 1; size < 10; size++) {
135                    const char *type;
136                    switch ((j << 1) | i) {
137                    case 0: type = "pel_uni_pixels"; break; // 0 0
138                    case 1: type = "qpel_uni_h"; break; // 0 1
139                    case 2: type = "qpel_uni_v"; break; // 1 0
140                    case 3: type = "qpel_uni_hv"; break; // 1 1
141                    }
142
143                    if (check_func(h.put_hevc_qpel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
144                        randomize_buffers();
145                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
146                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
147                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
148                            fail();
149                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
150                    }
151                }
152            }
153        }
154    }
155    report("qpel_uni");
156}
157
158static void checkasm_check_hevc_qpel_uni_w(void)
159{
160    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
161    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
162    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
163    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
164
165    HEVCDSPContext h;
166    int size, bit_depth, i, j;
167    const int *denom, *wx, *ox;
168    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
169                                                                  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
170
171    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
172        ff_hevc_dsp_init(&h, bit_depth);
173
174        for (i = 0; i < 2; i++) {
175            for (j = 0; j < 2; j++) {
176                for (size = 1; size < 10; size++) {
177                    const char *type;
178                    switch ((j << 1) | i) {
179                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
180                    case 1: type = "qpel_uni_w_h"; break; // 0 1
181                    case 2: type = "qpel_uni_w_v"; break; // 1 0
182                    case 3: type = "qpel_uni_w_hv"; break; // 1 1
183                    }
184
185                    if (check_func(h.put_hevc_qpel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
186                        for (denom = denoms; *denom >= 0; denom++) {
187                            for (wx = weights; *wx >= 0; wx++) {
188                                for (ox = offsets; *ox >= 0; ox++) {
189                                    randomize_buffers();
190                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
191                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
192                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
193                                        fail();
194                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
195                                }
196                            }
197                        }
198                    }
199                }
200            }
201        }
202    }
203    report("qpel_uni_w");
204}
205
206static void checkasm_check_hevc_qpel_bi(void)
207{
208    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
209    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
210    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
211    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
212    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
213    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
214
215    HEVCDSPContext h;
216    int size, bit_depth, i, j;
217    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
218                                                                  int16_t *src2,
219                                                                  int height, intptr_t mx, intptr_t my, int width);
220
221    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
222        ff_hevc_dsp_init(&h, bit_depth);
223
224        for (i = 0; i < 2; i++) {
225            for (j = 0; j < 2; j++) {
226                for (size = 1; size < 10; size++) {
227                    const char *type;
228                    switch ((j << 1) | i) {
229                    case 0: type = "pel_bi_pixels"; break; // 0 0
230                    case 1: type = "qpel_bi_h"; break; // 0 1
231                    case 2: type = "qpel_bi_v"; break; // 1 0
232                    case 3: type = "qpel_bi_hv"; break; // 1 1
233                    }
234
235                    if (check_func(h.put_hevc_qpel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
236                        randomize_buffers_ref();
237                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
238                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
239                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
240                            fail();
241                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
242                    }
243                }
244            }
245        }
246    }
247    report("qpel_bi");
248}
249
250static void checkasm_check_hevc_qpel_bi_w(void)
251{
252    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
253    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
254    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
255    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
256    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
257    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
258
259    HEVCDSPContext h;
260    int size, bit_depth, i, j;
261    const int *denom, *wx, *ox;
262    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
263                                                                  int16_t *src2,
264                                                                  int height, int denom, int wx0, int wx1,
265                                                                  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
266
267    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
268        ff_hevc_dsp_init(&h, bit_depth);
269
270        for (i = 0; i < 2; i++) {
271            for (j = 0; j < 2; j++) {
272                for (size = 1; size < 10; size++) {
273                    const char *type;
274                    switch ((j << 1) | i) {
275                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
276                    case 1: type = "qpel_bi_w_h"; break; // 0 1
277                    case 2: type = "qpel_bi_w_v"; break; // 1 0
278                    case 3: type = "qpel_bi_w_hv"; break; // 1 1
279                    }
280
281                    if (check_func(h.put_hevc_qpel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
282                        for (denom = denoms; *denom >= 0; denom++) {
283                            for (wx = weights; *wx >= 0; wx++) {
284                                for (ox = offsets; *ox >= 0; ox++) {
285                                    randomize_buffers_ref();
286                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
287                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
288                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
289                                        fail();
290                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
291                                }
292                            }
293                        }
294                    }
295                }
296            }
297        }
298    }
299    report("qpel_bi_w");
300}
301
/* The tests below declare buf0/buf1 with exactly BUF_SIZE bytes, so
 * randomize_buffers() must not write any bytes past BUF_SIZE from here on. */
#undef SRC_EXTRA
#define SRC_EXTRA 0
304
305static void checkasm_check_hevc_epel(void)
306{
307    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
308    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
309    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
310    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
311
312    HEVCDSPContext h;
313    int size, bit_depth, i, j, row;
314    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
315                                                                  int height, intptr_t mx, intptr_t my, int width);
316
317    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
318        ff_hevc_dsp_init(&h, bit_depth);
319
320        for (i = 0; i < 2; i++) {
321            for (j = 0; j < 2; j++) {
322                for (size = 1; size < 10; size++) {
323                    const char *type;
324                    switch ((j << 1) | i) {
325                    case 0: type = "pel_pixels"; break; // 0 0
326                    case 1: type = "epel_h"; break; // 0 1
327                    case 2: type = "epel_v"; break; // 1 0
328                    case 3: type = "epel_hv"; break; // 1 1
329                    }
330
331                    if (check_func(h.put_hevc_epel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
332                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
333                        randomize_buffers();
334                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
335                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
336                        for (row = 0; row < size[sizes]; row++) {
337                            if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
338                                fail();
339                        }
340                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
341                    }
342                }
343            }
344        }
345    }
346    report("epel");
347}
348
349static void checkasm_check_hevc_epel_uni(void)
350{
351    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
352    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
353    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
354    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
355
356    HEVCDSPContext h;
357    int size, bit_depth, i, j;
358    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
359                                                                  int height, intptr_t mx, intptr_t my, int width);
360
361    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
362        ff_hevc_dsp_init(&h, bit_depth);
363
364        for (i = 0; i < 2; i++) {
365            for (j = 0; j < 2; j++) {
366                for (size = 1; size < 10; size++) {
367                    const char *type;
368                    switch ((j << 1) | i) {
369                    case 0: type = "pel_uni_pixels"; break; // 0 0
370                    case 1: type = "epel_uni_h"; break; // 0 1
371                    case 2: type = "epel_uni_v"; break; // 1 0
372                    case 3: type = "epel_uni_hv"; break; // 1 1
373                    }
374
375                    if (check_func(h.put_hevc_epel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
376                        randomize_buffers();
377                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
378                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
379                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
380                            fail();
381                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
382                    }
383                }
384            }
385        }
386    }
387    report("epel_uni");
388}
389
390static void checkasm_check_hevc_epel_uni_w(void)
391{
392    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
393    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
394    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
395    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
396
397    HEVCDSPContext h;
398    int size, bit_depth, i, j;
399    const int *denom, *wx, *ox;
400    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
401                                                                  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
402
403    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
404        ff_hevc_dsp_init(&h, bit_depth);
405
406        for (i = 0; i < 2; i++) {
407            for (j = 0; j < 2; j++) {
408                for (size = 1; size < 10; size++) {
409                    const char *type;
410                    switch ((j << 1) | i) {
411                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
412                    case 1: type = "epel_uni_w_h"; break; // 0 1
413                    case 2: type = "epel_uni_w_v"; break; // 1 0
414                    case 3: type = "epel_uni_w_hv"; break; // 1 1
415                    }
416
417                    if (check_func(h.put_hevc_epel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
418                        for (denom = denoms; *denom >= 0; denom++) {
419                            for (wx = weights; *wx >= 0; wx++) {
420                                for (ox = offsets; *ox >= 0; ox++) {
421                                    randomize_buffers();
422                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
423                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
424                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
425                                        fail();
426                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
427                                }
428                            }
429                        }
430                    }
431                }
432            }
433        }
434    }
435    report("epel_uni_w");
436}
437
438static void checkasm_check_hevc_epel_bi(void)
439{
440    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
441    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
442    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
443    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
444    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
445    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
446
447    HEVCDSPContext h;
448    int size, bit_depth, i, j;
449    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
450                                                                  int16_t *src2,
451                                                                  int height, intptr_t mx, intptr_t my, int width);
452
453    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
454        ff_hevc_dsp_init(&h, bit_depth);
455
456        for (i = 0; i < 2; i++) {
457            for (j = 0; j < 2; j++) {
458                for (size = 1; size < 10; size++) {
459                    const char *type;
460                    switch ((j << 1) | i) {
461                    case 0: type = "pel_bi_pixels"; break; // 0 0
462                    case 1: type = "epel_bi_h"; break; // 0 1
463                    case 2: type = "epel_bi_v"; break; // 1 0
464                    case 3: type = "epel_bi_hv"; break; // 1 1
465                    }
466
467                    if (check_func(h.put_hevc_epel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
468                        randomize_buffers_ref();
469                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
470                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
471                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
472                            fail();
473                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
474                    }
475                }
476            }
477        }
478    }
479    report("epel_bi");
480}
481
482static void checkasm_check_hevc_epel_bi_w(void)
483{
484    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
485    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
486    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
487    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
488    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
489    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
490
491    HEVCDSPContext h;
492    int size, bit_depth, i, j;
493    const int *denom, *wx, *ox;
494    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
495                                                                  int16_t *src2,
496                                                                  int height, int denom, int wx0, int wx1,
497                                                                  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
498
499    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
500        ff_hevc_dsp_init(&h, bit_depth);
501
502        for (i = 0; i < 2; i++) {
503            for (j = 0; j < 2; j++) {
504                for (size = 1; size < 10; size++) {
505                    const char *type;
506                    switch ((j << 1) | i) {
507                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
508                    case 1: type = "epel_bi_w_h"; break; // 0 1
509                    case 2: type = "epel_bi_w_v"; break; // 1 0
510                    case 3: type = "epel_bi_w_hv"; break; // 1 1
511                    }
512
513                    if (check_func(h.put_hevc_epel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
514                        for (denom = denoms; *denom >= 0; denom++) {
515                            for (wx = weights; *wx >= 0; wx++) {
516                                for (ox = offsets; *ox >= 0; ox++) {
517                                    randomize_buffers_ref();
518                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
519                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
520                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
521                                        fail();
522                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
523                                }
524                            }
525                        }
526                    }
527                }
528            }
529        }
530    }
531    report("epel_bi_w");
532}
533
534void checkasm_check_hevc_pel(void)
535{
536    checkasm_check_hevc_qpel();
537    checkasm_check_hevc_qpel_uni();
538    checkasm_check_hevc_qpel_uni_w();
539    checkasm_check_hevc_qpel_bi();
540    checkasm_check_hevc_qpel_bi_w();
541    checkasm_check_hevc_epel();
542    checkasm_check_hevc_epel_uni();
543    checkasm_check_hevc_epel_uni_w();
544    checkasm_check_hevc_epel_bi();
545    checkasm_check_hevc_epel_bi_w();
546}
547