1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
20 #include <string.h>
21 
22 #include "libavutil/common.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/pixdesc.h"
26 
27 #include "libswscale/swscale.h"
28 #include "libswscale/swscale_internal.h"
29 
30 #include "checkasm.h"
31 
/*
 * Fill the first `size` bytes of `buf` with values from rnd(), storing one
 * 32-bit word per AV_WN32() call. `size` is expected to be a multiple of 4:
 * the loop always stores whole words, so a trailing partial word would be
 * written as a full 4-byte store.
 *
 * Both arguments are parenthesised and evaluated exactly once, so arbitrary
 * expressions can be passed safely.
 */
#define randomize_buffers(buf, size)                               \
    do {                                                           \
        uint8_t *const rnd_dst_ = (uint8_t *)(buf);                \
        const size_t   rnd_len_ = (size);                          \
        size_t         rnd_pos_;                                   \
        for (rnd_pos_ = 0; rnd_pos_ < rnd_len_; rnd_pos_ += 4)     \
            AV_WN32(rnd_dst_ + rnd_pos_, rnd());                   \
    } while (0)
38 
39 static const int planar_fmts[] = {
40     AV_PIX_FMT_GBRP,
41     AV_PIX_FMT_GBRP9BE,
42     AV_PIX_FMT_GBRP9LE,
43     AV_PIX_FMT_GBRP10BE,
44     AV_PIX_FMT_GBRP10LE,
45     AV_PIX_FMT_GBRP12BE,
46     AV_PIX_FMT_GBRP12LE,
47     AV_PIX_FMT_GBRP14BE,
48     AV_PIX_FMT_GBRP14LE,
49     AV_PIX_FMT_GBRAP,
50     AV_PIX_FMT_GBRAP10BE,
51     AV_PIX_FMT_GBRAP10LE,
52     AV_PIX_FMT_GBRAP12BE,
53     AV_PIX_FMT_GBRAP12LE,
54     AV_PIX_FMT_GBRP16BE,
55     AV_PIX_FMT_GBRP16LE,
56     AV_PIX_FMT_GBRAP16BE,
57     AV_PIX_FMT_GBRAP16LE,
58     AV_PIX_FMT_GBRPF32BE,
59     AV_PIX_FMT_GBRPF32LE,
60     AV_PIX_FMT_GBRAPF32BE,
61     AV_PIX_FMT_GBRAPF32LE
62 };
63 
check_output_yuv2gbrp(void)64 static void check_output_yuv2gbrp(void)
65 {
66     struct SwsContext *ctx;
67     const AVPixFmtDescriptor *desc;
68     int fmi, fsi, isi, i;
69     int dstW, byte_size, luma_filter_size, chr_filter_size;
70 #define LARGEST_FILTER 16
71 #define FILTER_SIZES 4
72     static const int filter_sizes[] = {1, 4, 8, 16};
73 #define LARGEST_INPUT_SIZE 512
74 #define INPUT_SIZES 6
75     static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
76     uint8_t *dst0[4];
77     uint8_t *dst1[4];
78 
79     declare_func(void, void *c, const int16_t *lumFilter,
80                        const int16_t **lumSrcx, int lumFilterSize,
81                        const int16_t *chrFilter, const int16_t **chrUSrcx,
82                        const int16_t **chrVSrcx, int chrFilterSize,
83                        const int16_t **alpSrcx, uint8_t **dest,
84                        int dstW, int y);
85 
86     const int16_t *luma[LARGEST_FILTER];
87     const int16_t *chru[LARGEST_FILTER];
88     const int16_t *chrv[LARGEST_FILTER];
89     const int16_t *alpha[LARGEST_FILTER];
90 
91     LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]);
92     LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]);
93 
94     LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
95     LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
96     LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
97     LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
98 
99     LOCAL_ALIGNED_8(uint8_t, dst0_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
100     LOCAL_ALIGNED_8(uint8_t, dst0_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
101     LOCAL_ALIGNED_8(uint8_t, dst0_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
102     LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
103 
104     LOCAL_ALIGNED_8(uint8_t, dst1_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
105     LOCAL_ALIGNED_8(uint8_t, dst1_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
106     LOCAL_ALIGNED_8(uint8_t, dst1_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
107     LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
108 
109     randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
110     randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
111     randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
112     randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
113     randomize_buffers((uint8_t*)luma_filter, LARGEST_FILTER * sizeof(int16_t));
114     randomize_buffers((uint8_t*)chr_filter, LARGEST_FILTER * sizeof(int16_t));
115 
116     dst0[0] = (uint8_t*)dst0_g;
117     dst0[1] = (uint8_t*)dst0_b;
118     dst0[2] = (uint8_t*)dst0_r;
119     dst0[3] = (uint8_t*)dst0_a;
120 
121     dst1[0] = (uint8_t*)dst1_g;
122     dst1[1] = (uint8_t*)dst1_b;
123     dst1[2] = (uint8_t*)dst1_r;
124     dst1[3] = (uint8_t*)dst1_a;
125 
126     for (i = 0; i < LARGEST_FILTER; i++) {
127         luma[i] =  (int16_t *)(src_y + i*LARGEST_INPUT_SIZE);
128         chru[i] =  (int16_t *)(src_u + i*LARGEST_INPUT_SIZE);
129         chrv[i] =  (int16_t *)(src_v + i*LARGEST_INPUT_SIZE);
130         alpha[i] = (int16_t *)(src_a + i*LARGEST_INPUT_SIZE);
131     }
132 
133     ctx = sws_alloc_context();
134     if (sws_init_context(ctx, NULL, NULL) < 0)
135         fail();
136 
137     ctx->flags |= SWS_FULL_CHR_H_INT;
138     ctx->yuv2rgb_y_offset  = rnd();
139     ctx->yuv2rgb_y_coeff   = rnd();
140     ctx->yuv2rgb_v2r_coeff = rnd();
141     ctx->yuv2rgb_v2g_coeff = rnd();
142     ctx->yuv2rgb_u2g_coeff = rnd();
143     ctx->yuv2rgb_u2b_coeff = rnd();
144 
145     for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
146         for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
147             for (isi = 0; isi < INPUT_SIZES; isi++ ) {
148                 desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
149                 ctx->dstFormat = planar_fmts[fmi];
150 
151                 dstW = input_sizes[isi];
152                 luma_filter_size = filter_sizes[fsi];
153                 chr_filter_size = filter_sizes[fsi];
154 
155                 if (desc->comp[0].depth > 16) {
156                     byte_size = 4;
157                 } else if (desc->comp[0].depth > 8) {
158                     byte_size = 2;
159                 } else {
160                     byte_size = 1;
161                 }
162 
163                 ff_sws_init_scale(ctx);
164                 if (check_func(ctx->yuv2anyX, "yuv2%s_full_X_%d_%d", desc->name, luma_filter_size, dstW)) {
165                     for (i = 0; i < 4; i ++) {
166                         memset(dst0[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
167                         memset(dst1[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
168                     }
169 
170                     call_ref(ctx, luma_filter, luma, luma_filter_size,
171                              chr_filter, chru, chrv, chr_filter_size,
172                              alpha, dst0, dstW, 0);
173                     call_new(ctx, luma_filter, luma, luma_filter_size,
174                              chr_filter, chru, chrv, chr_filter_size,
175                              alpha, dst1, dstW, 0);
176 
177                     if (memcmp(dst0[0], dst1[0], dstW * byte_size) ||
178                         memcmp(dst0[1], dst1[1], dstW * byte_size) ||
179                         memcmp(dst0[2], dst1[2], dstW * byte_size) ||
180                         memcmp(dst0[3], dst1[3], dstW * byte_size) )
181                         fail();
182 
183                     bench_new(ctx, luma_filter, luma, luma_filter_size,
184                               chr_filter, chru, chrv, chr_filter_size,
185                               alpha, dst1, dstW, 0);
186                 }
187             }
188         }
189     }
190     sws_freeContext(ctx);
191 }
192 
193 #undef LARGEST_INPUT_SIZE
194 #undef INPUT_SIZES
195 
check_input_planar_rgb_to_y(void)196 static void check_input_planar_rgb_to_y(void)
197 {
198     struct SwsContext *ctx;
199     const AVPixFmtDescriptor *desc;
200     int fmi, isi;
201     int dstW, byte_size;
202 #define LARGEST_INPUT_SIZE 512
203 #define INPUT_SIZES 6
204     static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
205     uint8_t *src[4];
206     int32_t rgb2yuv[9] = {0};
207 
208     declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
209 
210     LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
211     LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
212     LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
213     LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
214 
215     LOCAL_ALIGNED_8(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
216     LOCAL_ALIGNED_8(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
217 
218     randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
219     randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
220     randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
221     randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
222     randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
223 
224     src[0] = (uint8_t*)src_g;
225     src[1] = (uint8_t*)src_b;
226     src[2] = (uint8_t*)src_r;
227     src[3] = (uint8_t*)src_a;
228 
229     ctx = sws_alloc_context();
230     if (sws_init_context(ctx, NULL, NULL) < 0)
231         fail();
232 
233     for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
234         for (isi = 0; isi < INPUT_SIZES; isi++ ) {
235             desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
236             ctx->srcFormat = planar_fmts[fmi];
237             ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
238             byte_size = 2;
239             dstW = input_sizes[isi];
240 
241             ff_sws_init_scale(ctx);
242             if(check_func(ctx->readLumPlanar, "planar_%s_to_y_%d",  desc->name, dstW)) {
243                 memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
244                 memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
245 
246                 call_ref(dst0_y, src, dstW, rgb2yuv);
247                 call_new(dst1_y, src, dstW, rgb2yuv);
248 
249                 if (memcmp(dst0_y, dst1_y, dstW * byte_size))
250                     fail();
251 
252                 bench_new(dst1_y, src, dstW, rgb2yuv);
253 
254             }
255         }
256     }
257     sws_freeContext(ctx);
258 }
259 
260 #undef LARGEST_INPUT_SIZE
261 #undef INPUT_SIZES
262 
check_input_planar_rgb_to_uv(void)263 static void check_input_planar_rgb_to_uv(void)
264 {
265     struct SwsContext *ctx;
266     const AVPixFmtDescriptor *desc;
267     int fmi, isi;
268     int dstW, byte_size;
269 #define LARGEST_INPUT_SIZE 512
270 #define INPUT_SIZES 6
271     static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
272     uint8_t *src[4];
273     int32_t rgb2yuv[9] = {0};
274 
275     declare_func(void, uint8_t *dstU, uint8_t *dstV,
276                        uint8_t *src[4], int w, int32_t *rgb2yuv);
277 
278     LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
279     LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
280     LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
281     LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
282 
283     LOCAL_ALIGNED_8(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
284     LOCAL_ALIGNED_8(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
285 
286     LOCAL_ALIGNED_8(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
287     LOCAL_ALIGNED_8(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
288 
289     randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
290     randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
291     randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
292     randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
293     randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
294 
295     src[0] = (uint8_t*)src_g;
296     src[1] = (uint8_t*)src_b;
297     src[2] = (uint8_t*)src_r;
298     src[3] = (uint8_t*)src_a;
299 
300     ctx = sws_alloc_context();
301     if (sws_init_context(ctx, NULL, NULL) < 0)
302         fail();
303 
304     for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
305         for (isi = 0; isi < INPUT_SIZES; isi++ ) {
306             desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
307             ctx->srcFormat = planar_fmts[fmi];
308             ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
309             byte_size = 2;
310             dstW = input_sizes[isi];
311 
312             ff_sws_init_scale(ctx);
313             if(check_func(ctx->readChrPlanar, "planar_%s_to_uv_%d",  desc->name, dstW)) {
314                 memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
315                 memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
316                 memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
317                 memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
318 
319                 call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv);
320                 call_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
321 
322                 if (memcmp(dst0_u, dst1_u, dstW * byte_size) ||
323                     memcmp(dst0_v, dst1_v, dstW * byte_size))
324                     fail();
325 
326                 bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
327             }
328         }
329     }
330     sws_freeContext(ctx);
331 }
332 
333 #undef LARGEST_INPUT_SIZE
334 #undef INPUT_SIZES
335 
check_input_planar_rgb_to_a(void)336 static void check_input_planar_rgb_to_a(void)
337 {
338     struct SwsContext *ctx;
339     const AVPixFmtDescriptor *desc;
340     int fmi, isi;
341     int dstW, byte_size;
342 #define LARGEST_INPUT_SIZE 512
343 #define INPUT_SIZES 6
344     static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
345     uint8_t *src[4];
346     int32_t rgb2yuv[9] = {0};
347 
348     declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
349 
350     LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
351     LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
352     LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
353     LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
354 
355     LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
356     LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
357 
358     randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
359     randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
360     randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
361     randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
362     randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
363 
364     src[0] = (uint8_t*)src_g;
365     src[1] = (uint8_t*)src_b;
366     src[2] = (uint8_t*)src_r;
367     src[3] = (uint8_t*)src_a;
368 
369     ctx = sws_alloc_context();
370     if (sws_init_context(ctx, NULL, NULL) < 0)
371         fail();
372 
373     for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
374         for (isi = 0; isi < INPUT_SIZES; isi++ ) {
375             desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
376             if (!(desc->flags & AV_PIX_FMT_FLAG_ALPHA))
377                 continue;
378 
379             ctx->srcFormat = planar_fmts[fmi];
380             ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
381             byte_size = 2;
382             dstW = input_sizes[isi];
383 
384             ff_sws_init_scale(ctx);
385             if(check_func(ctx->readAlpPlanar, "planar_%s_to_a_%d",  desc->name, dstW)) {
386                 memset(dst0_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
387                 memset(dst1_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
388 
389                 call_ref(dst0_a, src, dstW, rgb2yuv);
390                 call_new(dst1_a, src, dstW, rgb2yuv);
391 
392                 if (memcmp(dst0_a, dst1_a, dstW * byte_size))
393                     fail();
394                 bench_new(dst1_a, src, dstW, rgb2yuv);
395             }
396         }
397     }
398     sws_freeContext(ctx);
399 }
400 
checkasm_check_sw_gbrp(void)401 void checkasm_check_sw_gbrp(void)
402 {
403     check_output_yuv2gbrp();
404     report("output_yuv2gbrp");
405 
406     check_input_planar_rgb_to_y();
407     report("input_planar_rgb_y");
408 
409     check_input_planar_rgb_to_uv();
410     report("input_planar_rgb_uv");
411 
412     check_input_planar_rgb_to_a();
413     report("input_planar_rgb_a");
414 }
415