1 /*
2 *
3 * This file is part of FFmpeg.
4 *
5 * FFmpeg is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * FFmpeg is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <string.h>
21
22 #include "libavutil/common.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/pixdesc.h"
26
27 #include "libswscale/swscale.h"
28 #include "libswscale/swscale_internal.h"
29
30 #include "checkasm.h"
31
/*
 * Fill the `size` bytes starting at `buf` with values from rnd(), written
 * one 32-bit word at a time with AV_WN32().
 *
 * `buf` must be a byte pointer (the offset j is added to it directly) and
 * `size` should be a multiple of 4: every iteration stores a full 4 bytes.
 * Both arguments are parenthesized so that expressions such as
 * `cond ? a : b` can be passed safely; each is evaluated on every iteration.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
38
39 static const int planar_fmts[] = {
40 AV_PIX_FMT_GBRP,
41 AV_PIX_FMT_GBRP9BE,
42 AV_PIX_FMT_GBRP9LE,
43 AV_PIX_FMT_GBRP10BE,
44 AV_PIX_FMT_GBRP10LE,
45 AV_PIX_FMT_GBRP12BE,
46 AV_PIX_FMT_GBRP12LE,
47 AV_PIX_FMT_GBRP14BE,
48 AV_PIX_FMT_GBRP14LE,
49 AV_PIX_FMT_GBRAP,
50 AV_PIX_FMT_GBRAP10BE,
51 AV_PIX_FMT_GBRAP10LE,
52 AV_PIX_FMT_GBRAP12BE,
53 AV_PIX_FMT_GBRAP12LE,
54 AV_PIX_FMT_GBRP16BE,
55 AV_PIX_FMT_GBRP16LE,
56 AV_PIX_FMT_GBRAP16BE,
57 AV_PIX_FMT_GBRAP16LE,
58 AV_PIX_FMT_GBRPF32BE,
59 AV_PIX_FMT_GBRPF32LE,
60 AV_PIX_FMT_GBRAPF32BE,
61 AV_PIX_FMT_GBRAPF32LE
62 };
63
check_output_yuv2gbrp(void)64 static void check_output_yuv2gbrp(void)
65 {
66 struct SwsContext *ctx;
67 const AVPixFmtDescriptor *desc;
68 int fmi, fsi, isi, i;
69 int dstW, byte_size, luma_filter_size, chr_filter_size;
70 #define LARGEST_FILTER 16
71 #define FILTER_SIZES 4
72 static const int filter_sizes[] = {1, 4, 8, 16};
73 #define LARGEST_INPUT_SIZE 512
74 #define INPUT_SIZES 6
75 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
76 uint8_t *dst0[4];
77 uint8_t *dst1[4];
78
79 declare_func(void, void *c, const int16_t *lumFilter,
80 const int16_t **lumSrcx, int lumFilterSize,
81 const int16_t *chrFilter, const int16_t **chrUSrcx,
82 const int16_t **chrVSrcx, int chrFilterSize,
83 const int16_t **alpSrcx, uint8_t **dest,
84 int dstW, int y);
85
86 const int16_t *luma[LARGEST_FILTER];
87 const int16_t *chru[LARGEST_FILTER];
88 const int16_t *chrv[LARGEST_FILTER];
89 const int16_t *alpha[LARGEST_FILTER];
90
91 LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]);
92 LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]);
93
94 LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
95 LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
96 LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
97 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
98
99 LOCAL_ALIGNED_8(uint8_t, dst0_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
100 LOCAL_ALIGNED_8(uint8_t, dst0_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
101 LOCAL_ALIGNED_8(uint8_t, dst0_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
102 LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
103
104 LOCAL_ALIGNED_8(uint8_t, dst1_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
105 LOCAL_ALIGNED_8(uint8_t, dst1_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
106 LOCAL_ALIGNED_8(uint8_t, dst1_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
107 LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
108
109 randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
110 randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
111 randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
112 randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
113 randomize_buffers((uint8_t*)luma_filter, LARGEST_FILTER * sizeof(int16_t));
114 randomize_buffers((uint8_t*)chr_filter, LARGEST_FILTER * sizeof(int16_t));
115
116 dst0[0] = (uint8_t*)dst0_g;
117 dst0[1] = (uint8_t*)dst0_b;
118 dst0[2] = (uint8_t*)dst0_r;
119 dst0[3] = (uint8_t*)dst0_a;
120
121 dst1[0] = (uint8_t*)dst1_g;
122 dst1[1] = (uint8_t*)dst1_b;
123 dst1[2] = (uint8_t*)dst1_r;
124 dst1[3] = (uint8_t*)dst1_a;
125
126 for (i = 0; i < LARGEST_FILTER; i++) {
127 luma[i] = (int16_t *)(src_y + i*LARGEST_INPUT_SIZE);
128 chru[i] = (int16_t *)(src_u + i*LARGEST_INPUT_SIZE);
129 chrv[i] = (int16_t *)(src_v + i*LARGEST_INPUT_SIZE);
130 alpha[i] = (int16_t *)(src_a + i*LARGEST_INPUT_SIZE);
131 }
132
133 ctx = sws_alloc_context();
134 if (sws_init_context(ctx, NULL, NULL) < 0)
135 fail();
136
137 ctx->flags |= SWS_FULL_CHR_H_INT;
138 ctx->yuv2rgb_y_offset = rnd();
139 ctx->yuv2rgb_y_coeff = rnd();
140 ctx->yuv2rgb_v2r_coeff = rnd();
141 ctx->yuv2rgb_v2g_coeff = rnd();
142 ctx->yuv2rgb_u2g_coeff = rnd();
143 ctx->yuv2rgb_u2b_coeff = rnd();
144
145 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
146 for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
147 for (isi = 0; isi < INPUT_SIZES; isi++ ) {
148 desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
149 ctx->dstFormat = planar_fmts[fmi];
150
151 dstW = input_sizes[isi];
152 luma_filter_size = filter_sizes[fsi];
153 chr_filter_size = filter_sizes[fsi];
154
155 if (desc->comp[0].depth > 16) {
156 byte_size = 4;
157 } else if (desc->comp[0].depth > 8) {
158 byte_size = 2;
159 } else {
160 byte_size = 1;
161 }
162
163 ff_sws_init_scale(ctx);
164 if (check_func(ctx->yuv2anyX, "yuv2%s_full_X_%d_%d", desc->name, luma_filter_size, dstW)) {
165 for (i = 0; i < 4; i ++) {
166 memset(dst0[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
167 memset(dst1[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
168 }
169
170 call_ref(ctx, luma_filter, luma, luma_filter_size,
171 chr_filter, chru, chrv, chr_filter_size,
172 alpha, dst0, dstW, 0);
173 call_new(ctx, luma_filter, luma, luma_filter_size,
174 chr_filter, chru, chrv, chr_filter_size,
175 alpha, dst1, dstW, 0);
176
177 if (memcmp(dst0[0], dst1[0], dstW * byte_size) ||
178 memcmp(dst0[1], dst1[1], dstW * byte_size) ||
179 memcmp(dst0[2], dst1[2], dstW * byte_size) ||
180 memcmp(dst0[3], dst1[3], dstW * byte_size) )
181 fail();
182
183 bench_new(ctx, luma_filter, luma, luma_filter_size,
184 chr_filter, chru, chrv, chr_filter_size,
185 alpha, dst1, dstW, 0);
186 }
187 }
188 }
189 }
190 sws_freeContext(ctx);
191 }
192
193 #undef LARGEST_INPUT_SIZE
194 #undef INPUT_SIZES
195
check_input_planar_rgb_to_y(void)196 static void check_input_planar_rgb_to_y(void)
197 {
198 struct SwsContext *ctx;
199 const AVPixFmtDescriptor *desc;
200 int fmi, isi;
201 int dstW, byte_size;
202 #define LARGEST_INPUT_SIZE 512
203 #define INPUT_SIZES 6
204 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
205 uint8_t *src[4];
206 int32_t rgb2yuv[9] = {0};
207
208 declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
209
210 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
211 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
212 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
213 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
214
215 LOCAL_ALIGNED_8(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
216 LOCAL_ALIGNED_8(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
217
218 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
219 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
220 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
221 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
222 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
223
224 src[0] = (uint8_t*)src_g;
225 src[1] = (uint8_t*)src_b;
226 src[2] = (uint8_t*)src_r;
227 src[3] = (uint8_t*)src_a;
228
229 ctx = sws_alloc_context();
230 if (sws_init_context(ctx, NULL, NULL) < 0)
231 fail();
232
233 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
234 for (isi = 0; isi < INPUT_SIZES; isi++ ) {
235 desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
236 ctx->srcFormat = planar_fmts[fmi];
237 ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
238 byte_size = 2;
239 dstW = input_sizes[isi];
240
241 ff_sws_init_scale(ctx);
242 if(check_func(ctx->readLumPlanar, "planar_%s_to_y_%d", desc->name, dstW)) {
243 memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
244 memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
245
246 call_ref(dst0_y, src, dstW, rgb2yuv);
247 call_new(dst1_y, src, dstW, rgb2yuv);
248
249 if (memcmp(dst0_y, dst1_y, dstW * byte_size))
250 fail();
251
252 bench_new(dst1_y, src, dstW, rgb2yuv);
253
254 }
255 }
256 }
257 sws_freeContext(ctx);
258 }
259
260 #undef LARGEST_INPUT_SIZE
261 #undef INPUT_SIZES
262
check_input_planar_rgb_to_uv(void)263 static void check_input_planar_rgb_to_uv(void)
264 {
265 struct SwsContext *ctx;
266 const AVPixFmtDescriptor *desc;
267 int fmi, isi;
268 int dstW, byte_size;
269 #define LARGEST_INPUT_SIZE 512
270 #define INPUT_SIZES 6
271 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
272 uint8_t *src[4];
273 int32_t rgb2yuv[9] = {0};
274
275 declare_func(void, uint8_t *dstU, uint8_t *dstV,
276 uint8_t *src[4], int w, int32_t *rgb2yuv);
277
278 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
279 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
280 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
281 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
282
283 LOCAL_ALIGNED_8(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
284 LOCAL_ALIGNED_8(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
285
286 LOCAL_ALIGNED_8(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
287 LOCAL_ALIGNED_8(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
288
289 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
290 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
291 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
292 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
293 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
294
295 src[0] = (uint8_t*)src_g;
296 src[1] = (uint8_t*)src_b;
297 src[2] = (uint8_t*)src_r;
298 src[3] = (uint8_t*)src_a;
299
300 ctx = sws_alloc_context();
301 if (sws_init_context(ctx, NULL, NULL) < 0)
302 fail();
303
304 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
305 for (isi = 0; isi < INPUT_SIZES; isi++ ) {
306 desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
307 ctx->srcFormat = planar_fmts[fmi];
308 ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
309 byte_size = 2;
310 dstW = input_sizes[isi];
311
312 ff_sws_init_scale(ctx);
313 if(check_func(ctx->readChrPlanar, "planar_%s_to_uv_%d", desc->name, dstW)) {
314 memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
315 memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
316 memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
317 memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
318
319 call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv);
320 call_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
321
322 if (memcmp(dst0_u, dst1_u, dstW * byte_size) ||
323 memcmp(dst0_v, dst1_v, dstW * byte_size))
324 fail();
325
326 bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
327 }
328 }
329 }
330 sws_freeContext(ctx);
331 }
332
333 #undef LARGEST_INPUT_SIZE
334 #undef INPUT_SIZES
335
check_input_planar_rgb_to_a(void)336 static void check_input_planar_rgb_to_a(void)
337 {
338 struct SwsContext *ctx;
339 const AVPixFmtDescriptor *desc;
340 int fmi, isi;
341 int dstW, byte_size;
342 #define LARGEST_INPUT_SIZE 512
343 #define INPUT_SIZES 6
344 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
345 uint8_t *src[4];
346 int32_t rgb2yuv[9] = {0};
347
348 declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
349
350 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
351 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
352 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
353 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
354
355 LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
356 LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
357
358 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
359 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
360 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
361 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
362 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
363
364 src[0] = (uint8_t*)src_g;
365 src[1] = (uint8_t*)src_b;
366 src[2] = (uint8_t*)src_r;
367 src[3] = (uint8_t*)src_a;
368
369 ctx = sws_alloc_context();
370 if (sws_init_context(ctx, NULL, NULL) < 0)
371 fail();
372
373 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
374 for (isi = 0; isi < INPUT_SIZES; isi++ ) {
375 desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
376 if (!(desc->flags & AV_PIX_FMT_FLAG_ALPHA))
377 continue;
378
379 ctx->srcFormat = planar_fmts[fmi];
380 ctx->dstFormat = AV_PIX_FMT_YUVA444P16;
381 byte_size = 2;
382 dstW = input_sizes[isi];
383
384 ff_sws_init_scale(ctx);
385 if(check_func(ctx->readAlpPlanar, "planar_%s_to_a_%d", desc->name, dstW)) {
386 memset(dst0_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
387 memset(dst1_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
388
389 call_ref(dst0_a, src, dstW, rgb2yuv);
390 call_new(dst1_a, src, dstW, rgb2yuv);
391
392 if (memcmp(dst0_a, dst1_a, dstW * byte_size))
393 fail();
394 bench_new(dst1_a, src, dstW, rgb2yuv);
395 }
396 }
397 }
398 sws_freeContext(ctx);
399 }
400
/*
 * Entry point for the planar GBR checks: runs each check group in turn and
 * calls report() with that group's name immediately afterwards.
 */
void checkasm_check_sw_gbrp(void)
{
/* Run one check group, then report it under the given label. */
#define RUN_GROUP(check, label) \
    do {                        \
        check();                \
        report(label);          \
    } while (0)

    RUN_GROUP(check_output_yuv2gbrp,        "output_yuv2gbrp");
    RUN_GROUP(check_input_planar_rgb_to_y,  "input_planar_rgb_y");
    RUN_GROUP(check_input_planar_rgb_to_uv, "input_planar_rgb_uv");
    RUN_GROUP(check_input_planar_rgb_to_a,  "input_planar_rgb_a");

#undef RUN_GROUP
}
415