1/* 2 * 3 * This file is part of FFmpeg. 4 * 5 * FFmpeg is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * FFmpeg is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with FFmpeg; if not, write to the Free Software Foundation, Inc., 17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 */ 19 20#include <string.h> 21 22#include "libavutil/common.h" 23#include "libavutil/intreadwrite.h" 24#include "libavutil/mem_internal.h" 25#include "libavutil/pixdesc.h" 26 27#include "libswscale/swscale.h" 28#include "libswscale/swscale_internal.h" 29 30#include "checkasm.h" 31 32#define randomize_buffers(buf, size) \ 33 do { \ 34 int j; \ 35 for (j = 0; j < size; j+=4) \ 36 AV_WN32(buf + j, rnd()); \ 37 } while (0) 38 39static const int planar_fmts[] = { 40 AV_PIX_FMT_GBRP, 41 AV_PIX_FMT_GBRP9BE, 42 AV_PIX_FMT_GBRP9LE, 43 AV_PIX_FMT_GBRP10BE, 44 AV_PIX_FMT_GBRP10LE, 45 AV_PIX_FMT_GBRP12BE, 46 AV_PIX_FMT_GBRP12LE, 47 AV_PIX_FMT_GBRP14BE, 48 AV_PIX_FMT_GBRP14LE, 49 AV_PIX_FMT_GBRAP, 50 AV_PIX_FMT_GBRAP10BE, 51 AV_PIX_FMT_GBRAP10LE, 52 AV_PIX_FMT_GBRAP12BE, 53 AV_PIX_FMT_GBRAP12LE, 54 AV_PIX_FMT_GBRP16BE, 55 AV_PIX_FMT_GBRP16LE, 56 AV_PIX_FMT_GBRAP16BE, 57 AV_PIX_FMT_GBRAP16LE, 58 AV_PIX_FMT_GBRPF32BE, 59 AV_PIX_FMT_GBRPF32LE, 60 AV_PIX_FMT_GBRAPF32BE, 61 AV_PIX_FMT_GBRAPF32LE 62}; 63 64static void check_output_yuv2gbrp(void) 65{ 66 struct SwsContext *ctx; 67 const AVPixFmtDescriptor *desc; 68 int fmi, fsi, isi, i; 69 int dstW, byte_size, luma_filter_size, chr_filter_size; 70#define LARGEST_FILTER 16 71#define FILTER_SIZES 4 72 static const int filter_sizes[] = {1, 4, 8, 16}; 73#define LARGEST_INPUT_SIZE 512 74#define INPUT_SIZES 6 75 static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; 76 uint8_t *dst0[4]; 77 uint8_t *dst1[4]; 78 79 declare_func(void, void *c, const int16_t *lumFilter, 80 const int16_t **lumSrcx, int lumFilterSize, 81 const int16_t *chrFilter, const int16_t **chrUSrcx, 82 const int16_t **chrVSrcx, int chrFilterSize, 83 const int16_t **alpSrcx, uint8_t **dest, 84 int dstW, int y); 85 86 const int16_t *luma[LARGEST_FILTER]; 87 const int16_t *chru[LARGEST_FILTER]; 88 const int16_t *chrv[LARGEST_FILTER]; 89 const int16_t *alpha[LARGEST_FILTER]; 90 91 LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]); 92 LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]); 93 94 LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); 95 LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); 96 LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); 97 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * LARGEST_INPUT_SIZE]); 98 99 LOCAL_ALIGNED_8(uint8_t, dst0_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 100 LOCAL_ALIGNED_8(uint8_t, dst0_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 101 LOCAL_ALIGNED_8(uint8_t, dst0_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 102 LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 103 104 LOCAL_ALIGNED_8(uint8_t, dst1_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 105 LOCAL_ALIGNED_8(uint8_t, dst1_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 106 LOCAL_ALIGNED_8(uint8_t, dst1_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 107 LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 108 109 randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t)); 110 randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t)); 111 randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t)); 112 randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t)); 113 randomize_buffers((uint8_t*)luma_filter, LARGEST_FILTER * sizeof(int16_t)); 114 randomize_buffers((uint8_t*)chr_filter, LARGEST_FILTER * sizeof(int16_t)); 115 116 dst0[0] = (uint8_t*)dst0_g; 117 dst0[1] = (uint8_t*)dst0_b; 118 dst0[2] = (uint8_t*)dst0_r; 119 dst0[3] = (uint8_t*)dst0_a; 120 121 dst1[0] = (uint8_t*)dst1_g; 122 dst1[1] = (uint8_t*)dst1_b; 123 dst1[2] = (uint8_t*)dst1_r; 124 dst1[3] = (uint8_t*)dst1_a; 125 126 for (i = 0; i < LARGEST_FILTER; i++) { 127 luma[i] = (int16_t *)(src_y + i*LARGEST_INPUT_SIZE); 128 chru[i] = (int16_t *)(src_u + i*LARGEST_INPUT_SIZE); 129 chrv[i] = (int16_t *)(src_v + i*LARGEST_INPUT_SIZE); 130 alpha[i] = (int16_t *)(src_a + i*LARGEST_INPUT_SIZE); 131 } 132 133 ctx = sws_alloc_context(); 134 if (sws_init_context(ctx, NULL, NULL) < 0) 135 fail(); 136 137 ctx->flags |= SWS_FULL_CHR_H_INT; 138 ctx->yuv2rgb_y_offset = rnd(); 139 ctx->yuv2rgb_y_coeff = rnd(); 140 ctx->yuv2rgb_v2r_coeff = rnd(); 141 ctx->yuv2rgb_v2g_coeff = rnd(); 142 ctx->yuv2rgb_u2g_coeff = rnd(); 143 ctx->yuv2rgb_u2b_coeff = rnd(); 144 145 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) { 146 for (fsi = 0; fsi < FILTER_SIZES; fsi++) { 147 for (isi = 0; isi < INPUT_SIZES; isi++ ) { 148 desc = av_pix_fmt_desc_get(planar_fmts[fmi]); 149 ctx->dstFormat = planar_fmts[fmi]; 150 151 dstW = input_sizes[isi]; 152 luma_filter_size = filter_sizes[fsi]; 153 chr_filter_size = filter_sizes[fsi]; 154 155 if (desc->comp[0].depth > 16) { 156 byte_size = 4; 157 } else if (desc->comp[0].depth > 8) { 158 byte_size = 2; 159 } else { 160 byte_size = 1; 161 } 162 163 ff_sws_init_scale(ctx); 164 if (check_func(ctx->yuv2anyX, "yuv2%s_full_X_%d_%d", desc->name, luma_filter_size, dstW)) { 165 for (i = 0; i < 4; i ++) { 166 memset(dst0[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 167 memset(dst1[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 168 } 169 170 call_ref(ctx, luma_filter, luma, luma_filter_size, 171 chr_filter, chru, chrv, chr_filter_size, 172 alpha, dst0, dstW, 0); 173 call_new(ctx, luma_filter, luma, luma_filter_size, 174 chr_filter, chru, chrv, chr_filter_size, 175 alpha, dst1, dstW, 0); 176 177 if (memcmp(dst0[0], dst1[0], dstW * byte_size) || 178 memcmp(dst0[1], dst1[1], dstW * byte_size) || 179 memcmp(dst0[2], dst1[2], dstW * byte_size) || 180 memcmp(dst0[3], dst1[3], dstW * byte_size) ) 181 fail(); 182 183 bench_new(ctx, luma_filter, luma, luma_filter_size, 184 chr_filter, chru, chrv, chr_filter_size, 185 alpha, dst1, dstW, 0); 186 } 187 } 188 } 189 } 190 sws_freeContext(ctx); 191} 192 193#undef LARGEST_INPUT_SIZE 194#undef INPUT_SIZES 195 196static void check_input_planar_rgb_to_y(void) 197{ 198 struct SwsContext *ctx; 199 const AVPixFmtDescriptor *desc; 200 int fmi, isi; 201 int dstW, byte_size; 202#define LARGEST_INPUT_SIZE 512 203#define INPUT_SIZES 6 204 static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; 205 uint8_t *src[4]; 206 int32_t rgb2yuv[9] = {0}; 207 208 declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv); 209 210 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]); 211 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]); 212 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]); 213 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]); 214 215 LOCAL_ALIGNED_8(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 216 LOCAL_ALIGNED_8(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 217 218 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t)); 219 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t)); 220 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t)); 221 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t)); 222 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t)); 223 224 src[0] = (uint8_t*)src_g; 225 src[1] = (uint8_t*)src_b; 226 src[2] = (uint8_t*)src_r; 227 src[3] = (uint8_t*)src_a; 228 229 ctx = sws_alloc_context(); 230 if (sws_init_context(ctx, NULL, NULL) < 0) 231 fail(); 232 233 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) { 234 for (isi = 0; isi < INPUT_SIZES; isi++ ) { 235 desc = av_pix_fmt_desc_get(planar_fmts[fmi]); 236 ctx->srcFormat = planar_fmts[fmi]; 237 ctx->dstFormat = AV_PIX_FMT_YUVA444P16; 238 byte_size = 2; 239 dstW = input_sizes[isi]; 240 241 ff_sws_init_scale(ctx); 242 if(check_func(ctx->readLumPlanar, "planar_%s_to_y_%d", desc->name, dstW)) { 243 memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 244 memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 245 246 call_ref(dst0_y, src, dstW, rgb2yuv); 247 call_new(dst1_y, src, dstW, rgb2yuv); 248 249 if (memcmp(dst0_y, dst1_y, dstW * byte_size)) 250 fail(); 251 252 bench_new(dst1_y, src, dstW, rgb2yuv); 253 254 } 255 } 256 } 257 sws_freeContext(ctx); 258} 259 260#undef LARGEST_INPUT_SIZE 261#undef INPUT_SIZES 262 263static void check_input_planar_rgb_to_uv(void) 264{ 265 struct SwsContext *ctx; 266 const AVPixFmtDescriptor *desc; 267 int fmi, isi; 268 int dstW, byte_size; 269#define LARGEST_INPUT_SIZE 512 270#define INPUT_SIZES 6 271 static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; 272 uint8_t *src[4]; 273 int32_t rgb2yuv[9] = {0}; 274 275 declare_func(void, uint8_t *dstU, uint8_t *dstV, 276 uint8_t *src[4], int w, int32_t *rgb2yuv); 277 278 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]); 279 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]); 280 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]); 281 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]); 282 283 LOCAL_ALIGNED_8(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 284 LOCAL_ALIGNED_8(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 285 286 LOCAL_ALIGNED_8(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 287 LOCAL_ALIGNED_8(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 288 289 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t)); 290 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t)); 291 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t)); 292 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t)); 293 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t)); 294 295 src[0] = (uint8_t*)src_g; 296 src[1] = (uint8_t*)src_b; 297 src[2] = (uint8_t*)src_r; 298 src[3] = (uint8_t*)src_a; 299 300 ctx = sws_alloc_context(); 301 if (sws_init_context(ctx, NULL, NULL) < 0) 302 fail(); 303 304 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) { 305 for (isi = 0; isi < INPUT_SIZES; isi++ ) { 306 desc = av_pix_fmt_desc_get(planar_fmts[fmi]); 307 ctx->srcFormat = planar_fmts[fmi]; 308 ctx->dstFormat = AV_PIX_FMT_YUVA444P16; 309 byte_size = 2; 310 dstW = input_sizes[isi]; 311 312 ff_sws_init_scale(ctx); 313 if(check_func(ctx->readChrPlanar, "planar_%s_to_uv_%d", desc->name, dstW)) { 314 memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 315 memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 316 memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 317 memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t)); 318 319 call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv); 320 call_new(dst1_u, dst1_v, src, dstW, rgb2yuv); 321 322 if (memcmp(dst0_u, dst1_u, dstW * byte_size) || 323 memcmp(dst0_v, dst1_v, dstW * byte_size)) 324 fail(); 325 326 bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv); 327 } 328 } 329 } 330 sws_freeContext(ctx); 331} 332 333#undef LARGEST_INPUT_SIZE 334#undef INPUT_SIZES 335 336static void check_input_planar_rgb_to_a(void) 337{ 338 struct SwsContext *ctx; 339 const AVPixFmtDescriptor *desc; 340 int fmi, isi; 341 int dstW, byte_size; 342#define LARGEST_INPUT_SIZE 512 343#define INPUT_SIZES 6 344 static const int input_sizes[] = {8, 24, 128, 144, 256, 512}; 345 uint8_t *src[4]; 346 int32_t rgb2yuv[9] = {0}; 347 348 declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv); 349 350 LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]); 351 LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]); 352 LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]); 353 LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]); 354 355 LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 356 LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]); 357 358 randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t)); 359 randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t)); 360 randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t)); 361 randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t)); 362 randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t)); 363 364 src[0] = (uint8_t*)src_g; 365 src[1] = (uint8_t*)src_b; 366 src[2] = (uint8_t*)src_r; 367 src[3] = (uint8_t*)src_a; 368 369 ctx = sws_alloc_context(); 370 if (sws_init_context(ctx, NULL, NULL) < 0) 371 fail(); 372 373 for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) { 374 for (isi = 0; isi < INPUT_SIZES; isi++ ) { 375 desc = av_pix_fmt_desc_get(planar_fmts[fmi]); 376 if (!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) 377 continue; 378 379 ctx->srcFormat = planar_fmts[fmi]; 380 ctx->dstFormat = AV_PIX_FMT_YUVA444P16; 381 byte_size = 2; 382 dstW = input_sizes[isi]; 383 384 ff_sws_init_scale(ctx); 385 if(check_func(ctx->readAlpPlanar, "planar_%s_to_a_%d", desc->name, dstW)) { 386 memset(dst0_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t)); 387 memset(dst1_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t)); 388 389 call_ref(dst0_a, src, dstW, rgb2yuv); 390 call_new(dst1_a, src, dstW, rgb2yuv); 391 392 if (memcmp(dst0_a, dst1_a, dstW * byte_size)) 393 fail(); 394 bench_new(dst1_a, src, dstW, rgb2yuv); 395 } 396 } 397 } 398 sws_freeContext(ctx); 399} 400 401void checkasm_check_sw_gbrp(void) 402{ 403 check_output_yuv2gbrp(); 404 report("output_yuv2gbrp"); 405 406 check_input_planar_rgb_to_y(); 407 report("input_planar_rgb_y"); 408 409 check_input_planar_rgb_to_uv(); 410 report("input_planar_rgb_uv"); 411 412 check_input_planar_rgb_to_a(); 413 report("input_planar_rgb_a"); 414} 415