1/* 2 * 3 * This file is part of FFmpeg. 4 * 5 * FFmpeg is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * FFmpeg is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with FFmpeg; if not, write to the Free Software Foundation, Inc., 17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 */ 19 20#include <string.h> 21 22#include "libavutil/common.h" 23#include "libavutil/intreadwrite.h" 24#include "libavutil/mem_internal.h" 25 26#include "libswscale/rgb2rgb.h" 27 28#include "checkasm.h" 29 30#define randomize_buffers(buf, size) \ 31 do { \ 32 int j; \ 33 for (j = 0; j < size; j+=4) \ 34 AV_WN32(buf + j, rnd()); \ 35 } while (0) 36 37static const uint8_t width[] = {12, 16, 20, 32, 36, 128}; 38static const struct {uint8_t w, h, s;} planes[] = { 39 {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128} 40}; 41 42#define MAX_STRIDE 128 43#define MAX_HEIGHT 128 44 45static void check_shuffle_bytes(void * func, const char * report) 46{ 47 int i; 48 LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]); 49 LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]); 50 LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]); 51 LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]); 52 53 declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *src, uint8_t *dst, int src_size); 54 55 memset(dst0, 0, MAX_STRIDE); 56 memset(dst1, 0, MAX_STRIDE); 57 randomize_buffers(src0, MAX_STRIDE); 58 memcpy(src1, src0, MAX_STRIDE); 59 60 if (check_func(func, "%s", report)) { 61 for (i = 0; i < 6; i ++) { 62 call_ref(src0, dst0, width[i]); 63 call_new(src1, dst1, width[i]); 64 if (memcmp(dst0, dst1, MAX_STRIDE)) 65 fail(); 66 } 67 bench_new(src0, dst0, width[5]); 68 } 69} 70 71static void check_uyvy_to_422p(void) 72{ 73 int i; 74 75 LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]); 76 LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]); 77 LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]); 78 LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]); 79 LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]); 80 LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]); 81 LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]); 82 LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]); 83 84 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 85 const uint8_t *src, int width, int height, 86 int lumStride, int chromStride, int srcStride); 87 88 randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT * 2); 89 memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2); 90 91 if (check_func(uyvytoyuv422, "uyvytoyuv422")) { 92 for (i = 0; i < 6; i ++) { 93 memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT); 94 memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT); 95 memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT); 96 memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT); 97 memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT); 98 memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT); 99 100 call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h, 101 MAX_STRIDE, MAX_STRIDE / 2, planes[i].s); 102 call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h, 103 MAX_STRIDE, MAX_STRIDE / 2, planes[i].s); 104 if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) || 105 memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) || 106 memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT)) 107 fail(); 108 } 109 bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h, 110 MAX_STRIDE, MAX_STRIDE / 2, planes[5].s); 111 } 112} 113 114static void check_interleave_bytes(void) 115{ 116 LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]); 117 LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]); 118 LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]); 119 LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]); 120 // Intentionally using unaligned buffers, as this function doesn't have 121 // any alignment requirements. 122 uint8_t *src0 = src0_buf + 1; 123 uint8_t *src1 = src1_buf + 1; 124 uint8_t *dst0 = dst0_buf + 2; 125 uint8_t *dst1 = dst1_buf + 2; 126 127 declare_func_emms(AV_CPU_FLAG_MMX, void, const uint8_t *, const uint8_t *, 128 uint8_t *, int, int, int, int, int); 129 130 randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT); 131 randomize_buffers(src1, MAX_STRIDE * MAX_HEIGHT); 132 133 if (check_func(interleaveBytes, "interleave_bytes")) { 134 for (int i = 0; i <= 16; i++) { 135 // Try all widths [1,16], and try one random width. 136 137 int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2))); 138 int h = 1 + (rnd() % (MAX_HEIGHT-2)); 139 140 int src0_offset = 0, src0_stride = MAX_STRIDE; 141 int src1_offset = 0, src1_stride = MAX_STRIDE; 142 int dst_offset = 0, dst_stride = 2 * MAX_STRIDE; 143 144 memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT); 145 memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT); 146 147 // Try different combinations of negative strides 148 if (i & 1) { 149 src0_offset = (h-1)*src0_stride; 150 src0_stride = -src0_stride; 151 } 152 if (i & 2) { 153 src1_offset = (h-1)*src1_stride; 154 src1_stride = -src1_stride; 155 } 156 if (i & 4) { 157 dst_offset = (h-1)*dst_stride; 158 dst_stride = -dst_stride; 159 } 160 161 call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset, 162 w, h, src0_stride, src1_stride, dst_stride); 163 call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset, 164 w, h, src0_stride, src1_stride, dst_stride); 165 // Check a one pixel-pair edge around the destination area, 166 // to catch overwrites past the end. 167 checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE, 168 2 * w + 2, h + 1, "dst"); 169 } 170 171 bench_new(src0, src1, dst1, 127, MAX_HEIGHT, 172 MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE); 173 } 174 if (check_func(interleaveBytes, "interleave_bytes_aligned")) { 175 // Bench the function in a more typical case, with aligned 176 // buffers and widths. 177 bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT, 178 MAX_STRIDE, MAX_STRIDE, 2*MAX_STRIDE); 179 } 180} 181 182void checkasm_check_sw_rgb(void) 183{ 184 ff_sws_rgb2rgb_init(); 185 186 check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103"); 187 report("shuffle_bytes_2103"); 188 189 check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321"); 190 report("shuffle_bytes_0321"); 191 192 check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230"); 193 report("shuffle_bytes_1230"); 194 195 check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012"); 196 report("shuffle_bytes_3012"); 197 198 check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210"); 199 report("shuffle_bytes_3210"); 200 201 check_uyvy_to_422p(); 202 report("uyvytoyuv422"); 203 204 check_interleave_bytes(); 205 report("interleave_bytes"); 206} 207