1/* 2 * DSP utils 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "libavutil/attributes.h" 24#include "libavutil/internal.h" 25#include "libavutil/mem_internal.h" 26#include "avcodec.h" 27#include "copy_block.h" 28#include "simple_idct.h" 29#include "me_cmp.h" 30#include "mpegvideoenc.h" 31#include "config.h" 32#include "config_components.h" 33 34/* (i - 256) * (i - 256) */ 35const uint32_t ff_square_tab[512] = { 36 65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081, 37 57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625, 38 50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681, 39 43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249, 40 36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329, 41 30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921, 42 25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025, 43 20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641, 44 16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769, 45 12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000, 9801, 9604, 9409, 46 9216, 9025, 8836, 8649, 8464, 8281, 8100, 7921, 7744, 7569, 7396, 7225, 7056, 6889, 6724, 6561, 47 6400, 6241, 6084, 5929, 5776, 5625, 5476, 5329, 5184, 5041, 4900, 4761, 4624, 4489, 4356, 4225, 48 4096, 3969, 3844, 3721, 3600, 3481, 3364, 3249, 3136, 3025, 2916, 2809, 2704, 2601, 2500, 2401, 49 2304, 2209, 2116, 2025, 1936, 1849, 1764, 1681, 1600, 1521, 1444, 1369, 1296, 1225, 1156, 1089, 50 1024, 961, 900, 841, 784, 729, 676, 625, 576, 529, 484, 441, 400, 361, 324, 289, 51 256, 225, 196, 169, 144, 121, 100, 81, 64, 49, 36, 25, 16, 9, 4, 1, 52 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 53 256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 54 1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, 55 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 56 4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, 57 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 58 9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321, 59 12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129, 60 16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449, 61 20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281, 62 25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625, 63 30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481, 64 36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849, 65 43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729, 66 50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121, 67 57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025, 68}; 69 70static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 71 ptrdiff_t stride, int h) 72{ 73 int s = 0, i; 74 const uint32_t *sq = ff_square_tab + 256; 75 76 for (i = 0; i < h; i++) { 77 s += sq[pix1[0] - pix2[0]]; 78 s += sq[pix1[1] - pix2[1]]; 79 s += sq[pix1[2] - pix2[2]]; 80 s += sq[pix1[3] - pix2[3]]; 81 pix1 += stride; 82 pix2 += stride; 83 } 84 return s; 85} 86 87static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 88 ptrdiff_t stride, int h) 89{ 90 int s = 0, i; 91 const uint32_t *sq = ff_square_tab + 256; 92 93 for (i = 0; i < h; i++) { 94 s += sq[pix1[0] - pix2[0]]; 95 s += sq[pix1[1] - pix2[1]]; 96 s += sq[pix1[2] - pix2[2]]; 97 s += sq[pix1[3] - pix2[3]]; 98 s += sq[pix1[4] - pix2[4]]; 99 s += sq[pix1[5] - pix2[5]]; 100 s += sq[pix1[6] - pix2[6]]; 101 s += sq[pix1[7] - pix2[7]]; 102 pix1 += stride; 103 pix2 += stride; 104 } 105 return s; 106} 107 108static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 109 ptrdiff_t stride, int h) 110{ 111 int s = 0, i; 112 const uint32_t *sq = ff_square_tab + 256; 113 114 for (i = 0; i < h; i++) { 115 s += sq[pix1[0] - pix2[0]]; 116 s += sq[pix1[1] - pix2[1]]; 117 s += sq[pix1[2] - pix2[2]]; 118 s += sq[pix1[3] - pix2[3]]; 119 s += sq[pix1[4] - pix2[4]]; 120 s += sq[pix1[5] - pix2[5]]; 121 s += sq[pix1[6] - pix2[6]]; 122 s += sq[pix1[7] - pix2[7]]; 123 s += sq[pix1[8] - pix2[8]]; 124 s += sq[pix1[9] - pix2[9]]; 125 s += sq[pix1[10] - pix2[10]]; 126 s += sq[pix1[11] - pix2[11]]; 127 s += sq[pix1[12] - pix2[12]]; 128 s += sq[pix1[13] - pix2[13]]; 129 s += sq[pix1[14] - pix2[14]]; 130 s += sq[pix1[15] - pix2[15]]; 131 132 pix1 += stride; 133 pix2 += stride; 134 } 135 return s; 136} 137 138static int sum_abs_dctelem_c(int16_t *block) 139{ 140 int sum = 0, i; 141 142 for (i = 0; i < 64; i++) 143 sum += FFABS(block[i]); 144 return sum; 145} 146 147#define avg2(a, b) (((a) + (b) + 1) >> 1) 148#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2) 149 150static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 151 ptrdiff_t stride, int h) 152{ 153 int s = 0, i; 154 155 for (i = 0; i < h; i++) { 156 s += abs(pix1[0] - pix2[0]); 157 s += abs(pix1[1] - pix2[1]); 158 s += abs(pix1[2] - pix2[2]); 159 s += abs(pix1[3] - pix2[3]); 160 s += abs(pix1[4] - pix2[4]); 161 s += abs(pix1[5] - pix2[5]); 162 s += abs(pix1[6] - pix2[6]); 163 s += abs(pix1[7] - pix2[7]); 164 s += abs(pix1[8] - pix2[8]); 165 s += abs(pix1[9] - pix2[9]); 166 s += abs(pix1[10] - pix2[10]); 167 s += abs(pix1[11] - pix2[11]); 168 s += abs(pix1[12] - pix2[12]); 169 s += abs(pix1[13] - pix2[13]); 170 s += abs(pix1[14] - pix2[14]); 171 s += abs(pix1[15] - pix2[15]); 172 pix1 += stride; 173 pix2 += stride; 174 } 175 return s; 176} 177 178static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 179 ptrdiff_t stride, int h) 180{ 181 int s = 0, i, j; 182 183#define V(x) (pix1[x] - pix2[x]) 184 185 s += abs(V(0)); 186 s += abs(V(1) - V(0)); 187 s += abs(V(2) - V(1)); 188 s += abs(V(3) - V(2)); 189 s += abs(V(4) - V(3)); 190 s += abs(V(5) - V(4)); 191 s += abs(V(6) - V(5)); 192 s += abs(V(7) - V(6)); 193 s += abs(V(8) - V(7)); 194 s += abs(V(9) - V(8)); 195 s += abs(V(10) - V(9)); 196 s += abs(V(11) - V(10)); 197 s += abs(V(12) - V(11)); 198 s += abs(V(13) - V(12)); 199 s += abs(V(14) - V(13)); 200 s += abs(V(15) - V(14)); 201 202 pix1 += stride; 203 pix2 += stride; 204 205 for (i = 1; i < h; i++) { 206 s += abs(V(0) - V(-stride)); 207 for (j = 1; j < 16; j++) 208 s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); 209 pix1 += stride; 210 pix2 += stride; 211 212 } 213#undef V 214 return s; 215} 216 217static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 218 ptrdiff_t stride, int h) 219{ 220 int s = 0, i; 221 222 for (i = 0; i < h; i++) { 223 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 224 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 225 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 226 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 227 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 228 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 229 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 230 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 231 s += abs(pix1[8] - avg2(pix2[8], pix2[9])); 232 s += abs(pix1[9] - avg2(pix2[9], pix2[10])); 233 s += abs(pix1[10] - avg2(pix2[10], pix2[11])); 234 s += abs(pix1[11] - avg2(pix2[11], pix2[12])); 235 s += abs(pix1[12] - avg2(pix2[12], pix2[13])); 236 s += abs(pix1[13] - avg2(pix2[13], pix2[14])); 237 s += abs(pix1[14] - avg2(pix2[14], pix2[15])); 238 s += abs(pix1[15] - avg2(pix2[15], pix2[16])); 239 pix1 += stride; 240 pix2 += stride; 241 } 242 return s; 243} 244 245static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 246 ptrdiff_t stride, int h) 247{ 248 int s = 0, i; 249 uint8_t *pix3 = pix2 + stride; 250 251 for (i = 0; i < h; i++) { 252 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 253 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 254 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 255 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 256 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 257 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 258 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 259 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 260 s += abs(pix1[8] - avg2(pix2[8], pix3[8])); 261 s += abs(pix1[9] - avg2(pix2[9], pix3[9])); 262 s += abs(pix1[10] - avg2(pix2[10], pix3[10])); 263 s += abs(pix1[11] - avg2(pix2[11], pix3[11])); 264 s += abs(pix1[12] - avg2(pix2[12], pix3[12])); 265 s += abs(pix1[13] - avg2(pix2[13], pix3[13])); 266 s += abs(pix1[14] - avg2(pix2[14], pix3[14])); 267 s += abs(pix1[15] - avg2(pix2[15], pix3[15])); 268 pix1 += stride; 269 pix2 += stride; 270 pix3 += stride; 271 } 272 return s; 273} 274 275static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 276 ptrdiff_t stride, int h) 277{ 278 int s = 0, i; 279 uint8_t *pix3 = pix2 + stride; 280 281 for (i = 0; i < h; i++) { 282 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 283 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 284 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 285 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 286 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 287 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 288 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 289 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 290 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); 291 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); 292 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); 293 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); 294 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); 295 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); 296 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); 297 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); 298 pix1 += stride; 299 pix2 += stride; 300 pix3 += stride; 301 } 302 return s; 303} 304 305static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 306 ptrdiff_t stride, int h) 307{ 308 int s = 0, i; 309 310 for (i = 0; i < h; i++) { 311 s += abs(pix1[0] - pix2[0]); 312 s += abs(pix1[1] - pix2[1]); 313 s += abs(pix1[2] - pix2[2]); 314 s += abs(pix1[3] - pix2[3]); 315 s += abs(pix1[4] - pix2[4]); 316 s += abs(pix1[5] - pix2[5]); 317 s += abs(pix1[6] - pix2[6]); 318 s += abs(pix1[7] - pix2[7]); 319 pix1 += stride; 320 pix2 += stride; 321 } 322 return s; 323} 324 325static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 326 ptrdiff_t stride, int h) 327{ 328 int s = 0, i, j; 329 330#define V(x) (pix1[x] - pix2[x]) 331 332 s += abs(V(0)); 333 s += abs(V(1) - V(0)); 334 s += abs(V(2) - V(1)); 335 s += abs(V(3) - V(2)); 336 s += abs(V(4) - V(3)); 337 s += abs(V(5) - V(4)); 338 s += abs(V(6) - V(5)); 339 s += abs(V(7) - V(6)); 340 341 pix1 += stride; 342 pix2 += stride; 343 344 for (i = 1; i < h; i++) { 345 s += abs(V(0) - V(-stride)); 346 for (j = 1; j < 8; j++) 347 s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); 348 pix1 += stride; 349 pix2 += stride; 350 351 } 352#undef V 353 return s; 354} 355 356static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 357 ptrdiff_t stride, int h) 358{ 359 int s = 0, i; 360 361 for (i = 0; i < h; i++) { 362 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 363 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 364 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 365 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 366 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 367 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 368 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 369 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 370 pix1 += stride; 371 pix2 += stride; 372 } 373 return s; 374} 375 376static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 377 ptrdiff_t stride, int h) 378{ 379 int s = 0, i; 380 uint8_t *pix3 = pix2 + stride; 381 382 for (i = 0; i < h; i++) { 383 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 384 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 385 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 386 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 387 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 388 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 389 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 390 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 391 pix1 += stride; 392 pix2 += stride; 393 pix3 += stride; 394 } 395 return s; 396} 397 398static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 399 ptrdiff_t stride, int h) 400{ 401 int s = 0, i; 402 uint8_t *pix3 = pix2 + stride; 403 404 for (i = 0; i < h; i++) { 405 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 406 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 407 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 408 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 409 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 410 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 411 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 412 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 413 pix1 += stride; 414 pix2 += stride; 415 pix3 += stride; 416 } 417 return s; 418} 419 420static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, 421 ptrdiff_t stride, int h) 422{ 423 int score1 = 0, score2 = 0, x, y; 424 425 for (y = 0; y < h; y++) { 426 for (x = 0; x < 16; x++) 427 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); 428 if (y + 1 < h) { 429 for (x = 0; x < 15; x++) 430 score2 += FFABS(s1[x] - s1[x + stride] - 431 s1[x + 1] + s1[x + stride + 1]) - 432 FFABS(s2[x] - s2[x + stride] - 433 s2[x + 1] + s2[x + stride + 1]); 434 } 435 s1 += stride; 436 s2 += stride; 437 } 438 439 if (c) 440 return score1 + FFABS(score2) * c->avctx->nsse_weight; 441 else 442 return score1 + FFABS(score2) * 8; 443} 444 445static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, 446 ptrdiff_t stride, int h) 447{ 448 int score1 = 0, score2 = 0, x, y; 449 450 for (y = 0; y < h; y++) { 451 for (x = 0; x < 8; x++) 452 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); 453 if (y + 1 < h) { 454 for (x = 0; x < 7; x++) 455 score2 += FFABS(s1[x] - s1[x + stride] - 456 s1[x + 1] + s1[x + stride + 1]) - 457 FFABS(s2[x] - s2[x + stride] - 458 s2[x + 1] + s2[x + stride + 1]); 459 } 460 s1 += stride; 461 s2 += stride; 462 } 463 464 if (c) 465 return score1 + FFABS(score2) * c->avctx->nsse_weight; 466 else 467 return score1 + FFABS(score2) * 8; 468} 469 470static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, 471 ptrdiff_t stride, int h) 472{ 473 return 0; 474} 475 476int ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type) 477{ 478 int ret = 0; 479 int i; 480 481 memset(cmp, 0, sizeof(void *) * 6); 482 483 for (i = 0; i < 6; i++) { 484 switch (type & 0xFF) { 485 case FF_CMP_SAD: 486 cmp[i] = c->sad[i]; 487 break; 488 case FF_CMP_MEDIAN_SAD: 489 cmp[i] = c->median_sad[i]; 490 break; 491 case FF_CMP_SATD: 492 cmp[i] = c->hadamard8_diff[i]; 493 break; 494 case FF_CMP_SSE: 495 cmp[i] = c->sse[i]; 496 break; 497 case FF_CMP_DCT: 498 cmp[i] = c->dct_sad[i]; 499 break; 500 case FF_CMP_DCT264: 501 cmp[i] = c->dct264_sad[i]; 502 break; 503 case FF_CMP_DCTMAX: 504 cmp[i] = c->dct_max[i]; 505 break; 506 case FF_CMP_PSNR: 507 cmp[i] = c->quant_psnr[i]; 508 break; 509 case FF_CMP_BIT: 510 cmp[i] = c->bit[i]; 511 break; 512 case FF_CMP_RD: 513 cmp[i] = c->rd[i]; 514 break; 515 case FF_CMP_VSAD: 516 cmp[i] = c->vsad[i]; 517 break; 518 case FF_CMP_VSSE: 519 cmp[i] = c->vsse[i]; 520 break; 521 case FF_CMP_ZERO: 522 cmp[i] = zero_cmp; 523 break; 524 case FF_CMP_NSSE: 525 cmp[i] = c->nsse[i]; 526 break; 527#if CONFIG_DWT 528 case FF_CMP_W53: 529 cmp[i]= c->w53[i]; 530 break; 531 case FF_CMP_W97: 532 cmp[i]= c->w97[i]; 533 break; 534#endif 535 default: 536 av_log(NULL, AV_LOG_ERROR, 537 "invalid cmp function selection\n"); 538 ret = -1; 539 break; 540 } 541 } 542 543 return ret; 544} 545 546#define BUTTERFLY2(o1, o2, i1, i2) \ 547 o1 = (i1) + (i2); \ 548 o2 = (i1) - (i2); 549 550#define BUTTERFLY1(x, y) \ 551 { \ 552 int a, b; \ 553 a = x; \ 554 b = y; \ 555 x = a + b; \ 556 y = a - b; \ 557 } 558 559#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y))) 560 561static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst, 562 uint8_t *src, ptrdiff_t stride, int h) 563{ 564 int i, temp[64], sum = 0; 565 566 av_assert2(h == 8); 567 568 for (i = 0; i < 8; i++) { 569 // FIXME: try pointer walks 570 BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], 571 src[stride * i + 0] - dst[stride * i + 0], 572 src[stride * i + 1] - dst[stride * i + 1]); 573 BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], 574 src[stride * i + 2] - dst[stride * i + 2], 575 src[stride * i + 3] - dst[stride * i + 3]); 576 BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], 577 src[stride * i + 4] - dst[stride * i + 4], 578 src[stride * i + 5] - dst[stride * i + 5]); 579 BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], 580 src[stride * i + 6] - dst[stride * i + 6], 581 src[stride * i + 7] - dst[stride * i + 7]); 582 583 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); 584 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); 585 BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); 586 BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); 587 588 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); 589 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); 590 BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); 591 BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); 592 } 593 594 for (i = 0; i < 8; i++) { 595 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); 596 BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); 597 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); 598 BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); 599 600 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); 601 BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); 602 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); 603 BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); 604 605 sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) + 606 BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) + 607 BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) + 608 BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); 609 } 610 return sum; 611} 612 613static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src, 614 uint8_t *dummy, ptrdiff_t stride, int h) 615{ 616 int i, temp[64], sum = 0; 617 618 av_assert2(h == 8); 619 620 for (i = 0; i < 8; i++) { 621 // FIXME: try pointer walks 622 BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], 623 src[stride * i + 0], src[stride * i + 1]); 624 BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], 625 src[stride * i + 2], src[stride * i + 3]); 626 BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], 627 src[stride * i + 4], src[stride * i + 5]); 628 BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], 629 src[stride * i + 6], src[stride * i + 7]); 630 631 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); 632 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); 633 BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); 634 BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); 635 636 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); 637 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); 638 BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); 639 BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); 640 } 641 642 for (i = 0; i < 8; i++) { 643 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); 644 BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); 645 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); 646 BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); 647 648 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); 649 BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); 650 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); 651 BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); 652 653 sum += 654 BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) 655 + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) 656 + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) 657 + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); 658 } 659 660 sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean 661 662 return sum; 663} 664 665static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, 666 uint8_t *src2, ptrdiff_t stride, int h) 667{ 668 LOCAL_ALIGNED_16(int16_t, temp, [64]); 669 670 av_assert2(h == 8); 671 672 s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 673 s->fdsp.fdct(temp); 674 return s->mecc.sum_abs_dctelem(temp); 675} 676 677#if CONFIG_GPL 678#define DCT8_1D \ 679 { \ 680 const int s07 = SRC(0) + SRC(7); \ 681 const int s16 = SRC(1) + SRC(6); \ 682 const int s25 = SRC(2) + SRC(5); \ 683 const int s34 = SRC(3) + SRC(4); \ 684 const int a0 = s07 + s34; \ 685 const int a1 = s16 + s25; \ 686 const int a2 = s07 - s34; \ 687 const int a3 = s16 - s25; \ 688 const int d07 = SRC(0) - SRC(7); \ 689 const int d16 = SRC(1) - SRC(6); \ 690 const int d25 = SRC(2) - SRC(5); \ 691 const int d34 = SRC(3) - SRC(4); \ 692 const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \ 693 const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \ 694 const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \ 695 const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \ 696 DST(0, a0 + a1); \ 697 DST(1, a4 + (a7 >> 2)); \ 698 DST(2, a2 + (a3 >> 1)); \ 699 DST(3, a5 + (a6 >> 2)); \ 700 DST(4, a0 - a1); \ 701 DST(5, a6 - (a5 >> 2)); \ 702 DST(6, (a2 >> 1) - a3); \ 703 DST(7, (a4 >> 2) - a7); \ 704 } 705 706static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, 707 uint8_t *src2, ptrdiff_t stride, int h) 708{ 709 int16_t dct[8][8]; 710 int i, sum = 0; 711 712 s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride); 713 714#define SRC(x) dct[i][x] 715#define DST(x, v) dct[i][x] = v 716 for (i = 0; i < 8; i++) 717 DCT8_1D 718#undef SRC 719#undef DST 720 721#define SRC(x) dct[x][i] 722#define DST(x, v) sum += FFABS(v) 723 for (i = 0; i < 8; i++) 724 DCT8_1D 725#undef SRC 726#undef DST 727 return sum; 728} 729#endif 730 731static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, 732 uint8_t *src2, ptrdiff_t stride, int h) 733{ 734 LOCAL_ALIGNED_16(int16_t, temp, [64]); 735 int sum = 0, i; 736 737 av_assert2(h == 8); 738 739 s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 740 s->fdsp.fdct(temp); 741 742 for (i = 0; i < 64; i++) 743 sum = FFMAX(sum, FFABS(temp[i])); 744 745 return sum; 746} 747 748static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, 749 uint8_t *src2, ptrdiff_t stride, int h) 750{ 751 LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]); 752 int16_t *const bak = temp + 64; 753 int sum = 0, i; 754 755 av_assert2(h == 8); 756 s->mb_intra = 0; 757 758 s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 759 760 memcpy(bak, temp, 64 * sizeof(int16_t)); 761 762 s->block_last_index[0 /* FIXME */] = 763 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 764 s->dct_unquantize_inter(s, temp, 0, s->qscale); 765 ff_simple_idct_int16_8bit(temp); // FIXME 766 767 for (i = 0; i < 64; i++) 768 sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); 769 770 return sum; 771} 772 773static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, 774 ptrdiff_t stride, int h) 775{ 776 const uint8_t *scantable = s->intra_scantable.permutated; 777 LOCAL_ALIGNED_16(int16_t, temp, [64]); 778 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); 779 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); 780 int i, last, run, bits, level, distortion, start_i; 781 const int esc_length = s->ac_esc_length; 782 uint8_t *length, *last_length; 783 784 av_assert2(h == 8); 785 786 copy_block8(lsrc1, src1, 8, stride, 8); 787 copy_block8(lsrc2, src2, 8, stride, 8); 788 789 s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); 790 791 s->block_last_index[0 /* FIXME */] = 792 last = 793 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 794 795 bits = 0; 796 797 if (s->mb_intra) { 798 start_i = 1; 799 length = s->intra_ac_vlc_length; 800 last_length = s->intra_ac_vlc_last_length; 801 bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma 802 } else { 803 start_i = 0; 804 length = s->inter_ac_vlc_length; 805 last_length = s->inter_ac_vlc_last_length; 806 } 807 808 if (last >= start_i) { 809 run = 0; 810 for (i = start_i; i < last; i++) { 811 int j = scantable[i]; 812 level = temp[j]; 813 814 if (level) { 815 level += 64; 816 if ((level & (~127)) == 0) 817 bits += length[UNI_AC_ENC_INDEX(run, level)]; 818 else 819 bits += esc_length; 820 run = 0; 821 } else 822 run++; 823 } 824 i = scantable[last]; 825 826 level = temp[i] + 64; 827 828 av_assert2(level - 64); 829 830 if ((level & (~127)) == 0) { 831 bits += last_length[UNI_AC_ENC_INDEX(run, level)]; 832 } else 833 bits += esc_length; 834 } 835 836 if (last >= 0) { 837 if (s->mb_intra) 838 s->dct_unquantize_intra(s, temp, 0, s->qscale); 839 else 840 s->dct_unquantize_inter(s, temp, 0, s->qscale); 841 } 842 843 s->idsp.idct_add(lsrc2, 8, temp); 844 845 distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8); 846 847 return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7); 848} 849 850static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, 851 ptrdiff_t stride, int h) 852{ 853 const uint8_t *scantable = s->intra_scantable.permutated; 854 LOCAL_ALIGNED_16(int16_t, temp, [64]); 855 int i, last, run, bits, level, start_i; 856 const int esc_length = s->ac_esc_length; 857 uint8_t *length, *last_length; 858 859 av_assert2(h == 8); 860 861 s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 862 863 s->block_last_index[0 /* FIXME */] = 864 last = 865 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 866 867 bits = 0; 868 869 if (s->mb_intra) { 870 start_i = 1; 871 length = s->intra_ac_vlc_length; 872 last_length = s->intra_ac_vlc_last_length; 873 bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma 874 } else { 875 start_i = 0; 876 length = s->inter_ac_vlc_length; 877 last_length = s->inter_ac_vlc_last_length; 878 } 879 880 if (last >= start_i) { 881 run = 0; 882 for (i = start_i; i < last; i++) { 883 int j = scantable[i]; 884 level = temp[j]; 885 886 if (level) { 887 level += 64; 888 if ((level & (~127)) == 0) 889 bits += length[UNI_AC_ENC_INDEX(run, level)]; 890 else 891 bits += esc_length; 892 run = 0; 893 } else 894 run++; 895 } 896 i = scantable[last]; 897 898 level = temp[i] + 64; 899 900 av_assert2(level - 64); 901 902 if ((level & (~127)) == 0) 903 bits += last_length[UNI_AC_ENC_INDEX(run, level)]; 904 else 905 bits += esc_length; 906 } 907 908 return bits; 909} 910 911#define VSAD_INTRA(size) \ 912static int vsad_intra ## size ## _c(MpegEncContext *c, \ 913 uint8_t *s, uint8_t *dummy, \ 914 ptrdiff_t stride, int h) \ 915{ \ 916 int score = 0, x, y; \ 917 \ 918 for (y = 1; y < h; y++) { \ 919 for (x = 0; x < size; x += 4) { \ 920 score += FFABS(s[x] - s[x + stride]) + \ 921 FFABS(s[x + 1] - s[x + stride + 1]) + \ 922 FFABS(s[x + 2] - s[x + 2 + stride]) + \ 923 FFABS(s[x + 3] - s[x + 3 + stride]); \ 924 } \ 925 s += stride; \ 926 } \ 927 \ 928 return score; \ 929} 930VSAD_INTRA(8) 931VSAD_INTRA(16) 932 933#define VSAD(size) \ 934static int vsad ## size ## _c(MpegEncContext *c, \ 935 uint8_t *s1, uint8_t *s2, \ 936 ptrdiff_t stride, int h) \ 937{ \ 938 int score = 0, x, y; \ 939 \ 940 for (y = 1; y < h; y++) { \ 941 for (x = 0; x < size; x++) \ 942 score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ 943 s1 += stride; \ 944 s2 += stride; \ 945 } \ 946 \ 947 return score; \ 948} 949VSAD(8) 950VSAD(16) 951 952#define SQ(a) ((a) * (a)) 953#define VSSE_INTRA(size) \ 954static int vsse_intra ## size ## _c(MpegEncContext *c, \ 955 uint8_t *s, uint8_t *dummy, \ 956 ptrdiff_t stride, int h) \ 957{ \ 958 int score = 0, x, y; \ 959 \ 960 for (y = 1; y < h; y++) { \ 961 for (x = 0; x < size; x += 4) { \ 962 score += SQ(s[x] - s[x + stride]) + \ 963 SQ(s[x + 1] - s[x + stride + 1]) + \ 964 SQ(s[x + 2] - s[x + stride + 2]) + \ 965 SQ(s[x + 3] - s[x + stride + 3]); \ 966 } \ 967 s += stride; \ 968 } \ 969 \ 970 return score; \ 971} 972VSSE_INTRA(8) 973VSSE_INTRA(16) 974 975#define VSSE(size) \ 976static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \ 977 ptrdiff_t stride, int h) \ 978{ \ 979 int score = 0, x, y; \ 980 \ 981 for (y = 1; y < h; y++) { \ 982 for (x = 0; x < size; x++) \ 983 score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ 984 s1 += stride; \ 985 s2 += stride; \ 986 } \ 987 \ 988 return score; \ 989} 990VSSE(8) 991VSSE(16) 992 993#define WRAPPER8_16_SQ(name8, name16) \ 994static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ 995 ptrdiff_t stride, int h) \ 996{ \ 997 int score = 0; \ 998 \ 999 score += name8(s, dst, src, stride, 8); \ 1000 score += name8(s, dst + 8, src + 8, stride, 8); \ 1001 if (h == 16) { \ 1002 dst += 8 * stride; \ 1003 src += 8 * stride; \ 1004 score += name8(s, dst, src, stride, 8); \ 1005 score += name8(s, dst + 8, src + 8, stride, 8); \ 1006 } \ 1007 return score; \ 1008} 1009 1010WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) 1011WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) 1012WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) 1013#if CONFIG_GPL 1014WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) 1015#endif 1016WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) 1017WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) 1018WRAPPER8_16_SQ(rd8x8_c, rd16_c) 1019WRAPPER8_16_SQ(bit8x8_c, bit16_c) 1020 1021av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx) 1022{ 1023 c->sum_abs_dctelem = sum_abs_dctelem_c; 1024 1025 /* TODO [0] 16 [1] 8 */ 1026 c->pix_abs[0][0] = pix_abs16_c; 1027 c->pix_abs[0][1] = pix_abs16_x2_c; 1028 c->pix_abs[0][2] = pix_abs16_y2_c; 1029 c->pix_abs[0][3] = pix_abs16_xy2_c; 1030 c->pix_abs[1][0] = pix_abs8_c; 1031 c->pix_abs[1][1] = pix_abs8_x2_c; 1032 c->pix_abs[1][2] = pix_abs8_y2_c; 1033 c->pix_abs[1][3] = pix_abs8_xy2_c; 1034 1035#define SET_CMP_FUNC(name) \ 1036 c->name[0] = name ## 16_c; \ 1037 c->name[1] = name ## 8x8_c; 1038 1039 SET_CMP_FUNC(hadamard8_diff) 1040 c->hadamard8_diff[4] = hadamard8_intra16_c; 1041 c->hadamard8_diff[5] = hadamard8_intra8x8_c; 1042 SET_CMP_FUNC(dct_sad) 1043 SET_CMP_FUNC(dct_max) 1044#if CONFIG_GPL 1045 SET_CMP_FUNC(dct264_sad) 1046#endif 1047 c->sad[0] = pix_abs16_c; 1048 c->sad[1] = pix_abs8_c; 1049 c->sse[0] = sse16_c; 1050 c->sse[1] = sse8_c; 1051 c->sse[2] = sse4_c; 1052 SET_CMP_FUNC(quant_psnr) 1053 SET_CMP_FUNC(rd) 1054 SET_CMP_FUNC(bit) 1055 c->vsad[0] = vsad16_c; 1056 c->vsad[1] = vsad8_c; 1057 c->vsad[4] = vsad_intra16_c; 1058 c->vsad[5] = vsad_intra8_c; 1059 c->vsse[0] = vsse16_c; 1060 c->vsse[1] = vsse8_c; 1061 c->vsse[4] = vsse_intra16_c; 1062 c->vsse[5] = vsse_intra8_c; 1063 c->nsse[0] = nsse16_c; 1064 c->nsse[1] = nsse8_c; 1065#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER 1066 ff_dsputil_init_dwt(c); 1067#endif 1068 1069#if ARCH_AARCH64 1070 ff_me_cmp_init_aarch64(c, avctx); 1071#elif ARCH_ALPHA 1072 ff_me_cmp_init_alpha(c, avctx); 1073#elif ARCH_ARM 1074 ff_me_cmp_init_arm(c, avctx); 1075#elif ARCH_PPC 1076 ff_me_cmp_init_ppc(c, avctx); 1077#elif ARCH_X86 1078 ff_me_cmp_init_x86(c, avctx); 1079#elif ARCH_MIPS 1080 ff_me_cmp_init_mips(c, avctx); 1081#endif 1082 1083 c->median_sad[0] = pix_median_abs16_c; 1084 c->median_sad[1] = pix_median_abs8_c; 1085} 1086