1/* 2 * Motion estimation 3 * Copyright (c) 2002-2004 Michael Niedermayer 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * Motion estimation template. 25 */ 26 27#include "libavutil/qsort.h" 28#include "mpegvideo.h" 29 30//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) 31#define LOAD_COMMON\ 32 uint32_t av_unused * const score_map= c->score_map;\ 33 const int av_unused xmin= c->xmin;\ 34 const int av_unused ymin= c->ymin;\ 35 const int av_unused xmax= c->xmax;\ 36 const int av_unused ymax= c->ymax;\ 37 const uint8_t *mv_penalty = c->current_mv_penalty; \ 38 const int pred_x= c->pred_x;\ 39 const int pred_y= c->pred_y;\ 40 41#define CHECK_HALF_MV(dx, dy, x, y)\ 42{\ 43 const int hx= 2*(x)+(dx);\ 44 const int hy= 2*(y)+(dy);\ 45 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\ 46 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ 47 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ 48} 49 50static int hpel_motion_search(MpegEncContext * s, 51 int *mx_ptr, int *my_ptr, int dmin, 52 int src_index, int ref_index, 53 int size, int h) 54{ 55 MotionEstContext * const c= &s->me; 56 const int mx = *mx_ptr; 57 const int my = *my_ptr; 58 const int penalty_factor= c->sub_penalty_factor; 59 me_cmp_func cmp_sub, chroma_cmp_sub; 60 int bx=2*mx, by=2*my; 61 62 LOAD_COMMON 63 int flags= c->sub_flags; 64 65 //FIXME factorize 66 67 cmp_sub = s->mecc.me_sub_cmp[size]; 68 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1]; 69 70 if(c->skip){ //FIXME move out of hpel? 71 *mx_ptr = 0; 72 *my_ptr = 0; 73 return dmin; 74 } 75 76 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){ 77 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); 78 if(mx || my || size>0) 79 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; 80 } 81 82 if (mx > xmin && mx < xmax && 83 my > ymin && my < ymax) { 84 int d= dmin; 85 const int index = my * (1 << ME_MAP_SHIFT) + mx; 86 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] 87 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor; 88 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)] 89 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor; 90 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)] 91 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor; 92 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] 93 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor; 94 95#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1 96 unsigned key; 97 unsigned map_generation= c->map_generation; 98 key = (my - 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation; 99 av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); 100 key = (my + 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation; 101 av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); 102 key = (my) * (1 << ME_MAP_MV_BITS) + (mx + 1) + map_generation; 103 av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key); 104 key = (my) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation; 105 av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key); 106#endif 107 if(t<=b){ 108 CHECK_HALF_MV(0, 1, mx ,my-1) 109 if(l<=r){ 110 CHECK_HALF_MV(1, 1, mx-1, my-1) 111 if(t+r<=b+l){ 112 CHECK_HALF_MV(1, 1, mx , my-1) 113 }else{ 114 CHECK_HALF_MV(1, 1, mx-1, my ) 115 } 116 CHECK_HALF_MV(1, 0, mx-1, my ) 117 }else{ 118 CHECK_HALF_MV(1, 1, mx , my-1) 119 if(t+l<=b+r){ 120 CHECK_HALF_MV(1, 1, mx-1, my-1) 121 }else{ 122 CHECK_HALF_MV(1, 1, mx , my ) 123 } 124 CHECK_HALF_MV(1, 0, mx , my ) 125 } 126 }else{ 127 if(l<=r){ 128 if(t+l<=b+r){ 129 CHECK_HALF_MV(1, 1, mx-1, my-1) 130 }else{ 131 CHECK_HALF_MV(1, 1, mx , my ) 132 } 133 CHECK_HALF_MV(1, 0, mx-1, my) 134 CHECK_HALF_MV(1, 1, mx-1, my) 135 }else{ 136 if(t+r<=b+l){ 137 CHECK_HALF_MV(1, 1, mx , my-1) 138 }else{ 139 CHECK_HALF_MV(1, 1, mx-1, my) 140 } 141 CHECK_HALF_MV(1, 0, mx , my) 142 CHECK_HALF_MV(1, 1, mx , my) 143 } 144 CHECK_HALF_MV(0, 1, mx , my) 145 } 146 av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); 147 } 148 149 *mx_ptr = bx; 150 *my_ptr = by; 151 152 return dmin; 153} 154 155static int no_sub_motion_search(MpegEncContext * s, 156 int *mx_ptr, int *my_ptr, int dmin, 157 int src_index, int ref_index, 158 int size, int h) 159{ 160 (*mx_ptr) *= 2; 161 (*my_ptr) *= 2; 162 return dmin; 163} 164 165static inline int get_mb_score(MpegEncContext *s, int mx, int my, 166 int src_index, int ref_index, int size, 167 int h, int add_rate) 168{ 169 MotionEstContext * const c= &s->me; 170 const int penalty_factor= c->mb_penalty_factor; 171 const int flags= c->mb_flags; 172 const int qpel= flags & FLAG_QPEL; 173 const int mask= 1+2*qpel; 174 me_cmp_func cmp_sub, chroma_cmp_sub; 175 int d; 176 177 LOAD_COMMON 178 179 //FIXME factorize 180 181 cmp_sub = s->mecc.mb_cmp[size]; 182 chroma_cmp_sub = s->mecc.mb_cmp[size + 1]; 183 184 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); 185 //FIXME check cbp before adding penalty for (0,0) vector 186 if(add_rate && (mx || my || size>0)) 187 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; 188 189 return d; 190} 191 192int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index, 193 int ref_index, int size, int h, int add_rate) 194{ 195 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate); 196} 197 198#define CHECK_QUARTER_MV(dx, dy, x, y)\ 199{\ 200 const int hx= 4*(x)+(dx);\ 201 const int hy= 4*(y)+(dy);\ 202 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\ 203 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ 204 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ 205} 206 207static int qpel_motion_search(MpegEncContext * s, 208 int *mx_ptr, int *my_ptr, int dmin, 209 int src_index, int ref_index, 210 int size, int h) 211{ 212 MotionEstContext * const c= &s->me; 213 const int mx = *mx_ptr; 214 const int my = *my_ptr; 215 const int penalty_factor= c->sub_penalty_factor; 216 const unsigned map_generation = c->map_generation; 217 const int subpel_quality= c->avctx->me_subpel_quality; 218 uint32_t *map= c->map; 219 me_cmp_func cmpf, chroma_cmpf; 220 me_cmp_func cmp_sub, chroma_cmp_sub; 221 222 LOAD_COMMON 223 int flags= c->sub_flags; 224 225 cmpf = s->mecc.me_cmp[size]; 226 chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize 227 //FIXME factorize 228 229 cmp_sub = s->mecc.me_sub_cmp[size]; 230 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1]; 231 232 if(c->skip){ //FIXME somehow move up (benchmark) 233 *mx_ptr = 0; 234 *my_ptr = 0; 235 return dmin; 236 } 237 238 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){ 239 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); 240 if(mx || my || size>0) 241 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; 242 } 243 244 if (mx > xmin && mx < xmax && 245 my > ymin && my < ymax) { 246 int bx=4*mx, by=4*my; 247 int d= dmin; 248 int i, nx, ny; 249 const int index = my * (1 << ME_MAP_SHIFT) + mx; 250 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; 251 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]; 252 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]; 253 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; 254 const int c= score_map[(index )&(ME_MAP_SIZE-1)]; 255 int best[8]; 256 int best_pos[8][2]; 257 258 memset(best, 64, sizeof(int)*8); 259 if(s->me.dia_size>=2){ 260 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; 261 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; 262 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; 263 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; 264 265 for(ny= -3; ny <= 3; ny++){ 266 for(nx= -3; nx <= 3; nx++){ 267 //FIXME this could overflow (unlikely though) 268 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; 269 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; 270 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; 271 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10; 272 int i; 273 274 if((nx&3)==0 && (ny&3)==0) continue; 275 276 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; 277 278// if(nx&1) score-=1024*c->penalty_factor; 279// if(ny&1) score-=1024*c->penalty_factor; 280 281 for(i=0; i<8; i++){ 282 if(score < best[i]){ 283 memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); 284 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); 285 best[i]= score; 286 best_pos[i][0]= nx + 4*mx; 287 best_pos[i][1]= ny + 4*my; 288 break; 289 } 290 } 291 } 292 } 293 }else{ 294 int tl; 295 //FIXME this could overflow (unlikely though) 296 const int cx = 4*(r - l); 297 const int cx2= r + l - 2*c; 298 const int cy = 4*(b - t); 299 const int cy2= b + t - 2*c; 300 int cxy; 301 302 if (map[(index - (1 << ME_MAP_SHIFT) - 1) & (ME_MAP_SIZE - 1)] == 303 (my - 1) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation) { 304 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; 305 }else{ 306 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different 307 } 308 309 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; 310 311 av_assert2(16*cx2 + 4*cx + 32*c == 32*r); 312 av_assert2(16*cx2 - 4*cx + 32*c == 32*l); 313 av_assert2(16*cy2 + 4*cy + 32*c == 32*b); 314 av_assert2(16*cy2 - 4*cy + 32*c == 32*t); 315 av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl); 316 317 for(ny= -3; ny <= 3; ny++){ 318 for(nx= -3; nx <= 3; nx++){ 319 //FIXME this could overflow (unlikely though) 320 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor 321 int i; 322 323 if((nx&3)==0 && (ny&3)==0) continue; 324 325 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; 326// if(nx&1) score-=32*c->penalty_factor; 327 // if(ny&1) score-=32*c->penalty_factor; 328 329 for(i=0; i<8; i++){ 330 if(score < best[i]){ 331 memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); 332 memmove(best_pos[i + 1], best_pos[i], sizeof(best_pos[0]) * (7 - i)); 333 best[i]= score; 334 best_pos[i][0]= nx + 4*mx; 335 best_pos[i][1]= ny + 4*my; 336 break; 337 } 338 } 339 } 340 } 341 } 342 for(i=0; i<subpel_quality; i++){ 343 nx= best_pos[i][0]; 344 ny= best_pos[i][1]; 345 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) 346 } 347 348 av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4); 349 350 *mx_ptr = bx; 351 *my_ptr = by; 352 }else{ 353 *mx_ptr =4*mx; 354 *my_ptr =4*my; 355 } 356 357 return dmin; 358} 359 360 361#define CHECK_MV(x,y)\ 362{\ 363 const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ 364 const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ 365 av_assert2((x) >= xmin);\ 366 av_assert2((x) <= xmax);\ 367 av_assert2((y) >= ymin);\ 368 av_assert2((y) <= ymax);\ 369 if(map[index]!=key){\ 370 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\ 371 map[index]= key;\ 372 score_map[index]= d;\ 373 d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\ 374 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ 375 }\ 376} 377 378#define CHECK_CLIPPED_MV(ax,ay)\ 379{\ 380 const int Lx= ax;\ 381 const int Ly= ay;\ 382 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\ 383 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\ 384 CHECK_MV(Lx2, Ly2)\ 385} 386 387#define CHECK_MV_DIR(x,y,new_dir)\ 388{\ 389 const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ 390 const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ 391 if(map[index]!=key){\ 392 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\ 393 map[index]= key;\ 394 score_map[index]= d;\ 395 d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\ 396 if(d<dmin){\ 397 best[0]=x;\ 398 best[1]=y;\ 399 dmin=d;\ 400 next_dir= new_dir;\ 401 }\ 402 }\ 403} 404 405#define check(x,y,S,v)\ 406if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ 407if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ 408if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ 409if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ 410 411#define LOAD_COMMON2\ 412 uint32_t *map= c->map;\ 413 const int qpel= flags&FLAG_QPEL;\ 414 const int shift= 1+qpel;\ 415 416static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, 417 int src_index, int ref_index, const int penalty_factor, 418 int size, int h, int flags) 419{ 420 MotionEstContext * const c= &s->me; 421 me_cmp_func cmpf, chroma_cmpf; 422 int next_dir=-1; 423 LOAD_COMMON 424 LOAD_COMMON2 425 unsigned map_generation = c->map_generation; 426 427 cmpf = s->mecc.me_cmp[size]; 428 chroma_cmpf = s->mecc.me_cmp[size + 1]; 429 430 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ 431 const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; 432 const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); 433 if (map[index] != key) { // this will be executed only very rarely 434 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags); 435 map[index]= key; 436 } 437 } 438 439 for(;;){ 440 int d; 441 const int dir= next_dir; 442 const int x= best[0]; 443 const int y= best[1]; 444 next_dir=-1; 445 446 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) 447 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) 448 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) 449 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) 450 451 if(next_dir==-1){ 452 return dmin; 453 } 454 } 455} 456 457static int funny_diamond_search(MpegEncContext * s, int *best, int dmin, 458 int src_index, int ref_index, const int penalty_factor, 459 int size, int h, int flags) 460{ 461 MotionEstContext * const c= &s->me; 462 me_cmp_func cmpf, chroma_cmpf; 463 int dia_size; 464 LOAD_COMMON 465 LOAD_COMMON2 466 unsigned map_generation = c->map_generation; 467 468 cmpf = s->mecc.me_cmp[size]; 469 chroma_cmpf = s->mecc.me_cmp[size + 1]; 470 471 for(dia_size=1; dia_size<=4; dia_size++){ 472 int dir; 473 const int x= best[0]; 474 const int y= best[1]; 475 476 if(dia_size&(dia_size-1)) continue; 477 478 if( x + dia_size > xmax 479 || x - dia_size < xmin 480 || y + dia_size > ymax 481 || y - dia_size < ymin) 482 continue; 483 484 for(dir= 0; dir<dia_size; dir+=2){ 485 int d; 486 487 CHECK_MV(x + dir , y + dia_size - dir); 488 CHECK_MV(x + dia_size - dir, y - dir ); 489 CHECK_MV(x - dir , y - dia_size + dir); 490 CHECK_MV(x - dia_size + dir, y + dir ); 491 } 492 493 if(x!=best[0] || y!=best[1]) 494 dia_size=0; 495 } 496 return dmin; 497} 498 499static int hex_search(MpegEncContext * s, int *best, int dmin, 500 int src_index, int ref_index, const int penalty_factor, 501 int size, int h, int flags, int dia_size) 502{ 503 MotionEstContext * const c= &s->me; 504 me_cmp_func cmpf, chroma_cmpf; 505 LOAD_COMMON 506 LOAD_COMMON2 507 unsigned map_generation = c->map_generation; 508 int x,y,d; 509 const int dec= dia_size & (dia_size-1); 510 511 cmpf = s->mecc.me_cmp[size]; 512 chroma_cmpf = s->mecc.me_cmp[size + 1]; 513 514 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ 515 do{ 516 x= best[0]; 517 y= best[1]; 518 519 CHECK_CLIPPED_MV(x -dia_size , y); 520 CHECK_CLIPPED_MV(x+ dia_size , y); 521 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size); 522 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size); 523 if(dia_size>1){ 524 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size); 525 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size); 526 } 527 }while(best[0] != x || best[1] != y); 528 } 529 530 return dmin; 531} 532 533static int l2s_dia_search(MpegEncContext * s, int *best, int dmin, 534 int src_index, int ref_index, const int penalty_factor, 535 int size, int h, int flags) 536{ 537 MotionEstContext * const c= &s->me; 538 me_cmp_func cmpf, chroma_cmpf; 539 LOAD_COMMON 540 LOAD_COMMON2 541 unsigned map_generation = c->map_generation; 542 int x,y,i,d; 543 int dia_size= c->dia_size&0xFF; 544 const int dec= dia_size & (dia_size-1); 545 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1}, 546 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}}; 547 548 cmpf = s->mecc.me_cmp[size]; 549 chroma_cmpf = s->mecc.me_cmp[size + 1]; 550 551 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){ 552 do{ 553 x= best[0]; 554 y= best[1]; 555 for(i=0; i<8; i++){ 556 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size); 557 } 558 }while(best[0] != x || best[1] != y); 559 } 560 561 x= best[0]; 562 y= best[1]; 563 CHECK_CLIPPED_MV(x+1, y); 564 CHECK_CLIPPED_MV(x, y+1); 565 CHECK_CLIPPED_MV(x-1, y); 566 CHECK_CLIPPED_MV(x, y-1); 567 568 return dmin; 569} 570 571static int umh_search(MpegEncContext * s, int *best, int dmin, 572 int src_index, int ref_index, const int penalty_factor, 573 int size, int h, int flags) 574{ 575 MotionEstContext * const c= &s->me; 576 me_cmp_func cmpf, chroma_cmpf; 577 LOAD_COMMON 578 LOAD_COMMON2 579 unsigned map_generation = c->map_generation; 580 int x,y,x2,y2, i, j, d; 581 const int dia_size= c->dia_size&0xFE; 582 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2}, 583 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2}, 584 {-2, 3}, { 0, 4}, { 2, 3}, 585 {-2,-3}, { 0,-4}, { 2,-3},}; 586 587 cmpf = s->mecc.me_cmp[size]; 588 chroma_cmpf = s->mecc.me_cmp[size + 1]; 589 590 x= best[0]; 591 y= best[1]; 592 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){ 593 CHECK_MV(x2, y); 594 } 595 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){ 596 CHECK_MV(x, y2); 597 } 598 599 x= best[0]; 600 y= best[1]; 601 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){ 602 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){ 603 CHECK_MV(x2, y2); 604 } 605 } 606 607//FIXME prevent the CLIP stuff 608 609 for(j=1; j<=dia_size/4; j++){ 610 for(i=0; i<16; i++){ 611 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j); 612 } 613 } 614 615 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2); 616} 617 618static int full_search(MpegEncContext * s, int *best, int dmin, 619 int src_index, int ref_index, const int penalty_factor, 620 int size, int h, int flags) 621{ 622 MotionEstContext * const c= &s->me; 623 me_cmp_func cmpf, chroma_cmpf; 624 LOAD_COMMON 625 LOAD_COMMON2 626 unsigned map_generation = c->map_generation; 627 int x,y, d; 628 const int dia_size= c->dia_size&0xFF; 629 630 cmpf = s->mecc.me_cmp[size]; 631 chroma_cmpf = s->mecc.me_cmp[size + 1]; 632 633 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){ 634 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){ 635 CHECK_MV(x, y); 636 } 637 } 638 639 x= best[0]; 640 y= best[1]; 641 d= dmin; 642 CHECK_CLIPPED_MV(x , y); 643 CHECK_CLIPPED_MV(x+1, y); 644 CHECK_CLIPPED_MV(x, y+1); 645 CHECK_CLIPPED_MV(x-1, y); 646 CHECK_CLIPPED_MV(x, y-1); 647 best[0]= x; 648 best[1]= y; 649 650 return d; 651} 652 653#define SAB_CHECK_MV(ax,ay)\ 654{\ 655 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\ 656 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\ 657 if(map[index]!=key){\ 658 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\ 659 map[index]= key;\ 660 score_map[index]= d;\ 661 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\ 662 if(d < minima[minima_count-1].height){\ 663 int j=0;\ 664 \ 665 while(d >= minima[j].height) j++;\ 666\ 667 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\ 668\ 669 minima[j].checked= 0;\ 670 minima[j].height= d;\ 671 minima[j].x= ax;\ 672 minima[j].y= ay;\ 673 \ 674 i=-1;\ 675 continue;\ 676 }\ 677 }\ 678} 679 680#define MAX_SAB_SIZE ME_MAP_SIZE 681static int sab_diamond_search(MpegEncContext * s, int *best, int dmin, 682 int src_index, int ref_index, const int penalty_factor, 683 int size, int h, int flags) 684{ 685 MotionEstContext * const c= &s->me; 686 me_cmp_func cmpf, chroma_cmpf; 687 Minima minima[MAX_SAB_SIZE]; 688 const int minima_count= FFABS(c->dia_size); 689 int i, j; 690 LOAD_COMMON 691 LOAD_COMMON2 692 unsigned map_generation = c->map_generation; 693 694 av_assert1(minima_count <= MAX_SAB_SIZE); 695 696 cmpf = s->mecc.me_cmp[size]; 697 chroma_cmpf = s->mecc.me_cmp[size + 1]; 698 699 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can 700 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map 701 */ 702 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){ 703 uint32_t key= map[i]; 704 705 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); 706 707 if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation) 708 continue; 709 710 minima[j].height= score_map[i]; 711 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; 712 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); 713 minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); 714 minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); 715 716 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space 717 if( minima[j].x > xmax || minima[j].x < xmin 718 || minima[j].y > ymax || minima[j].y < ymin) 719 continue; 720 721 minima[j].checked=0; 722 if(minima[j].x || minima[j].y) 723 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; 724 725 j++; 726 } 727 728 AV_QSORT(minima, j, Minima, minima_cmp); 729 730 for(; j<minima_count; j++){ 731 minima[j].height=256*256*256*64; 732 minima[j].checked=0; 733 minima[j].x= minima[j].y=0; 734 } 735 736 for(i=0; i<minima_count; i++){ 737 const int x= minima[i].x; 738 const int y= minima[i].y; 739 int d; 740 741 if(minima[i].checked) continue; 742 743 if( x >= xmax || x <= xmin 744 || y >= ymax || y <= ymin) 745 continue; 746 747 SAB_CHECK_MV(x-1, y) 748 SAB_CHECK_MV(x+1, y) 749 SAB_CHECK_MV(x , y-1) 750 SAB_CHECK_MV(x , y+1) 751 752 minima[i].checked= 1; 753 } 754 755 best[0]= minima[0].x; 756 best[1]= minima[0].y; 757 dmin= minima[0].height; 758 759 if( best[0] < xmax && best[0] > xmin 760 && best[1] < ymax && best[1] > ymin){ 761 int d; 762 // ensure that the reference samples for hpel refinement are in the map 763 CHECK_MV(best[0]-1, best[1]) 764 CHECK_MV(best[0]+1, best[1]) 765 CHECK_MV(best[0], best[1]-1) 766 CHECK_MV(best[0], best[1]+1) 767 } 768 return dmin; 769} 770 771static int var_diamond_search(MpegEncContext * s, int *best, int dmin, 772 int src_index, int ref_index, const int penalty_factor, 773 int size, int h, int flags) 774{ 775 MotionEstContext * const c= &s->me; 776 me_cmp_func cmpf, chroma_cmpf; 777 int dia_size; 778 LOAD_COMMON 779 LOAD_COMMON2 780 unsigned map_generation = c->map_generation; 781 782 cmpf = s->mecc.me_cmp[size]; 783 chroma_cmpf = s->mecc.me_cmp[size + 1]; 784 785 for(dia_size=1; dia_size<=c->dia_size; dia_size++){ 786 int dir, start, end; 787 const int x= best[0]; 788 const int y= best[1]; 789 790 start= FFMAX(0, y + dia_size - ymax); 791 end = FFMIN(dia_size, xmax - x + 1); 792 for(dir= start; dir<end; dir++){ 793 int d; 794 795//check(x + dir,y + dia_size - dir,0, a0) 796 CHECK_MV(x + dir , y + dia_size - dir); 797 } 798 799 start= FFMAX(0, x + dia_size - xmax); 800 end = FFMIN(dia_size, y - ymin + 1); 801 for(dir= start; dir<end; dir++){ 802 int d; 803 804//check(x + dia_size - dir, y - dir,0, a1) 805 CHECK_MV(x + dia_size - dir, y - dir ); 806 } 807 808 start= FFMAX(0, -y + dia_size + ymin ); 809 end = FFMIN(dia_size, x - xmin + 1); 810 for(dir= start; dir<end; dir++){ 811 int d; 812 813//check(x - dir,y - dia_size + dir,0, a2) 814 CHECK_MV(x - dir , y - dia_size + dir); 815 } 816 817 start= FFMAX(0, -x + dia_size + xmin ); 818 end = FFMIN(dia_size, ymax - y + 1); 819 for(dir= start; dir<end; dir++){ 820 int d; 821 822//check(x - dia_size + dir, y + dir,0, a3) 823 CHECK_MV(x - dia_size + dir, y + dir ); 824 } 825 826 if(x!=best[0] || y!=best[1]) 827 dia_size=0; 828 } 829 return dmin; 830} 831 832static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, 833 int src_index, int ref_index, const int penalty_factor, 834 int size, int h, int flags){ 835 MotionEstContext * const c= &s->me; 836 if(c->dia_size==-1) 837 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 838 else if(c->dia_size<-1) 839 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 840 else if(c->dia_size<2) 841 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 842 else if(c->dia_size>1024) 843 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 844 else if(c->dia_size>768) 845 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 846 else if(c->dia_size>512) 847 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF); 848 else if(c->dia_size>256) 849 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 850 else 851 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 852} 853 854/** 855 @param P a list of candidate mvs to check before starting the 856 iterative search. If one of the candidates is close to the optimal mv, then 857 it takes fewer iterations. And it increases the chance that we find the 858 optimal mv. 859 */ 860static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, 861 int P[10][2], int src_index, int ref_index, const int16_t (*last_mv)[2], 862 int ref_mv_scale, int flags, int size, int h) 863{ 864 MotionEstContext * const c= &s->me; 865 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector. 866 i.e. the difference between the position of the 867 block currently being encoded and the position of 868 the block chosen to predict it from. */ 869 int d; ///< the score (cmp + penalty) of any given mv 870 int dmin; /**< the best value of d, i.e. the score 871 corresponding to the mv stored in best[]. */ 872 unsigned map_generation; 873 int penalty_factor; 874 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME 875 const int ref_mv_xy = s->mb_x + s->mb_y * ref_mv_stride; // add to last_mv before passing FIXME 876 me_cmp_func cmpf, chroma_cmpf; 877 878 LOAD_COMMON 879 LOAD_COMMON2 880 881 if(c->pre_pass){ 882 penalty_factor= c->pre_penalty_factor; 883 cmpf = s->mecc.me_pre_cmp[size]; 884 chroma_cmpf = s->mecc.me_pre_cmp[size + 1]; 885 }else{ 886 penalty_factor= c->penalty_factor; 887 cmpf = s->mecc.me_cmp[size]; 888 chroma_cmpf = s->mecc.me_cmp[size + 1]; 889 } 890 891 map_generation= update_map_generation(c); 892 893 av_assert2(cmpf); 894 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags); 895 map[0]= map_generation; 896 score_map[0]= dmin; 897 898 //FIXME precalc first term below? 899 if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || 900 s->mpv_flags & FF_MPV_FLAG_MV0) 901 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor; 902 903 /* first line */ 904 if (s->first_slice_line) { 905 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) 906 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 907 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) 908 }else{ 909 if(dmin<((h*h*s->avctx->mv0_threshold)>>8) 910 && ( P_LEFT[0] |P_LEFT[1] 911 |P_TOP[0] |P_TOP[1] 912 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ 913 *mx_ptr= 0; 914 *my_ptr= 0; 915 c->skip=1; 916 return dmin; 917 } 918 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift) 919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1) 920 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1) 921 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) ) 922 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) ) 923 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 924 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) 925 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) 926 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) 927 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) 928 } 929 if(dmin>h*h*4){ 930 if(c->pre_pass){ 931 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, 932 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) 933 if(!s->first_slice_line) 934 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 935 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) 936 }else{ 937 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 938 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) 939 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line 940 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 941 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) 942 } 943 } 944 945 if(c->avctx->last_predictor_count){ 946 const int count= c->avctx->last_predictor_count; 947 const int xstart= FFMAX(0, s->mb_x - count); 948 const int ystart= FFMAX(0, s->mb_y - count); 949 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1); 950 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1); 951 int mb_y; 952 953 for(mb_y=ystart; mb_y<yend; mb_y++){ 954 int mb_x; 955 for(mb_x=xstart; mb_x<xend; mb_x++){ 956 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride; 957 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16; 958 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16; 959 960 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue; 961 CHECK_MV(mx,my) 962 } 963 } 964 } 965 966//check(best[0],best[1],0, b0) 967 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 968 969//check(best[0],best[1],0, b1) 970 *mx_ptr= best[0]; 971 *my_ptr= best[1]; 972 973 return dmin; 974} 975 976//this function is dedicated to the brain damaged gcc 977int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr, 978 int P[10][2], int src_index, int ref_index, 979 const int16_t (*last_mv)[2], int ref_mv_scale, 980 int size, int h) 981{ 982 MotionEstContext * const c= &s->me; 983//FIXME convert other functions in the same way if faster 984 if(c->flags==0 && h==16 && size==0){ 985 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16); 986// case FLAG_QPEL: 987// return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL); 988 }else{ 989 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h); 990 } 991} 992 993static int epzs_motion_search2(MpegEncContext * s, 994 int *mx_ptr, int *my_ptr, int P[10][2], 995 int src_index, int ref_index, const int16_t (*last_mv)[2], 996 int ref_mv_scale, const int size) 997{ 998 MotionEstContext * const c= &s->me; 999 int best[2]={0, 0}; 1000 int d, dmin; 1001 unsigned map_generation; 1002 const int penalty_factor= c->penalty_factor; 1003 const int h=8; 1004 const int ref_mv_stride= s->mb_stride; 1005 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; 1006 me_cmp_func cmpf, chroma_cmpf; 1007 LOAD_COMMON 1008 int flags= c->flags; 1009 LOAD_COMMON2 1010 1011 cmpf = s->mecc.me_cmp[size]; 1012 chroma_cmpf = s->mecc.me_cmp[size + 1]; 1013 1014 map_generation= update_map_generation(c); 1015 1016 dmin = 1000000; 1017 1018 /* first line */ 1019 if (s->first_slice_line) { 1020 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) 1021 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 1022 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) 1023 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) 1024 }else{ 1025 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) 1026 //FIXME try some early stop 1027 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) 1028 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) 1029 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) 1030 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) 1031 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 1032 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) 1033 } 1034 if(dmin>64*4){ 1035 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 1036 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) 1037 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line 1038 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 1039 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) 1040 } 1041 1042 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags); 1043 1044 *mx_ptr= best[0]; 1045 *my_ptr= best[1]; 1046 1047 return dmin; 1048} 1049