1/*
2 * Motion estimation
3 * Copyright (c) 2002-2004 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * Motion estimation template.
25 */
26
27#include "libavutil/qsort.h"
28#include "mpegvideo.h"
29
30//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31#define LOAD_COMMON\
32    uint32_t av_unused * const score_map= c->score_map;\
33    const int av_unused xmin= c->xmin;\
34    const int av_unused ymin= c->ymin;\
35    const int av_unused xmax= c->xmax;\
36    const int av_unused ymax= c->ymax;\
37    const uint8_t *mv_penalty = c->current_mv_penalty; \
38    const int pred_x= c->pred_x;\
39    const int pred_y= c->pred_y;\
40
41#define CHECK_HALF_MV(dx, dy, x, y)\
42{\
43    const int hx= 2*(x)+(dx);\
44    const int hy= 2*(y)+(dy);\
45    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
46    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
47    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
48}
49
50static int hpel_motion_search(MpegEncContext * s,
51                                  int *mx_ptr, int *my_ptr, int dmin,
52                                  int src_index, int ref_index,
53                                  int size, int h)
54{
55    MotionEstContext * const c= &s->me;
56    const int mx = *mx_ptr;
57    const int my = *my_ptr;
58    const int penalty_factor= c->sub_penalty_factor;
59    me_cmp_func cmp_sub, chroma_cmp_sub;
60    int bx=2*mx, by=2*my;
61
62    LOAD_COMMON
63    int flags= c->sub_flags;
64
65 //FIXME factorize
66
67    cmp_sub        = s->mecc.me_sub_cmp[size];
68    chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
69
70    if(c->skip){ //FIXME move out of hpel?
71        *mx_ptr = 0;
72        *my_ptr = 0;
73        return dmin;
74    }
75
76    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
77        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
78        if(mx || my || size>0)
79            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
80    }
81
82    if (mx > xmin && mx < xmax &&
83        my > ymin && my < ymax) {
84        int d= dmin;
85        const int index = my * (1 << ME_MAP_SHIFT) + mx;
86        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
87                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
88        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
89                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
90        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
91                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
92        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
93                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
94
95#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
96        unsigned key;
97        unsigned map_generation= c->map_generation;
98        key = (my - 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
99        av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
100        key = (my + 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
101        av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
102        key = (my) * (1 << ME_MAP_MV_BITS) + (mx + 1) + map_generation;
103        av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
104        key = (my) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation;
105        av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
106#endif
107        if(t<=b){
108            CHECK_HALF_MV(0, 1, mx  ,my-1)
109            if(l<=r){
110                CHECK_HALF_MV(1, 1, mx-1, my-1)
111                if(t+r<=b+l){
112                    CHECK_HALF_MV(1, 1, mx  , my-1)
113                }else{
114                    CHECK_HALF_MV(1, 1, mx-1, my  )
115                }
116                CHECK_HALF_MV(1, 0, mx-1, my  )
117            }else{
118                CHECK_HALF_MV(1, 1, mx  , my-1)
119                if(t+l<=b+r){
120                    CHECK_HALF_MV(1, 1, mx-1, my-1)
121                }else{
122                    CHECK_HALF_MV(1, 1, mx  , my  )
123                }
124                CHECK_HALF_MV(1, 0, mx  , my  )
125            }
126        }else{
127            if(l<=r){
128                if(t+l<=b+r){
129                    CHECK_HALF_MV(1, 1, mx-1, my-1)
130                }else{
131                    CHECK_HALF_MV(1, 1, mx  , my  )
132                }
133                CHECK_HALF_MV(1, 0, mx-1, my)
134                CHECK_HALF_MV(1, 1, mx-1, my)
135            }else{
136                if(t+r<=b+l){
137                    CHECK_HALF_MV(1, 1, mx  , my-1)
138                }else{
139                    CHECK_HALF_MV(1, 1, mx-1, my)
140                }
141                CHECK_HALF_MV(1, 0, mx  , my)
142                CHECK_HALF_MV(1, 1, mx  , my)
143            }
144            CHECK_HALF_MV(0, 1, mx  , my)
145        }
146        av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
147    }
148
149    *mx_ptr = bx;
150    *my_ptr = by;
151
152    return dmin;
153}
154
155static int no_sub_motion_search(MpegEncContext * s,
156          int *mx_ptr, int *my_ptr, int dmin,
157                                  int src_index, int ref_index,
158                                  int size, int h)
159{
160    (*mx_ptr) *= 2;
161    (*my_ptr) *= 2;
162    return dmin;
163}
164
165static inline int get_mb_score(MpegEncContext *s, int mx, int my,
166                               int src_index, int ref_index, int size,
167                               int h, int add_rate)
168{
169    MotionEstContext * const c= &s->me;
170    const int penalty_factor= c->mb_penalty_factor;
171    const int flags= c->mb_flags;
172    const int qpel= flags & FLAG_QPEL;
173    const int mask= 1+2*qpel;
174    me_cmp_func cmp_sub, chroma_cmp_sub;
175    int d;
176
177    LOAD_COMMON
178
179 //FIXME factorize
180
181    cmp_sub        = s->mecc.mb_cmp[size];
182    chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
183
184    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
185    //FIXME check cbp before adding penalty for (0,0) vector
186    if(add_rate && (mx || my || size>0))
187        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
188
189    return d;
190}
191
192int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
193                    int ref_index, int size, int h, int add_rate)
194{
195    return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
196}
197
198#define CHECK_QUARTER_MV(dx, dy, x, y)\
199{\
200    const int hx= 4*(x)+(dx);\
201    const int hy= 4*(y)+(dy);\
202    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
203    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
204    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
205}
206
207static int qpel_motion_search(MpegEncContext * s,
208                                  int *mx_ptr, int *my_ptr, int dmin,
209                                  int src_index, int ref_index,
210                                  int size, int h)
211{
212    MotionEstContext * const c= &s->me;
213    const int mx = *mx_ptr;
214    const int my = *my_ptr;
215    const int penalty_factor= c->sub_penalty_factor;
216    const unsigned map_generation = c->map_generation;
217    const int subpel_quality= c->avctx->me_subpel_quality;
218    uint32_t *map= c->map;
219    me_cmp_func cmpf, chroma_cmpf;
220    me_cmp_func cmp_sub, chroma_cmp_sub;
221
222    LOAD_COMMON
223    int flags= c->sub_flags;
224
225    cmpf        = s->mecc.me_cmp[size];
226    chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
227 //FIXME factorize
228
229    cmp_sub        = s->mecc.me_sub_cmp[size];
230    chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
231
232    if(c->skip){ //FIXME somehow move up (benchmark)
233        *mx_ptr = 0;
234        *my_ptr = 0;
235        return dmin;
236    }
237
238    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
239        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
240        if(mx || my || size>0)
241            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
242    }
243
244    if (mx > xmin && mx < xmax &&
245        my > ymin && my < ymax) {
246        int bx=4*mx, by=4*my;
247        int d= dmin;
248        int i, nx, ny;
249        const int index = my * (1 << ME_MAP_SHIFT) + mx;
250        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
251        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
252        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
253        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
254        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
255        int best[8];
256        int best_pos[8][2];
257
258        memset(best, 64, sizeof(int)*8);
259        if(s->me.dia_size>=2){
260            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
262            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
264
265            for(ny= -3; ny <= 3; ny++){
266                for(nx= -3; nx <= 3; nx++){
267                    //FIXME this could overflow (unlikely though)
268                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
269                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
270                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
271                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
272                    int i;
273
274                    if((nx&3)==0 && (ny&3)==0) continue;
275
276                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
277
278//                    if(nx&1) score-=1024*c->penalty_factor;
279//                    if(ny&1) score-=1024*c->penalty_factor;
280
281                    for(i=0; i<8; i++){
282                        if(score < best[i]){
283                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
284                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
285                            best[i]= score;
286                            best_pos[i][0]= nx + 4*mx;
287                            best_pos[i][1]= ny + 4*my;
288                            break;
289                        }
290                    }
291                }
292            }
293        }else{
294            int tl;
295            //FIXME this could overflow (unlikely though)
296            const int cx = 4*(r - l);
297            const int cx2= r + l - 2*c;
298            const int cy = 4*(b - t);
299            const int cy2= b + t - 2*c;
300            int cxy;
301
302            if (map[(index - (1 << ME_MAP_SHIFT) - 1) & (ME_MAP_SIZE - 1)] ==
303                (my - 1) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation) {
304                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
305            }else{
306                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
307            }
308
309            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
310
311            av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
312            av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
313            av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
314            av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
315            av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
316
317            for(ny= -3; ny <= 3; ny++){
318                for(nx= -3; nx <= 3; nx++){
319                    //FIXME this could overflow (unlikely though)
320                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
321                    int i;
322
323                    if((nx&3)==0 && (ny&3)==0) continue;
324
325                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
326//                    if(nx&1) score-=32*c->penalty_factor;
327  //                  if(ny&1) score-=32*c->penalty_factor;
328
329                    for(i=0; i<8; i++){
330                        if(score < best[i]){
331                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
332                            memmove(best_pos[i + 1], best_pos[i], sizeof(best_pos[0]) * (7 - i));
333                            best[i]= score;
334                            best_pos[i][0]= nx + 4*mx;
335                            best_pos[i][1]= ny + 4*my;
336                            break;
337                        }
338                    }
339                }
340            }
341        }
342        for(i=0; i<subpel_quality; i++){
343            nx= best_pos[i][0];
344            ny= best_pos[i][1];
345            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
346        }
347
348        av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
349
350        *mx_ptr = bx;
351        *my_ptr = by;
352    }else{
353        *mx_ptr =4*mx;
354        *my_ptr =4*my;
355    }
356
357    return dmin;
358}
359
360
361#define CHECK_MV(x,y)\
362{\
363    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
364    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
365    av_assert2((x) >= xmin);\
366    av_assert2((x) <= xmax);\
367    av_assert2((y) >= ymin);\
368    av_assert2((y) <= ymax);\
369    if(map[index]!=key){\
370        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
371        map[index]= key;\
372        score_map[index]= d;\
373        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
374        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
375    }\
376}
377
378#define CHECK_CLIPPED_MV(ax,ay)\
379{\
380    const int Lx= ax;\
381    const int Ly= ay;\
382    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
383    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
384    CHECK_MV(Lx2, Ly2)\
385}
386
387#define CHECK_MV_DIR(x,y,new_dir)\
388{\
389    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
390    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
391    if(map[index]!=key){\
392        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
393        map[index]= key;\
394        score_map[index]= d;\
395        d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\
396        if(d<dmin){\
397            best[0]=x;\
398            best[1]=y;\
399            dmin=d;\
400            next_dir= new_dir;\
401        }\
402    }\
403}
404
405#define check(x,y,S,v)\
406if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
407if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
408if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
409if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
410
411#define LOAD_COMMON2\
412    uint32_t *map= c->map;\
413    const int qpel= flags&FLAG_QPEL;\
414    const int shift= 1+qpel;\
415
416static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
417                                       int src_index, int ref_index, const int penalty_factor,
418                                       int size, int h, int flags)
419{
420    MotionEstContext * const c= &s->me;
421    me_cmp_func cmpf, chroma_cmpf;
422    int next_dir=-1;
423    LOAD_COMMON
424    LOAD_COMMON2
425    unsigned map_generation = c->map_generation;
426
427    cmpf        = s->mecc.me_cmp[size];
428    chroma_cmpf = s->mecc.me_cmp[size + 1];
429
430    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
431        const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
432        const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
433        if (map[index] != key) { // this will be executed only very rarely
434            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
435            map[index]= key;
436        }
437    }
438
439    for(;;){
440        int d;
441        const int dir= next_dir;
442        const int x= best[0];
443        const int y= best[1];
444        next_dir=-1;
445
446        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
447        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
448        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
449        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
450
451        if(next_dir==-1){
452            return dmin;
453        }
454    }
455}
456
457static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
458                                       int src_index, int ref_index, const int penalty_factor,
459                                       int size, int h, int flags)
460{
461    MotionEstContext * const c= &s->me;
462    me_cmp_func cmpf, chroma_cmpf;
463    int dia_size;
464    LOAD_COMMON
465    LOAD_COMMON2
466    unsigned map_generation = c->map_generation;
467
468    cmpf        = s->mecc.me_cmp[size];
469    chroma_cmpf = s->mecc.me_cmp[size + 1];
470
471    for(dia_size=1; dia_size<=4; dia_size++){
472        int dir;
473        const int x= best[0];
474        const int y= best[1];
475
476        if(dia_size&(dia_size-1)) continue;
477
478        if(   x + dia_size > xmax
479           || x - dia_size < xmin
480           || y + dia_size > ymax
481           || y - dia_size < ymin)
482           continue;
483
484        for(dir= 0; dir<dia_size; dir+=2){
485            int d;
486
487            CHECK_MV(x + dir           , y + dia_size - dir);
488            CHECK_MV(x + dia_size - dir, y - dir           );
489            CHECK_MV(x - dir           , y - dia_size + dir);
490            CHECK_MV(x - dia_size + dir, y + dir           );
491        }
492
493        if(x!=best[0] || y!=best[1])
494            dia_size=0;
495    }
496    return dmin;
497}
498
499static int hex_search(MpegEncContext * s, int *best, int dmin,
500                                       int src_index, int ref_index, const int penalty_factor,
501                                       int size, int h, int flags, int dia_size)
502{
503    MotionEstContext * const c= &s->me;
504    me_cmp_func cmpf, chroma_cmpf;
505    LOAD_COMMON
506    LOAD_COMMON2
507    unsigned map_generation = c->map_generation;
508    int x,y,d;
509    const int dec= dia_size & (dia_size-1);
510
511    cmpf        = s->mecc.me_cmp[size];
512    chroma_cmpf = s->mecc.me_cmp[size + 1];
513
514    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
515        do{
516            x= best[0];
517            y= best[1];
518
519            CHECK_CLIPPED_MV(x  -dia_size    , y);
520            CHECK_CLIPPED_MV(x+  dia_size    , y);
521            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
522            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
523            if(dia_size>1){
524                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
525                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
526            }
527        }while(best[0] != x || best[1] != y);
528    }
529
530    return dmin;
531}
532
533static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
534                                       int src_index, int ref_index, const int penalty_factor,
535                                       int size, int h, int flags)
536{
537    MotionEstContext * const c= &s->me;
538    me_cmp_func cmpf, chroma_cmpf;
539    LOAD_COMMON
540    LOAD_COMMON2
541    unsigned map_generation = c->map_generation;
542    int x,y,i,d;
543    int dia_size= c->dia_size&0xFF;
544    const int dec= dia_size & (dia_size-1);
545    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
546                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
547
548    cmpf        = s->mecc.me_cmp[size];
549    chroma_cmpf = s->mecc.me_cmp[size + 1];
550
551    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
552        do{
553            x= best[0];
554            y= best[1];
555            for(i=0; i<8; i++){
556                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
557            }
558        }while(best[0] != x || best[1] != y);
559    }
560
561    x= best[0];
562    y= best[1];
563    CHECK_CLIPPED_MV(x+1, y);
564    CHECK_CLIPPED_MV(x, y+1);
565    CHECK_CLIPPED_MV(x-1, y);
566    CHECK_CLIPPED_MV(x, y-1);
567
568    return dmin;
569}
570
571static int umh_search(MpegEncContext * s, int *best, int dmin,
572                                       int src_index, int ref_index, const int penalty_factor,
573                                       int size, int h, int flags)
574{
575    MotionEstContext * const c= &s->me;
576    me_cmp_func cmpf, chroma_cmpf;
577    LOAD_COMMON
578    LOAD_COMMON2
579    unsigned map_generation = c->map_generation;
580    int x,y,x2,y2, i, j, d;
581    const int dia_size= c->dia_size&0xFE;
582    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
583                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
584                                 {-2, 3}, { 0, 4}, { 2, 3},
585                                 {-2,-3}, { 0,-4}, { 2,-3},};
586
587    cmpf        = s->mecc.me_cmp[size];
588    chroma_cmpf = s->mecc.me_cmp[size + 1];
589
590    x= best[0];
591    y= best[1];
592    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
593        CHECK_MV(x2, y);
594    }
595    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
596        CHECK_MV(x, y2);
597    }
598
599    x= best[0];
600    y= best[1];
601    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
602        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
603            CHECK_MV(x2, y2);
604        }
605    }
606
607//FIXME prevent the CLIP stuff
608
609    for(j=1; j<=dia_size/4; j++){
610        for(i=0; i<16; i++){
611            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
612        }
613    }
614
615    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
616}
617
618static int full_search(MpegEncContext * s, int *best, int dmin,
619                                       int src_index, int ref_index, const int penalty_factor,
620                                       int size, int h, int flags)
621{
622    MotionEstContext * const c= &s->me;
623    me_cmp_func cmpf, chroma_cmpf;
624    LOAD_COMMON
625    LOAD_COMMON2
626    unsigned map_generation = c->map_generation;
627    int x,y, d;
628    const int dia_size= c->dia_size&0xFF;
629
630    cmpf        = s->mecc.me_cmp[size];
631    chroma_cmpf = s->mecc.me_cmp[size + 1];
632
633    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
634        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
635            CHECK_MV(x, y);
636        }
637    }
638
639    x= best[0];
640    y= best[1];
641    d= dmin;
642    CHECK_CLIPPED_MV(x  , y);
643    CHECK_CLIPPED_MV(x+1, y);
644    CHECK_CLIPPED_MV(x, y+1);
645    CHECK_CLIPPED_MV(x-1, y);
646    CHECK_CLIPPED_MV(x, y-1);
647    best[0]= x;
648    best[1]= y;
649
650    return d;
651}
652
653#define SAB_CHECK_MV(ax,ay)\
654{\
655    const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
656    const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
657    if(map[index]!=key){\
658        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
659        map[index]= key;\
660        score_map[index]= d;\
661        d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
662        if(d < minima[minima_count-1].height){\
663            int j=0;\
664            \
665            while(d >= minima[j].height) j++;\
666\
667            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
668\
669            minima[j].checked= 0;\
670            minima[j].height= d;\
671            minima[j].x= ax;\
672            minima[j].y= ay;\
673            \
674            i=-1;\
675            continue;\
676        }\
677    }\
678}
679
680#define MAX_SAB_SIZE ME_MAP_SIZE
681static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
682                                       int src_index, int ref_index, const int penalty_factor,
683                                       int size, int h, int flags)
684{
685    MotionEstContext * const c= &s->me;
686    me_cmp_func cmpf, chroma_cmpf;
687    Minima minima[MAX_SAB_SIZE];
688    const int minima_count= FFABS(c->dia_size);
689    int i, j;
690    LOAD_COMMON
691    LOAD_COMMON2
692    unsigned map_generation = c->map_generation;
693
694    av_assert1(minima_count <= MAX_SAB_SIZE);
695
696    cmpf        = s->mecc.me_cmp[size];
697    chroma_cmpf = s->mecc.me_cmp[size + 1];
698
699    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
700      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
701     */
702    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
703        uint32_t key= map[i];
704
705        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
706
707        if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation)
708            continue;
709
710        minima[j].height= score_map[i];
711        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
712        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
713        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
714        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
715
716        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
717        if(   minima[j].x > xmax || minima[j].x < xmin
718           || minima[j].y > ymax || minima[j].y < ymin)
719            continue;
720
721        minima[j].checked=0;
722        if(minima[j].x || minima[j].y)
723            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
724
725        j++;
726    }
727
728    AV_QSORT(minima, j, Minima, minima_cmp);
729
730    for(; j<minima_count; j++){
731        minima[j].height=256*256*256*64;
732        minima[j].checked=0;
733        minima[j].x= minima[j].y=0;
734    }
735
736    for(i=0; i<minima_count; i++){
737        const int x= minima[i].x;
738        const int y= minima[i].y;
739        int d;
740
741        if(minima[i].checked) continue;
742
743        if(   x >= xmax || x <= xmin
744           || y >= ymax || y <= ymin)
745           continue;
746
747        SAB_CHECK_MV(x-1, y)
748        SAB_CHECK_MV(x+1, y)
749        SAB_CHECK_MV(x  , y-1)
750        SAB_CHECK_MV(x  , y+1)
751
752        minima[i].checked= 1;
753    }
754
755    best[0]= minima[0].x;
756    best[1]= minima[0].y;
757    dmin= minima[0].height;
758
759    if(   best[0] < xmax && best[0] > xmin
760       && best[1] < ymax && best[1] > ymin){
761        int d;
762        // ensure that the reference samples for hpel refinement are in the map
763        CHECK_MV(best[0]-1, best[1])
764        CHECK_MV(best[0]+1, best[1])
765        CHECK_MV(best[0], best[1]-1)
766        CHECK_MV(best[0], best[1]+1)
767    }
768    return dmin;
769}
770
771static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
772                                       int src_index, int ref_index, const int penalty_factor,
773                                       int size, int h, int flags)
774{
775    MotionEstContext * const c= &s->me;
776    me_cmp_func cmpf, chroma_cmpf;
777    int dia_size;
778    LOAD_COMMON
779    LOAD_COMMON2
780    unsigned map_generation = c->map_generation;
781
782    cmpf        = s->mecc.me_cmp[size];
783    chroma_cmpf = s->mecc.me_cmp[size + 1];
784
785    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
786        int dir, start, end;
787        const int x= best[0];
788        const int y= best[1];
789
790        start= FFMAX(0, y + dia_size - ymax);
791        end  = FFMIN(dia_size, xmax - x + 1);
792        for(dir= start; dir<end; dir++){
793            int d;
794
795//check(x + dir,y + dia_size - dir,0, a0)
796            CHECK_MV(x + dir           , y + dia_size - dir);
797        }
798
799        start= FFMAX(0, x + dia_size - xmax);
800        end  = FFMIN(dia_size, y - ymin + 1);
801        for(dir= start; dir<end; dir++){
802            int d;
803
804//check(x + dia_size - dir, y - dir,0, a1)
805            CHECK_MV(x + dia_size - dir, y - dir           );
806        }
807
808        start= FFMAX(0, -y + dia_size + ymin );
809        end  = FFMIN(dia_size, x - xmin + 1);
810        for(dir= start; dir<end; dir++){
811            int d;
812
813//check(x - dir,y - dia_size + dir,0, a2)
814            CHECK_MV(x - dir           , y - dia_size + dir);
815        }
816
817        start= FFMAX(0, -x + dia_size + xmin );
818        end  = FFMIN(dia_size, ymax - y + 1);
819        for(dir= start; dir<end; dir++){
820            int d;
821
822//check(x - dia_size + dir, y + dir,0, a3)
823            CHECK_MV(x - dia_size + dir, y + dir           );
824        }
825
826        if(x!=best[0] || y!=best[1])
827            dia_size=0;
828    }
829    return dmin;
830}
831
832static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
833                                       int src_index, int ref_index, const int penalty_factor,
834                                       int size, int h, int flags){
835    MotionEstContext * const c= &s->me;
836    if(c->dia_size==-1)
837        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838    else if(c->dia_size<-1)
839        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840    else if(c->dia_size<2)
841        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842    else if(c->dia_size>1024)
843        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844    else if(c->dia_size>768)
845        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846    else if(c->dia_size>512)
847        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848    else if(c->dia_size>256)
849        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850    else
851        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
852}
853
854/**
855   @param P a list of candidate mvs to check before starting the
856   iterative search. If one of the candidates is close to the optimal mv, then
857   it takes fewer iterations. And it increases the chance that we find the
858   optimal mv.
859 */
860static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861                             int P[10][2], int src_index, int ref_index, const int16_t (*last_mv)[2],
862                             int ref_mv_scale, int flags, int size, int h)
863{
864    MotionEstContext * const c= &s->me;
865    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
866                               i.e. the difference between the position of the
867                               block currently being encoded and the position of
868                               the block chosen to predict it from. */
869    int d;                   ///< the score (cmp + penalty) of any given mv
870    int dmin;                /**< the best value of d, i.e. the score
871                               corresponding to the mv stored in best[]. */
872    unsigned map_generation;
873    int penalty_factor;
874    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
875    const int ref_mv_xy = s->mb_x + s->mb_y * ref_mv_stride; // add to last_mv before passing FIXME
876    me_cmp_func cmpf, chroma_cmpf;
877
878    LOAD_COMMON
879    LOAD_COMMON2
880
881    if(c->pre_pass){
882        penalty_factor= c->pre_penalty_factor;
883        cmpf           = s->mecc.me_pre_cmp[size];
884        chroma_cmpf    = s->mecc.me_pre_cmp[size + 1];
885    }else{
886        penalty_factor= c->penalty_factor;
887        cmpf           = s->mecc.me_cmp[size];
888        chroma_cmpf    = s->mecc.me_cmp[size + 1];
889    }
890
891    map_generation= update_map_generation(c);
892
893    av_assert2(cmpf);
894    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895    map[0]= map_generation;
896    score_map[0]= dmin;
897
898    //FIXME precalc first term below?
899    if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
900        s->mpv_flags & FF_MPV_FLAG_MV0)
901        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
902
903    /* first line */
904    if (s->first_slice_line) {
905        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
906        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
907                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908    }else{
909        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
910                    && ( P_LEFT[0]    |P_LEFT[1]
911                        |P_TOP[0]     |P_TOP[1]
912                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
913            *mx_ptr= 0;
914            *my_ptr= 0;
915            c->skip=1;
916            return dmin;
917        }
918        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
919        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
920        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
921        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
922        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
923        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
924                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
925        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
926        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
927        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
928    }
929    if(dmin>h*h*4){
930        if(c->pre_pass){
931            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
932                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
933            if(!s->first_slice_line)
934                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
935                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936        }else{
937            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
938                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
939            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
940                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
941                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
942        }
943    }
944
945    if(c->avctx->last_predictor_count){
946        const int count= c->avctx->last_predictor_count;
947        const int xstart= FFMAX(0, s->mb_x - count);
948        const int ystart= FFMAX(0, s->mb_y - count);
949        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
950        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
951        int mb_y;
952
953        for(mb_y=ystart; mb_y<yend; mb_y++){
954            int mb_x;
955            for(mb_x=xstart; mb_x<xend; mb_x++){
956                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
957                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
958                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959
960                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
961                CHECK_MV(mx,my)
962            }
963        }
964    }
965
966//check(best[0],best[1],0, b0)
967    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968
969//check(best[0],best[1],0, b1)
970    *mx_ptr= best[0];
971    *my_ptr= best[1];
972
973    return dmin;
974}
975
976//this function is dedicated to the brain damaged gcc
977int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
978                          int P[10][2], int src_index, int ref_index,
979                          const int16_t (*last_mv)[2], int ref_mv_scale,
980                          int size, int h)
981{
982    MotionEstContext * const c= &s->me;
983//FIXME convert other functions in the same way if faster
984    if(c->flags==0 && h==16 && size==0){
985        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986//    case FLAG_QPEL:
987//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988    }else{
989        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
990    }
991}
992
993static int epzs_motion_search2(MpegEncContext * s,
994                             int *mx_ptr, int *my_ptr, int P[10][2],
995                             int src_index, int ref_index, const int16_t (*last_mv)[2],
996                             int ref_mv_scale, const int size)
997{
998    MotionEstContext * const c= &s->me;
999    int best[2]={0, 0};
1000    int d, dmin;
1001    unsigned map_generation;
1002    const int penalty_factor= c->penalty_factor;
1003    const int h=8;
1004    const int ref_mv_stride= s->mb_stride;
1005    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006    me_cmp_func cmpf, chroma_cmpf;
1007    LOAD_COMMON
1008    int flags= c->flags;
1009    LOAD_COMMON2
1010
1011    cmpf        = s->mecc.me_cmp[size];
1012    chroma_cmpf = s->mecc.me_cmp[size + 1];
1013
1014    map_generation= update_map_generation(c);
1015
1016    dmin = 1000000;
1017
1018    /* first line */
1019    if (s->first_slice_line) {
1020        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1024    }else{
1025        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026        //FIXME try some early stop
1027        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1033    }
1034    if(dmin>64*4){
1035        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1038            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1040    }
1041
1042    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1043
1044    *mx_ptr= best[0];
1045    *my_ptr= best[1];
1046
1047    return dmin;
1048}
1049