/*
 * Motion estimation
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
/**
 * @file
 * Motion estimation template.
 */
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "libavutil/qsort.h"
28cabdff1aSopenharmony_ci#include "mpegvideo.h"
29cabdff1aSopenharmony_ci
30cabdff1aSopenharmony_ci//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31cabdff1aSopenharmony_ci#define LOAD_COMMON\
32cabdff1aSopenharmony_ci    uint32_t av_unused * const score_map= c->score_map;\
33cabdff1aSopenharmony_ci    const int av_unused xmin= c->xmin;\
34cabdff1aSopenharmony_ci    const int av_unused ymin= c->ymin;\
35cabdff1aSopenharmony_ci    const int av_unused xmax= c->xmax;\
36cabdff1aSopenharmony_ci    const int av_unused ymax= c->ymax;\
37cabdff1aSopenharmony_ci    const uint8_t *mv_penalty = c->current_mv_penalty; \
38cabdff1aSopenharmony_ci    const int pred_x= c->pred_x;\
39cabdff1aSopenharmony_ci    const int pred_y= c->pred_y;\
40cabdff1aSopenharmony_ci
41cabdff1aSopenharmony_ci#define CHECK_HALF_MV(dx, dy, x, y)\
42cabdff1aSopenharmony_ci{\
43cabdff1aSopenharmony_ci    const int hx= 2*(x)+(dx);\
44cabdff1aSopenharmony_ci    const int hy= 2*(y)+(dy);\
45cabdff1aSopenharmony_ci    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
46cabdff1aSopenharmony_ci    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
47cabdff1aSopenharmony_ci    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
48cabdff1aSopenharmony_ci}
49cabdff1aSopenharmony_ci
50cabdff1aSopenharmony_cistatic int hpel_motion_search(MpegEncContext * s,
51cabdff1aSopenharmony_ci                                  int *mx_ptr, int *my_ptr, int dmin,
52cabdff1aSopenharmony_ci                                  int src_index, int ref_index,
53cabdff1aSopenharmony_ci                                  int size, int h)
54cabdff1aSopenharmony_ci{
55cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
56cabdff1aSopenharmony_ci    const int mx = *mx_ptr;
57cabdff1aSopenharmony_ci    const int my = *my_ptr;
58cabdff1aSopenharmony_ci    const int penalty_factor= c->sub_penalty_factor;
59cabdff1aSopenharmony_ci    me_cmp_func cmp_sub, chroma_cmp_sub;
60cabdff1aSopenharmony_ci    int bx=2*mx, by=2*my;
61cabdff1aSopenharmony_ci
62cabdff1aSopenharmony_ci    LOAD_COMMON
63cabdff1aSopenharmony_ci    int flags= c->sub_flags;
64cabdff1aSopenharmony_ci
65cabdff1aSopenharmony_ci //FIXME factorize
66cabdff1aSopenharmony_ci
67cabdff1aSopenharmony_ci    cmp_sub        = s->mecc.me_sub_cmp[size];
68cabdff1aSopenharmony_ci    chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
69cabdff1aSopenharmony_ci
70cabdff1aSopenharmony_ci    if(c->skip){ //FIXME move out of hpel?
71cabdff1aSopenharmony_ci        *mx_ptr = 0;
72cabdff1aSopenharmony_ci        *my_ptr = 0;
73cabdff1aSopenharmony_ci        return dmin;
74cabdff1aSopenharmony_ci    }
75cabdff1aSopenharmony_ci
76cabdff1aSopenharmony_ci    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
77cabdff1aSopenharmony_ci        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
78cabdff1aSopenharmony_ci        if(mx || my || size>0)
79cabdff1aSopenharmony_ci            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
80cabdff1aSopenharmony_ci    }
81cabdff1aSopenharmony_ci
82cabdff1aSopenharmony_ci    if (mx > xmin && mx < xmax &&
83cabdff1aSopenharmony_ci        my > ymin && my < ymax) {
84cabdff1aSopenharmony_ci        int d= dmin;
85cabdff1aSopenharmony_ci        const int index = my * (1 << ME_MAP_SHIFT) + mx;
86cabdff1aSopenharmony_ci        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
87cabdff1aSopenharmony_ci                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
88cabdff1aSopenharmony_ci        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
89cabdff1aSopenharmony_ci                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
90cabdff1aSopenharmony_ci        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
91cabdff1aSopenharmony_ci                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
92cabdff1aSopenharmony_ci        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
93cabdff1aSopenharmony_ci                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
94cabdff1aSopenharmony_ci
95cabdff1aSopenharmony_ci#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
96cabdff1aSopenharmony_ci        unsigned key;
97cabdff1aSopenharmony_ci        unsigned map_generation= c->map_generation;
98cabdff1aSopenharmony_ci        key = (my - 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
99cabdff1aSopenharmony_ci        av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
100cabdff1aSopenharmony_ci        key = (my + 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
101cabdff1aSopenharmony_ci        av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
102cabdff1aSopenharmony_ci        key = (my) * (1 << ME_MAP_MV_BITS) + (mx + 1) + map_generation;
103cabdff1aSopenharmony_ci        av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
104cabdff1aSopenharmony_ci        key = (my) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation;
105cabdff1aSopenharmony_ci        av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
106cabdff1aSopenharmony_ci#endif
107cabdff1aSopenharmony_ci        if(t<=b){
108cabdff1aSopenharmony_ci            CHECK_HALF_MV(0, 1, mx  ,my-1)
109cabdff1aSopenharmony_ci            if(l<=r){
110cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 1, mx-1, my-1)
111cabdff1aSopenharmony_ci                if(t+r<=b+l){
112cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx  , my-1)
113cabdff1aSopenharmony_ci                }else{
114cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx-1, my  )
115cabdff1aSopenharmony_ci                }
116cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 0, mx-1, my  )
117cabdff1aSopenharmony_ci            }else{
118cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 1, mx  , my-1)
119cabdff1aSopenharmony_ci                if(t+l<=b+r){
120cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx-1, my-1)
121cabdff1aSopenharmony_ci                }else{
122cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx  , my  )
123cabdff1aSopenharmony_ci                }
124cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 0, mx  , my  )
125cabdff1aSopenharmony_ci            }
126cabdff1aSopenharmony_ci        }else{
127cabdff1aSopenharmony_ci            if(l<=r){
128cabdff1aSopenharmony_ci                if(t+l<=b+r){
129cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx-1, my-1)
130cabdff1aSopenharmony_ci                }else{
131cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx  , my  )
132cabdff1aSopenharmony_ci                }
133cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 0, mx-1, my)
134cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 1, mx-1, my)
135cabdff1aSopenharmony_ci            }else{
136cabdff1aSopenharmony_ci                if(t+r<=b+l){
137cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx  , my-1)
138cabdff1aSopenharmony_ci                }else{
139cabdff1aSopenharmony_ci                    CHECK_HALF_MV(1, 1, mx-1, my)
140cabdff1aSopenharmony_ci                }
141cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 0, mx  , my)
142cabdff1aSopenharmony_ci                CHECK_HALF_MV(1, 1, mx  , my)
143cabdff1aSopenharmony_ci            }
144cabdff1aSopenharmony_ci            CHECK_HALF_MV(0, 1, mx  , my)
145cabdff1aSopenharmony_ci        }
146cabdff1aSopenharmony_ci        av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
147cabdff1aSopenharmony_ci    }
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_ci    *mx_ptr = bx;
150cabdff1aSopenharmony_ci    *my_ptr = by;
151cabdff1aSopenharmony_ci
152cabdff1aSopenharmony_ci    return dmin;
153cabdff1aSopenharmony_ci}
154cabdff1aSopenharmony_ci
155cabdff1aSopenharmony_cistatic int no_sub_motion_search(MpegEncContext * s,
156cabdff1aSopenharmony_ci          int *mx_ptr, int *my_ptr, int dmin,
157cabdff1aSopenharmony_ci                                  int src_index, int ref_index,
158cabdff1aSopenharmony_ci                                  int size, int h)
159cabdff1aSopenharmony_ci{
160cabdff1aSopenharmony_ci    (*mx_ptr) *= 2;
161cabdff1aSopenharmony_ci    (*my_ptr) *= 2;
162cabdff1aSopenharmony_ci    return dmin;
163cabdff1aSopenharmony_ci}
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_cistatic inline int get_mb_score(MpegEncContext *s, int mx, int my,
166cabdff1aSopenharmony_ci                               int src_index, int ref_index, int size,
167cabdff1aSopenharmony_ci                               int h, int add_rate)
168cabdff1aSopenharmony_ci{
169cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
170cabdff1aSopenharmony_ci    const int penalty_factor= c->mb_penalty_factor;
171cabdff1aSopenharmony_ci    const int flags= c->mb_flags;
172cabdff1aSopenharmony_ci    const int qpel= flags & FLAG_QPEL;
173cabdff1aSopenharmony_ci    const int mask= 1+2*qpel;
174cabdff1aSopenharmony_ci    me_cmp_func cmp_sub, chroma_cmp_sub;
175cabdff1aSopenharmony_ci    int d;
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci    LOAD_COMMON
178cabdff1aSopenharmony_ci
179cabdff1aSopenharmony_ci //FIXME factorize
180cabdff1aSopenharmony_ci
181cabdff1aSopenharmony_ci    cmp_sub        = s->mecc.mb_cmp[size];
182cabdff1aSopenharmony_ci    chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
183cabdff1aSopenharmony_ci
184cabdff1aSopenharmony_ci    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
185cabdff1aSopenharmony_ci    //FIXME check cbp before adding penalty for (0,0) vector
186cabdff1aSopenharmony_ci    if(add_rate && (mx || my || size>0))
187cabdff1aSopenharmony_ci        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
188cabdff1aSopenharmony_ci
189cabdff1aSopenharmony_ci    return d;
190cabdff1aSopenharmony_ci}
191cabdff1aSopenharmony_ci
192cabdff1aSopenharmony_ciint ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
193cabdff1aSopenharmony_ci                    int ref_index, int size, int h, int add_rate)
194cabdff1aSopenharmony_ci{
195cabdff1aSopenharmony_ci    return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
196cabdff1aSopenharmony_ci}
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_ci#define CHECK_QUARTER_MV(dx, dy, x, y)\
199cabdff1aSopenharmony_ci{\
200cabdff1aSopenharmony_ci    const int hx= 4*(x)+(dx);\
201cabdff1aSopenharmony_ci    const int hy= 4*(y)+(dy);\
202cabdff1aSopenharmony_ci    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
203cabdff1aSopenharmony_ci    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
204cabdff1aSopenharmony_ci    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
205cabdff1aSopenharmony_ci}
206cabdff1aSopenharmony_ci
207cabdff1aSopenharmony_cistatic int qpel_motion_search(MpegEncContext * s,
208cabdff1aSopenharmony_ci                                  int *mx_ptr, int *my_ptr, int dmin,
209cabdff1aSopenharmony_ci                                  int src_index, int ref_index,
210cabdff1aSopenharmony_ci                                  int size, int h)
211cabdff1aSopenharmony_ci{
212cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
213cabdff1aSopenharmony_ci    const int mx = *mx_ptr;
214cabdff1aSopenharmony_ci    const int my = *my_ptr;
215cabdff1aSopenharmony_ci    const int penalty_factor= c->sub_penalty_factor;
216cabdff1aSopenharmony_ci    const unsigned map_generation = c->map_generation;
217cabdff1aSopenharmony_ci    const int subpel_quality= c->avctx->me_subpel_quality;
218cabdff1aSopenharmony_ci    uint32_t *map= c->map;
219cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
220cabdff1aSopenharmony_ci    me_cmp_func cmp_sub, chroma_cmp_sub;
221cabdff1aSopenharmony_ci
222cabdff1aSopenharmony_ci    LOAD_COMMON
223cabdff1aSopenharmony_ci    int flags= c->sub_flags;
224cabdff1aSopenharmony_ci
225cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
226cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
227cabdff1aSopenharmony_ci //FIXME factorize
228cabdff1aSopenharmony_ci
229cabdff1aSopenharmony_ci    cmp_sub        = s->mecc.me_sub_cmp[size];
230cabdff1aSopenharmony_ci    chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
231cabdff1aSopenharmony_ci
232cabdff1aSopenharmony_ci    if(c->skip){ //FIXME somehow move up (benchmark)
233cabdff1aSopenharmony_ci        *mx_ptr = 0;
234cabdff1aSopenharmony_ci        *my_ptr = 0;
235cabdff1aSopenharmony_ci        return dmin;
236cabdff1aSopenharmony_ci    }
237cabdff1aSopenharmony_ci
238cabdff1aSopenharmony_ci    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
239cabdff1aSopenharmony_ci        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
240cabdff1aSopenharmony_ci        if(mx || my || size>0)
241cabdff1aSopenharmony_ci            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
242cabdff1aSopenharmony_ci    }
243cabdff1aSopenharmony_ci
244cabdff1aSopenharmony_ci    if (mx > xmin && mx < xmax &&
245cabdff1aSopenharmony_ci        my > ymin && my < ymax) {
246cabdff1aSopenharmony_ci        int bx=4*mx, by=4*my;
247cabdff1aSopenharmony_ci        int d= dmin;
248cabdff1aSopenharmony_ci        int i, nx, ny;
249cabdff1aSopenharmony_ci        const int index = my * (1 << ME_MAP_SHIFT) + mx;
250cabdff1aSopenharmony_ci        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
251cabdff1aSopenharmony_ci        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
252cabdff1aSopenharmony_ci        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
253cabdff1aSopenharmony_ci        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
254cabdff1aSopenharmony_ci        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
255cabdff1aSopenharmony_ci        int best[8];
256cabdff1aSopenharmony_ci        int best_pos[8][2];
257cabdff1aSopenharmony_ci
258cabdff1aSopenharmony_ci        memset(best, 64, sizeof(int)*8);
259cabdff1aSopenharmony_ci        if(s->me.dia_size>=2){
260cabdff1aSopenharmony_ci            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261cabdff1aSopenharmony_ci            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
262cabdff1aSopenharmony_ci            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263cabdff1aSopenharmony_ci            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
264cabdff1aSopenharmony_ci
265cabdff1aSopenharmony_ci            for(ny= -3; ny <= 3; ny++){
266cabdff1aSopenharmony_ci                for(nx= -3; nx <= 3; nx++){
267cabdff1aSopenharmony_ci                    //FIXME this could overflow (unlikely though)
268cabdff1aSopenharmony_ci                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
269cabdff1aSopenharmony_ci                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
270cabdff1aSopenharmony_ci                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
271cabdff1aSopenharmony_ci                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
272cabdff1aSopenharmony_ci                    int i;
273cabdff1aSopenharmony_ci
274cabdff1aSopenharmony_ci                    if((nx&3)==0 && (ny&3)==0) continue;
275cabdff1aSopenharmony_ci
276cabdff1aSopenharmony_ci                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
277cabdff1aSopenharmony_ci
278cabdff1aSopenharmony_ci//                    if(nx&1) score-=1024*c->penalty_factor;
279cabdff1aSopenharmony_ci//                    if(ny&1) score-=1024*c->penalty_factor;
280cabdff1aSopenharmony_ci
281cabdff1aSopenharmony_ci                    for(i=0; i<8; i++){
282cabdff1aSopenharmony_ci                        if(score < best[i]){
283cabdff1aSopenharmony_ci                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
284cabdff1aSopenharmony_ci                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
285cabdff1aSopenharmony_ci                            best[i]= score;
286cabdff1aSopenharmony_ci                            best_pos[i][0]= nx + 4*mx;
287cabdff1aSopenharmony_ci                            best_pos[i][1]= ny + 4*my;
288cabdff1aSopenharmony_ci                            break;
289cabdff1aSopenharmony_ci                        }
290cabdff1aSopenharmony_ci                    }
291cabdff1aSopenharmony_ci                }
292cabdff1aSopenharmony_ci            }
293cabdff1aSopenharmony_ci        }else{
294cabdff1aSopenharmony_ci            int tl;
295cabdff1aSopenharmony_ci            //FIXME this could overflow (unlikely though)
296cabdff1aSopenharmony_ci            const int cx = 4*(r - l);
297cabdff1aSopenharmony_ci            const int cx2= r + l - 2*c;
298cabdff1aSopenharmony_ci            const int cy = 4*(b - t);
299cabdff1aSopenharmony_ci            const int cy2= b + t - 2*c;
300cabdff1aSopenharmony_ci            int cxy;
301cabdff1aSopenharmony_ci
302cabdff1aSopenharmony_ci            if (map[(index - (1 << ME_MAP_SHIFT) - 1) & (ME_MAP_SIZE - 1)] ==
303cabdff1aSopenharmony_ci                (my - 1) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation) {
304cabdff1aSopenharmony_ci                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
305cabdff1aSopenharmony_ci            }else{
306cabdff1aSopenharmony_ci                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
307cabdff1aSopenharmony_ci            }
308cabdff1aSopenharmony_ci
309cabdff1aSopenharmony_ci            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
310cabdff1aSopenharmony_ci
311cabdff1aSopenharmony_ci            av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
312cabdff1aSopenharmony_ci            av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
313cabdff1aSopenharmony_ci            av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
314cabdff1aSopenharmony_ci            av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
315cabdff1aSopenharmony_ci            av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
316cabdff1aSopenharmony_ci
317cabdff1aSopenharmony_ci            for(ny= -3; ny <= 3; ny++){
318cabdff1aSopenharmony_ci                for(nx= -3; nx <= 3; nx++){
319cabdff1aSopenharmony_ci                    //FIXME this could overflow (unlikely though)
320cabdff1aSopenharmony_ci                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
321cabdff1aSopenharmony_ci                    int i;
322cabdff1aSopenharmony_ci
323cabdff1aSopenharmony_ci                    if((nx&3)==0 && (ny&3)==0) continue;
324cabdff1aSopenharmony_ci
325cabdff1aSopenharmony_ci                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
326cabdff1aSopenharmony_ci//                    if(nx&1) score-=32*c->penalty_factor;
327cabdff1aSopenharmony_ci  //                  if(ny&1) score-=32*c->penalty_factor;
328cabdff1aSopenharmony_ci
329cabdff1aSopenharmony_ci                    for(i=0; i<8; i++){
330cabdff1aSopenharmony_ci                        if(score < best[i]){
331cabdff1aSopenharmony_ci                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
332cabdff1aSopenharmony_ci                            memmove(best_pos[i + 1], best_pos[i], sizeof(best_pos[0]) * (7 - i));
333cabdff1aSopenharmony_ci                            best[i]= score;
334cabdff1aSopenharmony_ci                            best_pos[i][0]= nx + 4*mx;
335cabdff1aSopenharmony_ci                            best_pos[i][1]= ny + 4*my;
336cabdff1aSopenharmony_ci                            break;
337cabdff1aSopenharmony_ci                        }
338cabdff1aSopenharmony_ci                    }
339cabdff1aSopenharmony_ci                }
340cabdff1aSopenharmony_ci            }
341cabdff1aSopenharmony_ci        }
342cabdff1aSopenharmony_ci        for(i=0; i<subpel_quality; i++){
343cabdff1aSopenharmony_ci            nx= best_pos[i][0];
344cabdff1aSopenharmony_ci            ny= best_pos[i][1];
345cabdff1aSopenharmony_ci            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
346cabdff1aSopenharmony_ci        }
347cabdff1aSopenharmony_ci
348cabdff1aSopenharmony_ci        av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
349cabdff1aSopenharmony_ci
350cabdff1aSopenharmony_ci        *mx_ptr = bx;
351cabdff1aSopenharmony_ci        *my_ptr = by;
352cabdff1aSopenharmony_ci    }else{
353cabdff1aSopenharmony_ci        *mx_ptr =4*mx;
354cabdff1aSopenharmony_ci        *my_ptr =4*my;
355cabdff1aSopenharmony_ci    }
356cabdff1aSopenharmony_ci
357cabdff1aSopenharmony_ci    return dmin;
358cabdff1aSopenharmony_ci}
359cabdff1aSopenharmony_ci
360cabdff1aSopenharmony_ci
361cabdff1aSopenharmony_ci#define CHECK_MV(x,y)\
362cabdff1aSopenharmony_ci{\
363cabdff1aSopenharmony_ci    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
364cabdff1aSopenharmony_ci    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
365cabdff1aSopenharmony_ci    av_assert2((x) >= xmin);\
366cabdff1aSopenharmony_ci    av_assert2((x) <= xmax);\
367cabdff1aSopenharmony_ci    av_assert2((y) >= ymin);\
368cabdff1aSopenharmony_ci    av_assert2((y) <= ymax);\
369cabdff1aSopenharmony_ci    if(map[index]!=key){\
370cabdff1aSopenharmony_ci        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
371cabdff1aSopenharmony_ci        map[index]= key;\
372cabdff1aSopenharmony_ci        score_map[index]= d;\
373cabdff1aSopenharmony_ci        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
374cabdff1aSopenharmony_ci        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
375cabdff1aSopenharmony_ci    }\
376cabdff1aSopenharmony_ci}
377cabdff1aSopenharmony_ci
378cabdff1aSopenharmony_ci#define CHECK_CLIPPED_MV(ax,ay)\
379cabdff1aSopenharmony_ci{\
380cabdff1aSopenharmony_ci    const int Lx= ax;\
381cabdff1aSopenharmony_ci    const int Ly= ay;\
382cabdff1aSopenharmony_ci    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
383cabdff1aSopenharmony_ci    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
384cabdff1aSopenharmony_ci    CHECK_MV(Lx2, Ly2)\
385cabdff1aSopenharmony_ci}
386cabdff1aSopenharmony_ci
387cabdff1aSopenharmony_ci#define CHECK_MV_DIR(x,y,new_dir)\
388cabdff1aSopenharmony_ci{\
389cabdff1aSopenharmony_ci    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
390cabdff1aSopenharmony_ci    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
391cabdff1aSopenharmony_ci    if(map[index]!=key){\
392cabdff1aSopenharmony_ci        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
393cabdff1aSopenharmony_ci        map[index]= key;\
394cabdff1aSopenharmony_ci        score_map[index]= d;\
395cabdff1aSopenharmony_ci        d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\
396cabdff1aSopenharmony_ci        if(d<dmin){\
397cabdff1aSopenharmony_ci            best[0]=x;\
398cabdff1aSopenharmony_ci            best[1]=y;\
399cabdff1aSopenharmony_ci            dmin=d;\
400cabdff1aSopenharmony_ci            next_dir= new_dir;\
401cabdff1aSopenharmony_ci        }\
402cabdff1aSopenharmony_ci    }\
403cabdff1aSopenharmony_ci}
404cabdff1aSopenharmony_ci
405cabdff1aSopenharmony_ci#define check(x,y,S,v)\
406cabdff1aSopenharmony_ciif( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
407cabdff1aSopenharmony_ciif( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
408cabdff1aSopenharmony_ciif( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
409cabdff1aSopenharmony_ciif( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
410cabdff1aSopenharmony_ci
411cabdff1aSopenharmony_ci#define LOAD_COMMON2\
412cabdff1aSopenharmony_ci    uint32_t *map= c->map;\
413cabdff1aSopenharmony_ci    const int qpel= flags&FLAG_QPEL;\
414cabdff1aSopenharmony_ci    const int shift= 1+qpel;\
415cabdff1aSopenharmony_ci
416cabdff1aSopenharmony_cistatic av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
417cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
418cabdff1aSopenharmony_ci                                       int size, int h, int flags)
419cabdff1aSopenharmony_ci{
420cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
421cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
422cabdff1aSopenharmony_ci    int next_dir=-1;
423cabdff1aSopenharmony_ci    LOAD_COMMON
424cabdff1aSopenharmony_ci    LOAD_COMMON2
425cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
426cabdff1aSopenharmony_ci
427cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
428cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
429cabdff1aSopenharmony_ci
430cabdff1aSopenharmony_ci    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
431cabdff1aSopenharmony_ci        const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
432cabdff1aSopenharmony_ci        const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
433cabdff1aSopenharmony_ci        if (map[index] != key) { // this will be executed only very rarely
434cabdff1aSopenharmony_ci            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
435cabdff1aSopenharmony_ci            map[index]= key;
436cabdff1aSopenharmony_ci        }
437cabdff1aSopenharmony_ci    }
438cabdff1aSopenharmony_ci
439cabdff1aSopenharmony_ci    for(;;){
440cabdff1aSopenharmony_ci        int d;
441cabdff1aSopenharmony_ci        const int dir= next_dir;
442cabdff1aSopenharmony_ci        const int x= best[0];
443cabdff1aSopenharmony_ci        const int y= best[1];
444cabdff1aSopenharmony_ci        next_dir=-1;
445cabdff1aSopenharmony_ci
446cabdff1aSopenharmony_ci        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
447cabdff1aSopenharmony_ci        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
448cabdff1aSopenharmony_ci        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
449cabdff1aSopenharmony_ci        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
450cabdff1aSopenharmony_ci
451cabdff1aSopenharmony_ci        if(next_dir==-1){
452cabdff1aSopenharmony_ci            return dmin;
453cabdff1aSopenharmony_ci        }
454cabdff1aSopenharmony_ci    }
455cabdff1aSopenharmony_ci}
456cabdff1aSopenharmony_ci
457cabdff1aSopenharmony_cistatic int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
458cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
459cabdff1aSopenharmony_ci                                       int size, int h, int flags)
460cabdff1aSopenharmony_ci{
461cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
462cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
463cabdff1aSopenharmony_ci    int dia_size;
464cabdff1aSopenharmony_ci    LOAD_COMMON
465cabdff1aSopenharmony_ci    LOAD_COMMON2
466cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
467cabdff1aSopenharmony_ci
468cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
469cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
470cabdff1aSopenharmony_ci
471cabdff1aSopenharmony_ci    for(dia_size=1; dia_size<=4; dia_size++){
472cabdff1aSopenharmony_ci        int dir;
473cabdff1aSopenharmony_ci        const int x= best[0];
474cabdff1aSopenharmony_ci        const int y= best[1];
475cabdff1aSopenharmony_ci
476cabdff1aSopenharmony_ci        if(dia_size&(dia_size-1)) continue;
477cabdff1aSopenharmony_ci
478cabdff1aSopenharmony_ci        if(   x + dia_size > xmax
479cabdff1aSopenharmony_ci           || x - dia_size < xmin
480cabdff1aSopenharmony_ci           || y + dia_size > ymax
481cabdff1aSopenharmony_ci           || y - dia_size < ymin)
482cabdff1aSopenharmony_ci           continue;
483cabdff1aSopenharmony_ci
484cabdff1aSopenharmony_ci        for(dir= 0; dir<dia_size; dir+=2){
485cabdff1aSopenharmony_ci            int d;
486cabdff1aSopenharmony_ci
487cabdff1aSopenharmony_ci            CHECK_MV(x + dir           , y + dia_size - dir);
488cabdff1aSopenharmony_ci            CHECK_MV(x + dia_size - dir, y - dir           );
489cabdff1aSopenharmony_ci            CHECK_MV(x - dir           , y - dia_size + dir);
490cabdff1aSopenharmony_ci            CHECK_MV(x - dia_size + dir, y + dir           );
491cabdff1aSopenharmony_ci        }
492cabdff1aSopenharmony_ci
493cabdff1aSopenharmony_ci        if(x!=best[0] || y!=best[1])
494cabdff1aSopenharmony_ci            dia_size=0;
495cabdff1aSopenharmony_ci    }
496cabdff1aSopenharmony_ci    return dmin;
497cabdff1aSopenharmony_ci}
498cabdff1aSopenharmony_ci
499cabdff1aSopenharmony_cistatic int hex_search(MpegEncContext * s, int *best, int dmin,
500cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
501cabdff1aSopenharmony_ci                                       int size, int h, int flags, int dia_size)
502cabdff1aSopenharmony_ci{
503cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
504cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
505cabdff1aSopenharmony_ci    LOAD_COMMON
506cabdff1aSopenharmony_ci    LOAD_COMMON2
507cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
508cabdff1aSopenharmony_ci    int x,y,d;
509cabdff1aSopenharmony_ci    const int dec= dia_size & (dia_size-1);
510cabdff1aSopenharmony_ci
511cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
512cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
513cabdff1aSopenharmony_ci
514cabdff1aSopenharmony_ci    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
515cabdff1aSopenharmony_ci        do{
516cabdff1aSopenharmony_ci            x= best[0];
517cabdff1aSopenharmony_ci            y= best[1];
518cabdff1aSopenharmony_ci
519cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV(x  -dia_size    , y);
520cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV(x+  dia_size    , y);
521cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
522cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
523cabdff1aSopenharmony_ci            if(dia_size>1){
524cabdff1aSopenharmony_ci                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
525cabdff1aSopenharmony_ci                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
526cabdff1aSopenharmony_ci            }
527cabdff1aSopenharmony_ci        }while(best[0] != x || best[1] != y);
528cabdff1aSopenharmony_ci    }
529cabdff1aSopenharmony_ci
530cabdff1aSopenharmony_ci    return dmin;
531cabdff1aSopenharmony_ci}
532cabdff1aSopenharmony_ci
533cabdff1aSopenharmony_cistatic int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
534cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
535cabdff1aSopenharmony_ci                                       int size, int h, int flags)
536cabdff1aSopenharmony_ci{
537cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
538cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
539cabdff1aSopenharmony_ci    LOAD_COMMON
540cabdff1aSopenharmony_ci    LOAD_COMMON2
541cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
542cabdff1aSopenharmony_ci    int x,y,i,d;
543cabdff1aSopenharmony_ci    int dia_size= c->dia_size&0xFF;
544cabdff1aSopenharmony_ci    const int dec= dia_size & (dia_size-1);
545cabdff1aSopenharmony_ci    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
546cabdff1aSopenharmony_ci                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
547cabdff1aSopenharmony_ci
548cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
549cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
550cabdff1aSopenharmony_ci
551cabdff1aSopenharmony_ci    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
552cabdff1aSopenharmony_ci        do{
553cabdff1aSopenharmony_ci            x= best[0];
554cabdff1aSopenharmony_ci            y= best[1];
555cabdff1aSopenharmony_ci            for(i=0; i<8; i++){
556cabdff1aSopenharmony_ci                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
557cabdff1aSopenharmony_ci            }
558cabdff1aSopenharmony_ci        }while(best[0] != x || best[1] != y);
559cabdff1aSopenharmony_ci    }
560cabdff1aSopenharmony_ci
561cabdff1aSopenharmony_ci    x= best[0];
562cabdff1aSopenharmony_ci    y= best[1];
563cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x+1, y);
564cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x, y+1);
565cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x-1, y);
566cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x, y-1);
567cabdff1aSopenharmony_ci
568cabdff1aSopenharmony_ci    return dmin;
569cabdff1aSopenharmony_ci}
570cabdff1aSopenharmony_ci
571cabdff1aSopenharmony_cistatic int umh_search(MpegEncContext * s, int *best, int dmin,
572cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
573cabdff1aSopenharmony_ci                                       int size, int h, int flags)
574cabdff1aSopenharmony_ci{
575cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
576cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
577cabdff1aSopenharmony_ci    LOAD_COMMON
578cabdff1aSopenharmony_ci    LOAD_COMMON2
579cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
580cabdff1aSopenharmony_ci    int x,y,x2,y2, i, j, d;
581cabdff1aSopenharmony_ci    const int dia_size= c->dia_size&0xFE;
582cabdff1aSopenharmony_ci    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
583cabdff1aSopenharmony_ci                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
584cabdff1aSopenharmony_ci                                 {-2, 3}, { 0, 4}, { 2, 3},
585cabdff1aSopenharmony_ci                                 {-2,-3}, { 0,-4}, { 2,-3},};
586cabdff1aSopenharmony_ci
587cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
588cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
589cabdff1aSopenharmony_ci
590cabdff1aSopenharmony_ci    x= best[0];
591cabdff1aSopenharmony_ci    y= best[1];
592cabdff1aSopenharmony_ci    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
593cabdff1aSopenharmony_ci        CHECK_MV(x2, y);
594cabdff1aSopenharmony_ci    }
595cabdff1aSopenharmony_ci    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
596cabdff1aSopenharmony_ci        CHECK_MV(x, y2);
597cabdff1aSopenharmony_ci    }
598cabdff1aSopenharmony_ci
599cabdff1aSopenharmony_ci    x= best[0];
600cabdff1aSopenharmony_ci    y= best[1];
601cabdff1aSopenharmony_ci    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
602cabdff1aSopenharmony_ci        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
603cabdff1aSopenharmony_ci            CHECK_MV(x2, y2);
604cabdff1aSopenharmony_ci        }
605cabdff1aSopenharmony_ci    }
606cabdff1aSopenharmony_ci
607cabdff1aSopenharmony_ci//FIXME prevent the CLIP stuff
608cabdff1aSopenharmony_ci
609cabdff1aSopenharmony_ci    for(j=1; j<=dia_size/4; j++){
610cabdff1aSopenharmony_ci        for(i=0; i<16; i++){
611cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
612cabdff1aSopenharmony_ci        }
613cabdff1aSopenharmony_ci    }
614cabdff1aSopenharmony_ci
615cabdff1aSopenharmony_ci    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
616cabdff1aSopenharmony_ci}
617cabdff1aSopenharmony_ci
618cabdff1aSopenharmony_cistatic int full_search(MpegEncContext * s, int *best, int dmin,
619cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
620cabdff1aSopenharmony_ci                                       int size, int h, int flags)
621cabdff1aSopenharmony_ci{
622cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
623cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
624cabdff1aSopenharmony_ci    LOAD_COMMON
625cabdff1aSopenharmony_ci    LOAD_COMMON2
626cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
627cabdff1aSopenharmony_ci    int x,y, d;
628cabdff1aSopenharmony_ci    const int dia_size= c->dia_size&0xFF;
629cabdff1aSopenharmony_ci
630cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
631cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
632cabdff1aSopenharmony_ci
633cabdff1aSopenharmony_ci    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
634cabdff1aSopenharmony_ci        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
635cabdff1aSopenharmony_ci            CHECK_MV(x, y);
636cabdff1aSopenharmony_ci        }
637cabdff1aSopenharmony_ci    }
638cabdff1aSopenharmony_ci
639cabdff1aSopenharmony_ci    x= best[0];
640cabdff1aSopenharmony_ci    y= best[1];
641cabdff1aSopenharmony_ci    d= dmin;
642cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x  , y);
643cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x+1, y);
644cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x, y+1);
645cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x-1, y);
646cabdff1aSopenharmony_ci    CHECK_CLIPPED_MV(x, y-1);
647cabdff1aSopenharmony_ci    best[0]= x;
648cabdff1aSopenharmony_ci    best[1]= y;
649cabdff1aSopenharmony_ci
650cabdff1aSopenharmony_ci    return d;
651cabdff1aSopenharmony_ci}
652cabdff1aSopenharmony_ci
/*
 * Candidate check used inside the scan loop of sab_diamond_search().
 *
 * If (ax, ay) has no entry of the current map_generation in map[], its score
 * is computed with cmp(), stored in map[]/score_map[], and the motion vector
 * penalty is added. When the penalised score beats the worst entry of the
 * sorted minima[] array, the candidate is inserted at its sorted position
 * (the last entry is dropped), marked unchecked, and the enclosing loop is
 * restarted by setting i to -1 and executing `continue`.
 *
 * The macro must therefore be expanded directly inside a `for` loop over i,
 * with minima, minima_count, d, map, score_map, map_generation, mv_penalty,
 * shift, pred_x, pred_y, penalty_factor and the cmp() arguments in scope.
 *
 * The coordinates may be negative, so the map key/index are built with
 * unsigned shifts and the penalty index with a multiplication: left-shifting
 * a negative signed value is undefined behaviour in C. The resulting values
 * are the same as those of the signed shifts on two's complement targets.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[(ax)*(1<<shift)-pred_x] + mv_penalty[(ay)*(1<<shift)-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
679cabdff1aSopenharmony_ci
680cabdff1aSopenharmony_ci#define MAX_SAB_SIZE ME_MAP_SIZE
681cabdff1aSopenharmony_cistatic int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
682cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
683cabdff1aSopenharmony_ci                                       int size, int h, int flags)
684cabdff1aSopenharmony_ci{
685cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
686cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
687cabdff1aSopenharmony_ci    Minima minima[MAX_SAB_SIZE];
688cabdff1aSopenharmony_ci    const int minima_count= FFABS(c->dia_size);
689cabdff1aSopenharmony_ci    int i, j;
690cabdff1aSopenharmony_ci    LOAD_COMMON
691cabdff1aSopenharmony_ci    LOAD_COMMON2
692cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
693cabdff1aSopenharmony_ci
694cabdff1aSopenharmony_ci    av_assert1(minima_count <= MAX_SAB_SIZE);
695cabdff1aSopenharmony_ci
696cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
697cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
698cabdff1aSopenharmony_ci
699cabdff1aSopenharmony_ci    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
700cabdff1aSopenharmony_ci      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
701cabdff1aSopenharmony_ci     */
702cabdff1aSopenharmony_ci    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
703cabdff1aSopenharmony_ci        uint32_t key= map[i];
704cabdff1aSopenharmony_ci
705cabdff1aSopenharmony_ci        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
706cabdff1aSopenharmony_ci
707cabdff1aSopenharmony_ci        if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation)
708cabdff1aSopenharmony_ci            continue;
709cabdff1aSopenharmony_ci
710cabdff1aSopenharmony_ci        minima[j].height= score_map[i];
711cabdff1aSopenharmony_ci        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
712cabdff1aSopenharmony_ci        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
713cabdff1aSopenharmony_ci        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
714cabdff1aSopenharmony_ci        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
715cabdff1aSopenharmony_ci
716cabdff1aSopenharmony_ci        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
717cabdff1aSopenharmony_ci        if(   minima[j].x > xmax || minima[j].x < xmin
718cabdff1aSopenharmony_ci           || minima[j].y > ymax || minima[j].y < ymin)
719cabdff1aSopenharmony_ci            continue;
720cabdff1aSopenharmony_ci
721cabdff1aSopenharmony_ci        minima[j].checked=0;
722cabdff1aSopenharmony_ci        if(minima[j].x || minima[j].y)
723cabdff1aSopenharmony_ci            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
724cabdff1aSopenharmony_ci
725cabdff1aSopenharmony_ci        j++;
726cabdff1aSopenharmony_ci    }
727cabdff1aSopenharmony_ci
728cabdff1aSopenharmony_ci    AV_QSORT(minima, j, Minima, minima_cmp);
729cabdff1aSopenharmony_ci
730cabdff1aSopenharmony_ci    for(; j<minima_count; j++){
731cabdff1aSopenharmony_ci        minima[j].height=256*256*256*64;
732cabdff1aSopenharmony_ci        minima[j].checked=0;
733cabdff1aSopenharmony_ci        minima[j].x= minima[j].y=0;
734cabdff1aSopenharmony_ci    }
735cabdff1aSopenharmony_ci
736cabdff1aSopenharmony_ci    for(i=0; i<minima_count; i++){
737cabdff1aSopenharmony_ci        const int x= minima[i].x;
738cabdff1aSopenharmony_ci        const int y= minima[i].y;
739cabdff1aSopenharmony_ci        int d;
740cabdff1aSopenharmony_ci
741cabdff1aSopenharmony_ci        if(minima[i].checked) continue;
742cabdff1aSopenharmony_ci
743cabdff1aSopenharmony_ci        if(   x >= xmax || x <= xmin
744cabdff1aSopenharmony_ci           || y >= ymax || y <= ymin)
745cabdff1aSopenharmony_ci           continue;
746cabdff1aSopenharmony_ci
747cabdff1aSopenharmony_ci        SAB_CHECK_MV(x-1, y)
748cabdff1aSopenharmony_ci        SAB_CHECK_MV(x+1, y)
749cabdff1aSopenharmony_ci        SAB_CHECK_MV(x  , y-1)
750cabdff1aSopenharmony_ci        SAB_CHECK_MV(x  , y+1)
751cabdff1aSopenharmony_ci
752cabdff1aSopenharmony_ci        minima[i].checked= 1;
753cabdff1aSopenharmony_ci    }
754cabdff1aSopenharmony_ci
755cabdff1aSopenharmony_ci    best[0]= minima[0].x;
756cabdff1aSopenharmony_ci    best[1]= minima[0].y;
757cabdff1aSopenharmony_ci    dmin= minima[0].height;
758cabdff1aSopenharmony_ci
759cabdff1aSopenharmony_ci    if(   best[0] < xmax && best[0] > xmin
760cabdff1aSopenharmony_ci       && best[1] < ymax && best[1] > ymin){
761cabdff1aSopenharmony_ci        int d;
762cabdff1aSopenharmony_ci        // ensure that the reference samples for hpel refinement are in the map
763cabdff1aSopenharmony_ci        CHECK_MV(best[0]-1, best[1])
764cabdff1aSopenharmony_ci        CHECK_MV(best[0]+1, best[1])
765cabdff1aSopenharmony_ci        CHECK_MV(best[0], best[1]-1)
766cabdff1aSopenharmony_ci        CHECK_MV(best[0], best[1]+1)
767cabdff1aSopenharmony_ci    }
768cabdff1aSopenharmony_ci    return dmin;
769cabdff1aSopenharmony_ci}
770cabdff1aSopenharmony_ci
771cabdff1aSopenharmony_cistatic int var_diamond_search(MpegEncContext * s, int *best, int dmin,
772cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
773cabdff1aSopenharmony_ci                                       int size, int h, int flags)
774cabdff1aSopenharmony_ci{
775cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
776cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
777cabdff1aSopenharmony_ci    int dia_size;
778cabdff1aSopenharmony_ci    LOAD_COMMON
779cabdff1aSopenharmony_ci    LOAD_COMMON2
780cabdff1aSopenharmony_ci    unsigned map_generation = c->map_generation;
781cabdff1aSopenharmony_ci
782cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
783cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
784cabdff1aSopenharmony_ci
785cabdff1aSopenharmony_ci    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
786cabdff1aSopenharmony_ci        int dir, start, end;
787cabdff1aSopenharmony_ci        const int x= best[0];
788cabdff1aSopenharmony_ci        const int y= best[1];
789cabdff1aSopenharmony_ci
790cabdff1aSopenharmony_ci        start= FFMAX(0, y + dia_size - ymax);
791cabdff1aSopenharmony_ci        end  = FFMIN(dia_size, xmax - x + 1);
792cabdff1aSopenharmony_ci        for(dir= start; dir<end; dir++){
793cabdff1aSopenharmony_ci            int d;
794cabdff1aSopenharmony_ci
795cabdff1aSopenharmony_ci//check(x + dir,y + dia_size - dir,0, a0)
796cabdff1aSopenharmony_ci            CHECK_MV(x + dir           , y + dia_size - dir);
797cabdff1aSopenharmony_ci        }
798cabdff1aSopenharmony_ci
799cabdff1aSopenharmony_ci        start= FFMAX(0, x + dia_size - xmax);
800cabdff1aSopenharmony_ci        end  = FFMIN(dia_size, y - ymin + 1);
801cabdff1aSopenharmony_ci        for(dir= start; dir<end; dir++){
802cabdff1aSopenharmony_ci            int d;
803cabdff1aSopenharmony_ci
804cabdff1aSopenharmony_ci//check(x + dia_size - dir, y - dir,0, a1)
805cabdff1aSopenharmony_ci            CHECK_MV(x + dia_size - dir, y - dir           );
806cabdff1aSopenharmony_ci        }
807cabdff1aSopenharmony_ci
808cabdff1aSopenharmony_ci        start= FFMAX(0, -y + dia_size + ymin );
809cabdff1aSopenharmony_ci        end  = FFMIN(dia_size, x - xmin + 1);
810cabdff1aSopenharmony_ci        for(dir= start; dir<end; dir++){
811cabdff1aSopenharmony_ci            int d;
812cabdff1aSopenharmony_ci
813cabdff1aSopenharmony_ci//check(x - dir,y - dia_size + dir,0, a2)
814cabdff1aSopenharmony_ci            CHECK_MV(x - dir           , y - dia_size + dir);
815cabdff1aSopenharmony_ci        }
816cabdff1aSopenharmony_ci
817cabdff1aSopenharmony_ci        start= FFMAX(0, -x + dia_size + xmin );
818cabdff1aSopenharmony_ci        end  = FFMIN(dia_size, ymax - y + 1);
819cabdff1aSopenharmony_ci        for(dir= start; dir<end; dir++){
820cabdff1aSopenharmony_ci            int d;
821cabdff1aSopenharmony_ci
822cabdff1aSopenharmony_ci//check(x - dia_size + dir, y + dir,0, a3)
823cabdff1aSopenharmony_ci            CHECK_MV(x - dia_size + dir, y + dir           );
824cabdff1aSopenharmony_ci        }
825cabdff1aSopenharmony_ci
826cabdff1aSopenharmony_ci        if(x!=best[0] || y!=best[1])
827cabdff1aSopenharmony_ci            dia_size=0;
828cabdff1aSopenharmony_ci    }
829cabdff1aSopenharmony_ci    return dmin;
830cabdff1aSopenharmony_ci}
831cabdff1aSopenharmony_ci
832cabdff1aSopenharmony_cistatic av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
833cabdff1aSopenharmony_ci                                       int src_index, int ref_index, const int penalty_factor,
834cabdff1aSopenharmony_ci                                       int size, int h, int flags){
835cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
836cabdff1aSopenharmony_ci    if(c->dia_size==-1)
837cabdff1aSopenharmony_ci        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838cabdff1aSopenharmony_ci    else if(c->dia_size<-1)
839cabdff1aSopenharmony_ci        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840cabdff1aSopenharmony_ci    else if(c->dia_size<2)
841cabdff1aSopenharmony_ci        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842cabdff1aSopenharmony_ci    else if(c->dia_size>1024)
843cabdff1aSopenharmony_ci        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844cabdff1aSopenharmony_ci    else if(c->dia_size>768)
845cabdff1aSopenharmony_ci        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846cabdff1aSopenharmony_ci    else if(c->dia_size>512)
847cabdff1aSopenharmony_ci        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848cabdff1aSopenharmony_ci    else if(c->dia_size>256)
849cabdff1aSopenharmony_ci        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850cabdff1aSopenharmony_ci    else
851cabdff1aSopenharmony_ci        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
852cabdff1aSopenharmony_ci}
853cabdff1aSopenharmony_ci
854cabdff1aSopenharmony_ci/**
855cabdff1aSopenharmony_ci   @param P a list of candidate mvs to check before starting the
856cabdff1aSopenharmony_ci   iterative search. If one of the candidates is close to the optimal mv, then
857cabdff1aSopenharmony_ci   it takes fewer iterations. And it increases the chance that we find the
858cabdff1aSopenharmony_ci   optimal mv.
859cabdff1aSopenharmony_ci */
860cabdff1aSopenharmony_cistatic av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861cabdff1aSopenharmony_ci                             int P[10][2], int src_index, int ref_index, const int16_t (*last_mv)[2],
862cabdff1aSopenharmony_ci                             int ref_mv_scale, int flags, int size, int h)
863cabdff1aSopenharmony_ci{
864cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
865cabdff1aSopenharmony_ci    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
866cabdff1aSopenharmony_ci                               i.e. the difference between the position of the
867cabdff1aSopenharmony_ci                               block currently being encoded and the position of
868cabdff1aSopenharmony_ci                               the block chosen to predict it from. */
869cabdff1aSopenharmony_ci    int d;                   ///< the score (cmp + penalty) of any given mv
870cabdff1aSopenharmony_ci    int dmin;                /**< the best value of d, i.e. the score
871cabdff1aSopenharmony_ci                               corresponding to the mv stored in best[]. */
872cabdff1aSopenharmony_ci    unsigned map_generation;
873cabdff1aSopenharmony_ci    int penalty_factor;
874cabdff1aSopenharmony_ci    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
875cabdff1aSopenharmony_ci    const int ref_mv_xy = s->mb_x + s->mb_y * ref_mv_stride; // add to last_mv before passing FIXME
876cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
877cabdff1aSopenharmony_ci
878cabdff1aSopenharmony_ci    LOAD_COMMON
879cabdff1aSopenharmony_ci    LOAD_COMMON2
880cabdff1aSopenharmony_ci
881cabdff1aSopenharmony_ci    if(c->pre_pass){
882cabdff1aSopenharmony_ci        penalty_factor= c->pre_penalty_factor;
883cabdff1aSopenharmony_ci        cmpf           = s->mecc.me_pre_cmp[size];
884cabdff1aSopenharmony_ci        chroma_cmpf    = s->mecc.me_pre_cmp[size + 1];
885cabdff1aSopenharmony_ci    }else{
886cabdff1aSopenharmony_ci        penalty_factor= c->penalty_factor;
887cabdff1aSopenharmony_ci        cmpf           = s->mecc.me_cmp[size];
888cabdff1aSopenharmony_ci        chroma_cmpf    = s->mecc.me_cmp[size + 1];
889cabdff1aSopenharmony_ci    }
890cabdff1aSopenharmony_ci
891cabdff1aSopenharmony_ci    map_generation= update_map_generation(c);
892cabdff1aSopenharmony_ci
893cabdff1aSopenharmony_ci    av_assert2(cmpf);
894cabdff1aSopenharmony_ci    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895cabdff1aSopenharmony_ci    map[0]= map_generation;
896cabdff1aSopenharmony_ci    score_map[0]= dmin;
897cabdff1aSopenharmony_ci
898cabdff1aSopenharmony_ci    //FIXME precalc first term below?
899cabdff1aSopenharmony_ci    if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
900cabdff1aSopenharmony_ci        s->mpv_flags & FF_MPV_FLAG_MV0)
901cabdff1aSopenharmony_ci        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
902cabdff1aSopenharmony_ci
903cabdff1aSopenharmony_ci    /* first line */
904cabdff1aSopenharmony_ci    if (s->first_slice_line) {
905cabdff1aSopenharmony_ci        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
906cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
907cabdff1aSopenharmony_ci                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908cabdff1aSopenharmony_ci    }else{
909cabdff1aSopenharmony_ci        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
910cabdff1aSopenharmony_ci                    && ( P_LEFT[0]    |P_LEFT[1]
911cabdff1aSopenharmony_ci                        |P_TOP[0]     |P_TOP[1]
912cabdff1aSopenharmony_ci                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
913cabdff1aSopenharmony_ci            *mx_ptr= 0;
914cabdff1aSopenharmony_ci            *my_ptr= 0;
915cabdff1aSopenharmony_ci            c->skip=1;
916cabdff1aSopenharmony_ci            return dmin;
917cabdff1aSopenharmony_ci        }
918cabdff1aSopenharmony_ci        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
919cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
920cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
921cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
922cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
923cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
924cabdff1aSopenharmony_ci                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
925cabdff1aSopenharmony_ci        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
926cabdff1aSopenharmony_ci        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
927cabdff1aSopenharmony_ci        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
928cabdff1aSopenharmony_ci    }
929cabdff1aSopenharmony_ci    if(dmin>h*h*4){
930cabdff1aSopenharmony_ci        if(c->pre_pass){
931cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
932cabdff1aSopenharmony_ci                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
933cabdff1aSopenharmony_ci            if(!s->first_slice_line)
934cabdff1aSopenharmony_ci                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
935cabdff1aSopenharmony_ci                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936cabdff1aSopenharmony_ci        }else{
937cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
938cabdff1aSopenharmony_ci                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
939cabdff1aSopenharmony_ci            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
940cabdff1aSopenharmony_ci                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
941cabdff1aSopenharmony_ci                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
942cabdff1aSopenharmony_ci        }
943cabdff1aSopenharmony_ci    }
944cabdff1aSopenharmony_ci
945cabdff1aSopenharmony_ci    if(c->avctx->last_predictor_count){
946cabdff1aSopenharmony_ci        const int count= c->avctx->last_predictor_count;
947cabdff1aSopenharmony_ci        const int xstart= FFMAX(0, s->mb_x - count);
948cabdff1aSopenharmony_ci        const int ystart= FFMAX(0, s->mb_y - count);
949cabdff1aSopenharmony_ci        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
950cabdff1aSopenharmony_ci        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
951cabdff1aSopenharmony_ci        int mb_y;
952cabdff1aSopenharmony_ci
953cabdff1aSopenharmony_ci        for(mb_y=ystart; mb_y<yend; mb_y++){
954cabdff1aSopenharmony_ci            int mb_x;
955cabdff1aSopenharmony_ci            for(mb_x=xstart; mb_x<xend; mb_x++){
956cabdff1aSopenharmony_ci                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
957cabdff1aSopenharmony_ci                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
958cabdff1aSopenharmony_ci                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959cabdff1aSopenharmony_ci
960cabdff1aSopenharmony_ci                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
961cabdff1aSopenharmony_ci                CHECK_MV(mx,my)
962cabdff1aSopenharmony_ci            }
963cabdff1aSopenharmony_ci        }
964cabdff1aSopenharmony_ci    }
965cabdff1aSopenharmony_ci
966cabdff1aSopenharmony_ci//check(best[0],best[1],0, b0)
967cabdff1aSopenharmony_ci    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968cabdff1aSopenharmony_ci
969cabdff1aSopenharmony_ci//check(best[0],best[1],0, b1)
970cabdff1aSopenharmony_ci    *mx_ptr= best[0];
971cabdff1aSopenharmony_ci    *my_ptr= best[1];
972cabdff1aSopenharmony_ci
973cabdff1aSopenharmony_ci    return dmin;
974cabdff1aSopenharmony_ci}
975cabdff1aSopenharmony_ci
976cabdff1aSopenharmony_ci//this function is dedicated to the brain damaged gcc
977cabdff1aSopenharmony_ciint ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
978cabdff1aSopenharmony_ci                          int P[10][2], int src_index, int ref_index,
979cabdff1aSopenharmony_ci                          const int16_t (*last_mv)[2], int ref_mv_scale,
980cabdff1aSopenharmony_ci                          int size, int h)
981cabdff1aSopenharmony_ci{
982cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
983cabdff1aSopenharmony_ci//FIXME convert other functions in the same way if faster
984cabdff1aSopenharmony_ci    if(c->flags==0 && h==16 && size==0){
985cabdff1aSopenharmony_ci        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986cabdff1aSopenharmony_ci//    case FLAG_QPEL:
987cabdff1aSopenharmony_ci//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988cabdff1aSopenharmony_ci    }else{
989cabdff1aSopenharmony_ci        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
990cabdff1aSopenharmony_ci    }
991cabdff1aSopenharmony_ci}
992cabdff1aSopenharmony_ci
993cabdff1aSopenharmony_cistatic int epzs_motion_search2(MpegEncContext * s,
994cabdff1aSopenharmony_ci                             int *mx_ptr, int *my_ptr, int P[10][2],
995cabdff1aSopenharmony_ci                             int src_index, int ref_index, const int16_t (*last_mv)[2],
996cabdff1aSopenharmony_ci                             int ref_mv_scale, const int size)
997cabdff1aSopenharmony_ci{
998cabdff1aSopenharmony_ci    MotionEstContext * const c= &s->me;
999cabdff1aSopenharmony_ci    int best[2]={0, 0};
1000cabdff1aSopenharmony_ci    int d, dmin;
1001cabdff1aSopenharmony_ci    unsigned map_generation;
1002cabdff1aSopenharmony_ci    const int penalty_factor= c->penalty_factor;
1003cabdff1aSopenharmony_ci    const int h=8;
1004cabdff1aSopenharmony_ci    const int ref_mv_stride= s->mb_stride;
1005cabdff1aSopenharmony_ci    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006cabdff1aSopenharmony_ci    me_cmp_func cmpf, chroma_cmpf;
1007cabdff1aSopenharmony_ci    LOAD_COMMON
1008cabdff1aSopenharmony_ci    int flags= c->flags;
1009cabdff1aSopenharmony_ci    LOAD_COMMON2
1010cabdff1aSopenharmony_ci
1011cabdff1aSopenharmony_ci    cmpf        = s->mecc.me_cmp[size];
1012cabdff1aSopenharmony_ci    chroma_cmpf = s->mecc.me_cmp[size + 1];
1013cabdff1aSopenharmony_ci
1014cabdff1aSopenharmony_ci    map_generation= update_map_generation(c);
1015cabdff1aSopenharmony_ci
1016cabdff1aSopenharmony_ci    dmin = 1000000;
1017cabdff1aSopenharmony_ci
1018cabdff1aSopenharmony_ci    /* first line */
1019cabdff1aSopenharmony_ci    if (s->first_slice_line) {
1020cabdff1aSopenharmony_ci        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022cabdff1aSopenharmony_ci                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023cabdff1aSopenharmony_ci        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1024cabdff1aSopenharmony_ci    }else{
1025cabdff1aSopenharmony_ci        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026cabdff1aSopenharmony_ci        //FIXME try some early stop
1027cabdff1aSopenharmony_ci        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028cabdff1aSopenharmony_ci        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029cabdff1aSopenharmony_ci        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030cabdff1aSopenharmony_ci        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032cabdff1aSopenharmony_ci                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1033cabdff1aSopenharmony_ci    }
1034cabdff1aSopenharmony_ci    if(dmin>64*4){
1035cabdff1aSopenharmony_ci        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036cabdff1aSopenharmony_ci                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037cabdff1aSopenharmony_ci        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1038cabdff1aSopenharmony_ci            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039cabdff1aSopenharmony_ci                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1040cabdff1aSopenharmony_ci    }
1041cabdff1aSopenharmony_ci
1042cabdff1aSopenharmony_ci    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1043cabdff1aSopenharmony_ci
1044cabdff1aSopenharmony_ci    *mx_ptr= best[0];
1045cabdff1aSopenharmony_ci    *my_ptr= best[1];
1046cabdff1aSopenharmony_ci
1047cabdff1aSopenharmony_ci    return dmin;
1048cabdff1aSopenharmony_ci}
1049