/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
/*
 * Component-wise rounded mean of two (MVx2) or four (MVx4) motion vectors.
 *
 * Each macro expands to a VP56mv compound literal whose .x and .y are
 * ROUNDED_DIV() of the summed components by 2 or 4 respectively.
 * VP56mv and ROUNDED_DIV are not defined here and must be in scope at the
 * point of use.
 *
 * Every parameter is parenthesized so that any expression of motion-vector
 * type (e.g. *ptr, or a conditional expression) can be passed.  Each
 * parameter appears twice in the expansion (once per component), so
 * arguments must be free of side effects.
 */
#define ROUNDED_DIV_MVx2(a, b) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), \
               .y = ROUNDED_DIV((a).y + (b).y, 2) }
#define ROUNDED_DIV_MVx4(a, b, c, d) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
29
/**
 * Run inter prediction for the current block of a tile (td->b).
 *
 * All work is issued through mc_luma_dir() and mc_chroma_dir(). The luma
 * calls are given td->dst[0]; the chroma calls are given td->dst[1] and
 * td->dst[2]. mc_luma_dir, mc_chroma_dir, mc, FN, SCALED and
 * BYTES_PER_PIXEL are not defined in this file (presumably supplied by
 * whatever includes this template -- confirm against the including file).
 *
 * Every step is done first for the reference selected by b->ref[0]
 * (mc[..][..][0], trailing argument 0) and, when b->comp is set, repeated
 * for the reference selected by b->ref[1] (mc[..][..][1], trailing
 * argument 1) with the same destination pointers.
 *
 * Blocks with b->bs > BS_8x8 (BS_8x4, BS_4x8, BS_4x4) are handled as two
 * or four sub-blocks with their own motion vectors from b->mv[0..3]; all
 * other block sizes use one luma and one chroma call per reference with
 * b->mv[0].
 *
 * @param td per-tile state; this function reads td->s, td->b, td->row,
 *           td->col, td->dst[], td->y_stride and td->uv_stride
 */
30static void FN(inter_pred)(VP9TileData *td)
31{
    // first mc[] index per block size (see bwl near the end of the
    // function); only row 0 of this table is read in this function
32    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
33        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
34        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
35    };
36    VP9Context *s = td->s;
37    VP9Block *b = td->b;
38    int row = td->row, col = td->col;
    // first reference, looked up through b->ref[0]; tref2/ref2/w2/h2 are
    // only assigned (and only used) when b->comp is set
39    ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
40    AVFrame *ref1 = tref1->f, *ref2;
41    int w1 = ref1->width, h1 = ref1->height, w2, h2;
42    ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride;
43    int bytesperpixel = BYTES_PER_PIXEL;
44
    // second reference for compound blocks, looked up through b->ref[1]
45    if (b->comp) {
46        tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]];
47        ref2 = tref2->f;
48        w2 = ref2->width;
49        h2 = ref2->height;
50    }
51
52    // y inter pred
    // (each branch below issues the luma calls first and then the
    // matching chroma calls)
53    if (b->bs > BS_8x8) {
54        VP56mv uvmv;
55
        // The dedicated 8x4 and 4x8 paths only exist when SCALED == 0;
        // otherwise every block with bs > BS_8x8 goes through the 4x4 path
        // further down. In these two paths the four arguments following
        // the motion-vector pointer are left empty.
56#if SCALED == 0
57        if (b->bs == BS_8x4) {
            // luma: top half uses mv[0], bottom half (4 lines down) mv[2]
58            mc_luma_dir(td, mc[3][b->filter][0], td->dst[0], ls_y,
59                        ref1->data[0], ref1->linesize[0], tref1,
60                        row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
61            mc_luma_dir(td, mc[3][b->filter][0],
62                        td->dst[0] + 4 * ls_y, ls_y,
63                        ref1->data[0], ref1->linesize[0], tref1,
64                        (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
            // from here on w1 (and h1 when ss_v is set) hold the reference
            // size shifted down by the chroma subsampling, rounded up
65            w1 = (w1 + s->ss_h) >> s->ss_h;
66            if (s->ss_v) {
                // vertical subsampling: a single chroma call using the
                // rounded mean of the top and bottom motion vectors
67                h1 = (h1 + 1) >> 1;
68                uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
69                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
70                              td->dst[1], td->dst[2], ls_uv,
71                              ref1->data[1], ref1->linesize[1],
72                              ref1->data[2], ref1->linesize[2], tref1,
73                              row << 2, col << (3 - s->ss_h),
74                              &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
75            } else {
                // no vertical subsampling: separate top and bottom chroma
                // calls; the top one uses mv[0] directly
76                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
77                              td->dst[1], td->dst[2], ls_uv,
78                              ref1->data[1], ref1->linesize[1],
79                              ref1->data[2], ref1->linesize[2], tref1,
80                              row << 3, col << (3 - s->ss_h),
81                              &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
82                // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
83                // to get the motion vector for the bottom 4x4 block
84                // https://code.google.com/p/webm/issues/detail?id=993
85                if (s->ss_h == 0) {
86                    uvmv = b->mv[2][0];
87                } else {
88                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
89                }
90                mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
91                              td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
92                              ref1->data[1], ref1->linesize[1],
93                              ref1->data[2], ref1->linesize[2], tref1,
94                              (row << 3) + 4, col << (3 - s->ss_h),
95                              &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
96            }
97
            // second reference: same sequence as above using mv[..][1],
            // ref2/tref2 and w2/h2
98            if (b->comp) {
99                mc_luma_dir(td, mc[3][b->filter][1], td->dst[0], ls_y,
100                            ref2->data[0], ref2->linesize[0], tref2,
101                            row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
102                mc_luma_dir(td, mc[3][b->filter][1],
103                            td->dst[0] + 4 * ls_y, ls_y,
104                            ref2->data[0], ref2->linesize[0], tref2,
105                            (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
106                w2 = (w2 + s->ss_h) >> s->ss_h;
107                if (s->ss_v) {
108                    h2 = (h2 + 1) >> 1;
109                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
110                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
111                                  td->dst[1], td->dst[2], ls_uv,
112                                  ref2->data[1], ref2->linesize[1],
113                                  ref2->data[2], ref2->linesize[2], tref2,
114                                  row << 2, col << (3 - s->ss_h),
115                                  &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
116                } else {
117                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
118                                  td->dst[1], td->dst[2], ls_uv,
119                                  ref2->data[1], ref2->linesize[1],
120                                  ref2->data[2], ref2->linesize[2], tref2,
121                                  row << 3, col << (3 - s->ss_h),
122                                  &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
123                    // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
124                    // to get the motion vector for the bottom 4x4 block
125                    // https://code.google.com/p/webm/issues/detail?id=993
126                    if (s->ss_h == 0) {
127                        uvmv = b->mv[2][1];
128                    } else {
129                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
130                    }
131                    mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
132                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
133                                  ref2->data[1], ref2->linesize[1],
134                                  ref2->data[2], ref2->linesize[2], tref2,
135                                  (row << 3) + 4, col << (3 - s->ss_h),
136                                  &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
137                }
138            }
139        } else if (b->bs == BS_4x8) {
            // luma: left half uses mv[0], right half (4 pixels to the
            // right) uses mv[1]
140            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
141                        ref1->data[0], ref1->linesize[0], tref1,
142                        row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
143            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
144                        ref1->data[0], ref1->linesize[0], tref1,
145                        row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
            // h1 (and w1 when ss_h is set) become the subsampled size
146            h1 = (h1 + s->ss_v) >> s->ss_v;
147            if (s->ss_h) {
                // horizontal subsampling: a single chroma call using the
                // rounded mean of the left and right motion vectors
148                w1 = (w1 + 1) >> 1;
149                uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
150                mc_chroma_dir(td, mc[4][b->filter][0],
151                              td->dst[1], td->dst[2], ls_uv,
152                              ref1->data[1], ref1->linesize[1],
153                              ref1->data[2], ref1->linesize[2], tref1,
154                              row << (3 - s->ss_v), col << 2,
155                              &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
156            } else {
                // no horizontal subsampling: separate left (mv[0]) and
                // right (mv[1]) chroma calls
157                mc_chroma_dir(td, mc[4][b->filter][0],
158                              td->dst[1], td->dst[2], ls_uv,
159                              ref1->data[1], ref1->linesize[1],
160                              ref1->data[2], ref1->linesize[2], tref1,
161                              row << (3 - s->ss_v), col << 3,
162                              &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
163                mc_chroma_dir(td, mc[4][b->filter][0],
164                              td->dst[1] + 4 * bytesperpixel,
165                              td->dst[2] + 4 * bytesperpixel, ls_uv,
166                              ref1->data[1], ref1->linesize[1],
167                              ref1->data[2], ref1->linesize[2], tref1,
168                              row << (3 - s->ss_v), (col << 3) + 4,
169                              &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
170            }
171
            // second reference: same sequence as above using mv[..][1],
            // ref2/tref2 and w2/h2
172            if (b->comp) {
173                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
174                            ref2->data[0], ref2->linesize[0], tref2,
175                            row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
176                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
177                            ref2->data[0], ref2->linesize[0], tref2,
178                            row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
179                h2 = (h2 + s->ss_v) >> s->ss_v;
180                if (s->ss_h) {
181                    w2 = (w2 + 1) >> 1;
182                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
183                    mc_chroma_dir(td, mc[4][b->filter][1],
184                                  td->dst[1], td->dst[2], ls_uv,
185                                  ref2->data[1], ref2->linesize[1],
186                                  ref2->data[2], ref2->linesize[2], tref2,
187                                  row << (3 - s->ss_v), col << 2,
188                                  &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
189                } else {
190                    mc_chroma_dir(td, mc[4][b->filter][1],
191                                  td->dst[1], td->dst[2], ls_uv,
192                                  ref2->data[1], ref2->linesize[1],
193                                  ref2->data[2], ref2->linesize[2], tref2,
194                                  row << (3 - s->ss_v), col << 3,
195                                  &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
196                    mc_chroma_dir(td, mc[4][b->filter][1],
197                                  td->dst[1] + 4 * bytesperpixel,
198                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
199                                  ref2->data[1], ref2->linesize[1],
200                                  ref2->data[2], ref2->linesize[2], tref2,
201                                  row << (3 - s->ss_v), (col << 3) + 4,
202                                  &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
203                }
204            }
205        } else
206#endif
207        {
208#if SCALED == 0
209            av_assert2(b->bs == BS_4x4);
210#endif
211
212            // FIXME if two horizontally adjacent blocks have the same MV,
213            // do a w8 instead of a w4 call
            // four luma calls: mv[0] top-left, mv[1] top-right,
            // mv[2] bottom-left, mv[3] bottom-right; unlike the two paths
            // above, the four arguments after the MV pointer are supplied
214            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
215                        ref1->data[0], ref1->linesize[0], tref1,
216                        row << 3, col << 3, &b->mv[0][0],
217                        0, 0, 8, 8, 4, 4, w1, h1, 0);
218            mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
219                        ref1->data[0], ref1->linesize[0], tref1,
220                        row << 3, (col << 3) + 4, &b->mv[1][0],
221                        4, 0, 8, 8, 4, 4, w1, h1, 0);
222            mc_luma_dir(td, mc[4][b->filter][0],
223                        td->dst[0] + 4 * ls_y, ls_y,
224                        ref1->data[0], ref1->linesize[0], tref1,
225                        (row << 3) + 4, col << 3, &b->mv[2][0],
226                        0, 4, 8, 8, 4, 4, w1, h1, 0);
227            mc_luma_dir(td, mc[4][b->filter][0],
228                        td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
229                        ref1->data[0], ref1->linesize[0], tref1,
230                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
231                        4, 4, 8, 8, 4, 4, w1, h1, 0);
            // chroma: the number of calls and the motion vectors that get
            // averaged depend on which directions are subsampled
232            if (s->ss_v) {
233                h1 = (h1 + 1) >> 1;
234                if (s->ss_h) {
                    // both directions subsampled: one call with the
                    // rounded mean of all four motion vectors
235                    w1 = (w1 + 1) >> 1;
236                    uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
237                                            b->mv[2][0], b->mv[3][0]);
238                    mc_chroma_dir(td, mc[4][b->filter][0],
239                                  td->dst[1], td->dst[2], ls_uv,
240                                  ref1->data[1], ref1->linesize[1],
241                                  ref1->data[2], ref1->linesize[2], tref1,
242                                  row << 2, col << 2,
243                                  &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
244                } else {
                    // vertical only: left call averages mv[0]/mv[2],
                    // right call averages mv[1]/mv[3]
245                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
246                    mc_chroma_dir(td, mc[4][b->filter][0],
247                                  td->dst[1], td->dst[2], ls_uv,
248                                  ref1->data[1], ref1->linesize[1],
249                                  ref1->data[2], ref1->linesize[2], tref1,
250                                  row << 2, col << 3,
251                                  &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
252                    uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
253                    mc_chroma_dir(td, mc[4][b->filter][0],
254                                  td->dst[1] + 4 * bytesperpixel,
255                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
256                                  ref1->data[1], ref1->linesize[1],
257                                  ref1->data[2], ref1->linesize[2], tref1,
258                                  row << 2, (col << 3) + 4,
259                                  &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
260                }
261            } else {
262                if (s->ss_h) {
                    // horizontal only: top call averages mv[0]/mv[1];
                    // the bottom call averages mv[1]/mv[2] (see BUG note)
263                    w1 = (w1 + 1) >> 1;
264                    uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
265                    mc_chroma_dir(td, mc[4][b->filter][0],
266                                  td->dst[1], td->dst[2], ls_uv,
267                                  ref1->data[1], ref1->linesize[1],
268                                  ref1->data[2], ref1->linesize[2], tref1,
269                                  row << 3, col << 2,
270                                  &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
271                    // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
272                    // bottom block
273                    // https://code.google.com/p/webm/issues/detail?id=993
274                    uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
275                    mc_chroma_dir(td, mc[4][b->filter][0],
276                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
277                                  ref1->data[1], ref1->linesize[1],
278                                  ref1->data[2], ref1->linesize[2], tref1,
279                                  (row << 3) + 4, col << 2,
280                                  &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
281                } else {
                    // no subsampling: four chroma calls laid out like the
                    // four luma calls, each with its own motion vector
282                    mc_chroma_dir(td, mc[4][b->filter][0],
283                                  td->dst[1], td->dst[2], ls_uv,
284                                  ref1->data[1], ref1->linesize[1],
285                                  ref1->data[2], ref1->linesize[2], tref1,
286                                  row << 3, col << 3,
287                                  &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
288                    mc_chroma_dir(td, mc[4][b->filter][0],
289                                  td->dst[1] + 4 * bytesperpixel,
290                                  td->dst[2] + 4 * bytesperpixel, ls_uv,
291                                  ref1->data[1], ref1->linesize[1],
292                                  ref1->data[2], ref1->linesize[2], tref1,
293                                  row << 3, (col << 3) + 4,
294                                  &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
295                    mc_chroma_dir(td, mc[4][b->filter][0],
296                                  td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
297                                  ref1->data[1], ref1->linesize[1],
298                                  ref1->data[2], ref1->linesize[2], tref1,
299                                  (row << 3) + 4, col << 3,
300                                  &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
301                    mc_chroma_dir(td, mc[4][b->filter][0],
302                                  td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
303                                  td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
304                                  ref1->data[1], ref1->linesize[1],
305                                  ref1->data[2], ref1->linesize[2], tref1,
306                                  (row << 3) + 4, (col << 3) + 4,
307                                  &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
308                }
309            }
310
            // second reference: same sequence as above using mv[..][1],
            // ref2/tref2 and w2/h2
311            if (b->comp) {
312                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
313                            ref2->data[0], ref2->linesize[0], tref2,
314                            row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
315                mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
316                            ref2->data[0], ref2->linesize[0], tref2,
317                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
318                mc_luma_dir(td, mc[4][b->filter][1],
319                            td->dst[0] + 4 * ls_y, ls_y,
320                            ref2->data[0], ref2->linesize[0], tref2,
321                            (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
322                mc_luma_dir(td, mc[4][b->filter][1],
323                            td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
324                            ref2->data[0], ref2->linesize[0], tref2,
325                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
326                if (s->ss_v) {
327                    h2 = (h2 + 1) >> 1;
328                    if (s->ss_h) {
329                        w2 = (w2 + 1) >> 1;
330                        uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
331                                                b->mv[2][1], b->mv[3][1]);
332                        mc_chroma_dir(td, mc[4][b->filter][1],
333                                      td->dst[1], td->dst[2], ls_uv,
334                                      ref2->data[1], ref2->linesize[1],
335                                      ref2->data[2], ref2->linesize[2], tref2,
336                                      row << 2, col << 2,
337                                      &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
338                    } else {
339                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
340                        mc_chroma_dir(td, mc[4][b->filter][1],
341                                      td->dst[1], td->dst[2], ls_uv,
342                                      ref2->data[1], ref2->linesize[1],
343                                      ref2->data[2], ref2->linesize[2], tref2,
344                                      row << 2, col << 3,
345                                      &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
346                        uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
347                        mc_chroma_dir(td, mc[4][b->filter][1],
348                                      td->dst[1] + 4 * bytesperpixel,
349                                      td->dst[2] + 4 * bytesperpixel, ls_uv,
350                                      ref2->data[1], ref2->linesize[1],
351                                      ref2->data[2], ref2->linesize[2], tref2,
352                                      row << 2, (col << 3) + 4,
353                                      &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
354                    }
355                } else {
356                    if (s->ss_h) {
357                        w2 = (w2 + 1) >> 1;
358                        uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
359                        mc_chroma_dir(td, mc[4][b->filter][1],
360                                      td->dst[1], td->dst[2], ls_uv,
361                                      ref2->data[1], ref2->linesize[1],
362                                      ref2->data[2], ref2->linesize[2], tref2,
363                                      row << 3, col << 2,
364                                      &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
365                        // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
366                        // bottom block
367                        // https://code.google.com/p/webm/issues/detail?id=993
368                        uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
369                        mc_chroma_dir(td, mc[4][b->filter][1],
370                                      td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
371                                      ref2->data[1], ref2->linesize[1],
372                                      ref2->data[2], ref2->linesize[2], tref2,
373                                      (row << 3) + 4, col << 2,
374                                      &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
375                    } else {
376                        mc_chroma_dir(td, mc[4][b->filter][1],
377                                      td->dst[1], td->dst[2], ls_uv,
378                                      ref2->data[1], ref2->linesize[1],
379                                      ref2->data[2], ref2->linesize[2], tref2,
380                                      row << 3, col << 3,
381                                      &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
382                        mc_chroma_dir(td, mc[4][b->filter][1],
383                                      td->dst[1] + 4 * bytesperpixel,
384                                      td->dst[2] + 4 * bytesperpixel, ls_uv,
385                                      ref2->data[1], ref2->linesize[1],
386                                      ref2->data[2], ref2->linesize[2], tref2,
387                                      row << 3, (col << 3) + 4,
388                                      &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
389                        mc_chroma_dir(td, mc[4][b->filter][1],
390                                      td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
391                                      ref2->data[1], ref2->linesize[1],
392                                      ref2->data[2], ref2->linesize[2], tref2,
393                                      (row << 3) + 4, col << 3,
394                                      &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
395                        mc_chroma_dir(td, mc[4][b->filter][1],
396                                      td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
397                                      td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
398                                      ref2->data[1], ref2->linesize[1],
399                                      ref2->data[2], ref2->linesize[2], tref2,
400                                      (row << 3) + 4, (col << 3) + 4,
401                                      &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
402                    }
403                }
404            }
405        }
406    } else {
        // bs <= BS_8x8: one luma and one chroma call per reference, all
        // using b->mv[0]. The luma size comes from row 0 of
        // ff_vp9_bwh_tab and the chroma size from the row selected by
        // ss_h / ss_v, each multiplied by 4.
407        int bwl = bwlog_tab[0][b->bs];
408        int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4;
409        int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4;
410        int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4;
411        int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4;
412
413        mc_luma_dir(td, mc[bwl][b->filter][0], td->dst[0], ls_y,
414                    ref1->data[0], ref1->linesize[0], tref1,
415                    row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
        // switch w1/h1 to the subsampled size (rounded up) for chroma
416        w1 = (w1 + s->ss_h) >> s->ss_h;
417        h1 = (h1 + s->ss_v) >> s->ss_v;
418        mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][0],
419                      td->dst[1], td->dst[2], ls_uv,
420                      ref1->data[1], ref1->linesize[1],
421                      ref1->data[2], ref1->linesize[2], tref1,
422                      row << (3 - s->ss_v), col << (3 - s->ss_h),
423                      &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
424
        // second reference: same two calls using mv[0][1], ref2/tref2
        // and w2/h2
425        if (b->comp) {
426            mc_luma_dir(td, mc[bwl][b->filter][1], td->dst[0], ls_y,
427                        ref2->data[0], ref2->linesize[0], tref2,
428                        row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
429            w2 = (w2 + s->ss_h) >> s->ss_h;
430            h2 = (h2 + s->ss_v) >> s->ss_v;
431            mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][1],
432                          td->dst[1], td->dst[2], ls_uv,
433                          ref2->data[1], ref2->linesize[1],
434                          ref2->data[2], ref2->linesize[2], tref2,
435                          row << (3 - s->ss_v), col << (3 - s->ss_h),
436                          &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
437        }
438    }
439}
440