1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * H.264 / AVC / MPEG-4 part10 direct mb/block decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27 
28 #include "avcodec.h"
29 #include "h264dec.h"
30 #include "h264_ps.h"
31 #include "mpegutils.h"
32 #include "rectangle.h"
33 #include "threadframe.h"
34 
35 #include <assert.h>
36 
/**
 * Compute the distance scale factor used by temporal direct prediction
 * for list-0 reference @p i.
 *
 * @param sl   slice context; supplies ref_list[0] and the logging context
 * @param poc  POC of the current picture (or field)
 * @param poc1 POC of the list-1 reference the distance is measured against
 * @param i    index into sl->ref_list[0]
 * @return 256 when the int8-clipped distance poc1 - poc0 is zero or the
 *         reference is a long-term one; otherwise (tb * tx + 32) >> 6
 *         passed through av_clip_intp2(..., 10)
 */
static int get_scale_factor(H264SliceContext *sl,
                            int poc, int poc1, int i)
{
    const H264Ref *const ref0 = &sl->ref_list[0][i];
    const int     ref_poc     = ref0->poc;
    /* Differences are formed in 64 bits so that overflow can be detected. */
    const int64_t span        = poc1 - (int64_t)ref_poc;
    const int     td          = av_clip_int8(span);
    int64_t cur_dist;
    int tb, tx;

    if (span != (int)span)
        avpriv_request_sample(sl->h264->avctx, "pocdiff overflow");

    /* The short-circuit matters: the parent is only inspected if td != 0. */
    if (!td || ref0->parent->long_ref)
        return 256;

    cur_dist = poc - (int64_t)ref_poc;
    tb       = av_clip_int8(cur_dist);
    tx       = (16384 + (FFABS(td) >> 1)) / td;

    if (cur_dist != (int)cur_dist)
        av_log(sl->h264->avctx, AV_LOG_DEBUG, "pocdiff0 overflow\n");

    return av_clip_intp2((tb * tx + 32) >> 6, 10);
}
60 
/**
 * Fill the per-reference distance scale factor tables of a slice.
 *
 * sl->dist_scale_factor[] gets one entry per list-0 reference, computed
 * against the POC of sl->ref_list[1][0]. When FRAME_MBAFF(h) is set,
 * sl->dist_scale_factor_field[0..1][] is filled as well, using the
 * per-field POCs and the reference entries starting at index 16.
 *
 * @param h  decoder context (read only)
 * @param sl slice context whose tables are written
 */
void ff_h264_direct_dist_scale_factor(const H264Context *const h,
                                      H264SliceContext *sl)
{
    const int col_poc = sl->ref_list[1][0].poc;
    int cur_poc;
    int idx, parity;

    if (FIELD_PICTURE(h))
        cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD];
    else
        cur_poc = h->cur_pic_ptr->poc;

    if (FRAME_MBAFF(h)) {
        for (parity = 0; parity < 2; parity++) {
            const int fld_poc     = h->cur_pic_ptr->field_poc[parity];
            const int fld_col_poc = sl->ref_list[1][0].parent->field_poc[parity];
            int *const factors    = sl->dist_scale_factor_field[parity];

            /* Field references are the entries from index 16 on; the
             * destination index has its lowest bit flipped for parity 1. */
            for (idx = 0; idx < 2 * sl->ref_count[0]; idx++)
                factors[idx ^ parity] =
                    get_scale_factor(sl, fld_poc, fld_col_poc, idx + 16);
        }
    }

    for (idx = 0; idx < sl->ref_count[0]; idx++)
        sl->dist_scale_factor[idx] = get_scale_factor(sl, cur_poc, col_poc, idx);
}
81 
/**
 * Build the table that translates reference indices stored in the
 * co-located picture (sl->ref_list[1][0].parent) into indices of the
 * current slice's list 0.
 *
 * Entries are matched by the key 4 * frame_num + (reference & 3).
 *
 * @param h        decoder context (read only)
 * @param sl       slice context providing ref_list and ref_count
 * @param map      table to fill; only map[list] is written
 * @param list     which list of the co-located picture is being mapped
 * @param field    parity of the current field / MB
 * @param colfield which ref_count/ref_poc set of the co-located picture to read
 * @param mbafi    non-zero to search the field references (indices from 16)
 */
static void fill_colmap(const H264Context *h, H264SliceContext *sl,
                        int map[2][16 + 32], int list,
                        int field, int colfield, int mbafi)
{
    H264Picture *const col_pic = sl->ref_list[1][0].parent;
    const int first      = mbafi ? 16                       : 0;
    const int last       = mbafi ? 16 + 2 * sl->ref_count[0] : sl->ref_count[0];
    const int interlaced = mbafi || h->picture_structure != PICT_FRAME;
    int parity, col_idx, k;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for (parity = 0; parity < 2; parity++) {
        for (col_idx = 0; col_idx < col_pic->ref_count[colfield][list]; col_idx++) {
            int key = col_pic->ref_poc[colfield][list][col_idx];

            if (!interlaced)
                key |= 3;
            // FIXME: store all MBAFF references so this is not needed
            else if ((key & 3) == 3)
                key = (key & ~3) + parity + 1;

            /* Look for the first list-0 entry with the same key. */
            for (k = first; k < last; k++) {
                const H264Ref *const cand = &sl->ref_list[0][k];
                int cur_idx;

                if (4 * cand->parent->frame_num + (cand->reference & 3) != key)
                    continue;

                cur_idx = mbafi ? (k - 16) ^ field : k;
                if (col_pic->mbaff)
                    map[list][2 * col_idx + (parity ^ field) + 16] = cur_idx;
                if (parity == field || !interlaced)
                    map[list][col_idx] = cur_idx;
                break;
            }
        }
    }
}
119 
/**
 * Per-slice setup for direct prediction.
 *
 * Records the slice's reference lists in the current picture (ref_count /
 * ref_poc, keyed as 4 * frame_num + (reference & 3)), sets or checks the
 * picture's mbaff flag, derives sl->col_parity / sl->col_fieldoff and, for
 * B slices using temporal direct prediction, fills the co-located to
 * list-0 index maps.
 *
 * @param h  decoder context (read only; the picture it points to is written)
 * @param sl slice context to initialise
 */
void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *sl)
{
    H264Ref *const col_ref = &sl->ref_list[1][0];
    H264Picture *const pic = h->cur_pic_ptr;
    const int is_frame     = h->picture_structure == PICT_FRAME;
    int cur_sidx           = (h->picture_structure & 1) ^ 1;
    int col_sidx           = (col_ref->reference   & 1) ^ 1;
    int list, idx, parity;

    for (list = 0; list < sl->list_count; list++) {
        pic->ref_count[cur_sidx][list] = sl->ref_count[list];
        for (idx = 0; idx < sl->ref_count[list]; idx++) {
            const H264Ref *const r = &sl->ref_list[list][idx];

            pic->ref_poc[cur_sidx][list][idx] = 4 * r->parent->frame_num +
                                                (r->reference & 3);
        }
    }

    /* A frame picture stores the same data in both slots. */
    if (is_frame) {
        memcpy(pic->ref_count[1], pic->ref_count[0], sizeof(pic->ref_count[0]));
        memcpy(pic->ref_poc[1],   pic->ref_poc[0],   sizeof(pic->ref_poc[0]));
    }

    /* The first slice defines the flag; later slices must agree with it. */
    if (h->current_slice != 0) {
        av_assert0(pic->mbaff == FRAME_MBAFF(h));
    } else {
        pic->mbaff = FRAME_MBAFF(h);
    }

    sl->col_fieldoff = 0;

    if (sl->list_count != 2 || !sl->ref_count[1])
        return;

    if (is_frame) {
        const int cur_poc = pic->poc;
        int *col_poc      = col_ref->parent->field_poc;

        if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
            av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
            sl->col_parity = 1;
        } else {
            /* Select field 1 unless field 0 is strictly closer in POC. */
            const int64_t dist0 = FFABS(col_poc[0] - (int64_t)cur_poc);
            const int64_t dist1 = FFABS(col_poc[1] - (int64_t)cur_poc);

            sl->col_parity = dist0 >= dist1;
        }
        cur_sidx = sl->col_parity;
        col_sidx = cur_sidx;
    } else if (!(h->picture_structure & col_ref->reference) &&
               !col_ref->parent->mbaff) {
        // FL -> FL & differ parity
        sl->col_fieldoff = 2 * col_ref->reference - 3;
    }

    /* The index maps are only filled for B slices with temporal direct. */
    if (sl->slice_type_nos != AV_PICTURE_TYPE_B || sl->direct_spatial_mv_pred)
        return;

    for (list = 0; list < 2; list++) {
        fill_colmap(h, sl, sl->map_col_to_list0, list, cur_sidx, col_sidx, 0);
        if (!FRAME_MBAFF(h))
            continue;
        for (parity = 0; parity < 2; parity++)
            fill_colmap(h, sl, sl->map_col_to_list0_field[parity], list,
                        parity, parity, 1);
    }
}
179 
/**
 * Block until the reference picture has reported decoding progress up to
 * the pixel row that corresponds to macroblock row @p mb_y.
 *
 * Does nothing unless frame threading is active.
 *
 * @param h    decoder context (read only)
 * @param ref  reference whose parent picture is waited on
 * @param mb_y macroblock row, in units of the current picture
 */
static void await_reference_mb_row(const H264Context *const h, H264Ref *ref,
                                   int mb_y)
{
    const int field_sel = ref->reference - 1;
    const int field_pic = ref->parent->field_picture;
    const int pic_rows  = 16 * h->mb_height >> field_pic;
    int row;

    if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
        return;

    /* FIXME: It can be safe to access mb stuff
     * even if pixels aren't deblocked yet. */

    /* Never wait for a row beyond the last one of the reference. */
    row = FFMIN(16 * mb_y >> field_pic, pic_rows - 1);

    ff_thread_await_progress(&ref->parent->tf, row, field_pic && field_sel);
}
198 
/**
 * Spatial direct prediction for the current macroblock.
 *
 * Reference indices and motion vectors are predicted from the left, top
 * and top-right (or top-left) neighbours held in sl->ref_cache and
 * sl->mv_cache. The co-located data of sl->ref_list[1][0] is then consulted:
 * where the co-located block uses reference 0 with both MV components
 * within +-1, the predicted vector of a list whose reference index is 0 is
 * replaced by zero.
 *
 * Writes sl->ref_cache, sl->mv_cache and sl->sub_mb_type and updates the
 * partition / list bits of *mb_type.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose caches are read and filled
 * @param mb_type in/out macroblock type of the current macroblock
 */
static void pred_spatial_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                       int *mb_type)
{
    /* Strides used to step from one 8x8 block row to the next in the
     * co-located ref_index / motion_val data; adjusted below. */
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type = MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2]; /* predicted vector per list, both components packed in 32 bits */
    int list;

    assert(sl->ref_list[1][0].reference & 3);

    /* The co-located mb_type is read below; wait for that row first
     * (one row further when the current MB is interlaced). */
    await_reference_mb_row(h, &sl->ref_list[1][0],
                           sl->mb_y + !!IS_INTERLACED(*mb_type));

/* NOTE: this macro stays defined after this function and is also used by
 * pred_temp_direct_motion(). */
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
                                MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)

    /* ref = min(neighbors) */
    for (list = 0; list < 2; list++) {
        int left_ref     = sl->ref_cache[list][scan8[0] - 1];
        int top_ref      = sl->ref_cache[list][scan8[0] - 8];
        int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C = sl->mv_cache[list][scan8[0]  - 8 + 4];
        /* Top-right not available: use the top-left neighbour instead. */
        if (refc == PART_NOT_AVAILABLE) {
            refc = sl->ref_cache[list][scan8[0] - 8 - 1];
            C    = sl->mv_cache[list][scan8[0]  - 8 - 1];
        }
        /* Comparing as unsigned makes negative indices count as very large,
         * so the result is the smallest non-negative index if there is one
         * and stays negative only when all three are negative. */
        ref[list] = FFMIN3((unsigned)left_ref,
                           (unsigned)top_ref,
                           (unsigned)refc);
        if (ref[list] >= 0) {
            /* This is just pred_motion() but with the cases removed that
             * cannot happen for direct blocks. */
            const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
            const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];

            int match_count = (left_ref == ref[list]) +
                              (top_ref  == ref[list]) +
                              (refc     == ref[list]);

            if (match_count > 1) { // most common
                mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
                                      mid_pred(A[1], B[1], C[1]));
            } else {
                /* Exactly one neighbour uses the chosen reference: take
                 * its vector unchanged. */
                assert(match_count == 1);
                if (left_ref == ref[list])
                    mv[list] = AV_RN32A(A);
                else if (top_ref == ref[list])
                    mv[list] = AV_RN32A(B);
                else
                    mv[list] = AV_RN32A(C);
            }
            av_assert2(ref[list] < (sl->ref_count[list] << !!FRAME_MBAFF(h)));
        } else {
            /* No neighbour uses this list: drop the list from the MB and
             * sub-MB types. */
            int mask = ~(MB_TYPE_L0 << (2 * list));
            mv[list]  = 0;
            ref[list] = -1;
            if (!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }
    /* Neither list is usable: fall back to reference 0 in both lists
     * (the vectors were already set to 0 above). */
    if (ref[0] < 0 && ref[1] < 0) {
        ref[0] = ref[1] = 0;
        if (!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    /* Whole-MB direct with both predicted vectors zero: the co-located
     * data cannot change the result, so finish as 16x16 right away. */
    if (!(is_b8x8 | mv[0] | mv[1])) {
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                 MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                   MB_TYPE_16x16 | MB_TYPE_DIRECT2;
        return;
    }

    /* Locate the co-located macroblock(s) depending on whether the current
     * and the co-located MB are interlaced. */
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
            mb_xy = sl->mb_x +
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
            /* 0 marks this case; see the pointer adjustment below. */
            b8_stride = 0;
        } else {
            mb_y  += sl->col_fieldoff;
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                             // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {                   // AFL       /FL -> AFR/FR
            /* Two vertically adjacent co-located MBs are used, starting
             * at the even row. */
            mb_y           =  sl->mb_y & ~1;
            mb_xy          = (sl->mb_y & ~1) * h->mb_stride + sl->mb_x;
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2;  /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8;
            }
        } else {                                         //     AFR/FR    -> AFR/FR
single_col:
            /* One co-located MB; the partitioning follows its type. */
            mb_type_col[0] =
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->ps.sps->direct_8x8_inference_flag) {
                    /* FIXME: Save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use. */
                    sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8;
            }
        }
    }

    /* mb_y may have been changed above; wait for the row actually read. */
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);

    /* Co-located motion vectors and reference indices of both lists. */
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        /* Odd MB row: start two reference entries / two 4x4 MV rows in. */
        if (sl->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }

    if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
        /* Current and co-located MB differ in interlacing: one co-located
         * MV is examined per 8x8 block. n counts the blocks that met the
         * zero-motion test. */
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            int x8  = i8 & 1;
            int y8  = i8 >> 1;
            int xy8 = x8     + y8 * b8_stride;
            int xy4 = x8 * 3 + y8 * b4_stride;
            int a, b;

            /* In an 8x8 MB only the sub-blocks coded as direct are filled. */
            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                continue;
            sl->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);
            if (!IS_INTRA(mb_type_col[y8]) && !sl->ref_list[1][0].parent->long_ref &&
                ((l1ref0[xy8] == 0 &&
                  FFABS(l1mv0[xy4][0]) <= 1 &&
                  FFABS(l1mv0[xy4][1]) <= 1) ||
                 (l1ref0[xy8] < 0 &&
                  l1ref1[xy8] == 0 &&
                  FFABS(l1mv1[xy4][0]) <= 1 &&
                  FFABS(l1mv1[xy4][1]) <= 1))) {
                /* Zero-motion test met: only a list with a non-zero
                 * reference index keeps its predicted vector. */
                a =
                b = 0;
                if (ref[0] > 0)
                    a = mv[0];
                if (ref[1] > 0)
                    b = mv[1];
                n++;
            } else {
                a = mv[0];
                b = mv[1];
            }
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
        }
        /* n is 0 or 4 here when all four blocks got the same vectors, so
         * the MB can be marked as one 16x16 direct block. */
        if (!is_b8x8 && !(n & 3))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    } else if (IS_16X16(*mb_type)) {
        int a, b;

        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        /* Same zero-motion test on the first co-located block; here the
         * list-1 alternative additionally requires x264_build > 33
         * (NOTE(review): presumably compatibility with streams from old
         * x264 builds -- confirm). */
        if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
            ((l1ref0[0] == 0 &&
              FFABS(l1mv0[0][0]) <= 1 &&
              FFABS(l1mv0[0][1]) <= 1) ||
             (l1ref0[0] < 0 && !l1ref1[0] &&
              FFABS(l1mv1[0][0]) <= 1 &&
              FFABS(l1mv1[0][1]) <= 1 &&
              h->x264_build > 33U))) {
            a = b = 0;
            if (ref[0] > 0)
                a = mv[0];
            if (ref[1] > 0)
                b = mv[1];
        } else {
            a = mv[0];
            b = mv[1];
        }
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
    } else {
        /* n counts 4x4 blocks whose vectors were subject to zeroing
         * (an 8x8 sub-block handled as a whole counts as 4). */
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            const int x8 = i8 & 1;
            const int y8 = i8 >> 1;

            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                continue;
            sl->sub_mb_type[i8] = sub_mb_type;

            /* Start from the predicted values; selected vectors are
             * zeroed afterwards. */
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);

            assert(b8_stride == 2);
            /* col_zero_flag */
            if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
                (l1ref0[i8] == 0 ||
                 (l1ref0[i8] < 0 &&
                  l1ref1[i8] == 0 &&
                  h->x264_build > 33U))) {
                /* Use the MVs of whichever list supplied reference 0. */
                const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                if (IS_SUB_8X8(sub_mb_type)) {
                    /* One co-located MV decides for the whole 8x8 block. */
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                        if (ref[0] == 0)
                            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        if (ref[1] == 0)
                            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        n += 4;
                    }
                } else {
                    /* Each 4x4 block is tested against its own co-located
                     * MV; m counts the blocks that passed. */
                    int m = 0;
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                            if (ref[0] == 0)
                                AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
                            if (ref[1] == 0)
                                AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
                            m++;
                        }
                    }
                    /* All four or none passed: the sub-block is uniform,
                     * so turn the 4x4 sub-type back into the 8x8 one. */
                    if (!(m & 3))
                        sl->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
                    n += m;
                }
            }
        }
        /* n is 0 or 16 when the whole MB is uniform: mark it as 16x16. */
        if (!is_b8x8 && !(n & 15))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    }
}
485 
/**
 * Temporal direct prediction for the current macroblock.
 *
 * For every direct block the co-located motion vector mv_col of
 * sl->ref_list[1][0] is scaled by the distance scale factor of the mapped
 * list-0 reference: mvL0 = (scale * mv_col + 128) >> 8 and
 * mvL1 = mvL0 - mv_col. The list-0 reference index comes from
 * sl->map_col_to_list0 (or its per-field variant), the list-1 reference
 * index is always 0. Intra co-located blocks yield reference 0 and zero
 * vectors in both lists.
 *
 * Writes sl->ref_cache, sl->mv_cache and sl->sub_mb_type and updates the
 * partition / list bits of *mb_type.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose caches are filled
 * @param mb_type in/out macroblock type of the current macroblock
 */
static void pred_temp_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                    int *mb_type)
{
    /* Strides used to step from one 8x8 block row to the next in the
     * co-located ref_index / motion_val data; adjusted below. */
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

    assert(sl->ref_list[1][0].reference & 3);

    /* The co-located mb_type is read below; wait for that row first
     * (one row further when the current MB is interlaced). */
    await_reference_mb_row(h, &sl->ref_list[1][0],
                           sl->mb_y + !!IS_INTERLACED(*mb_type));

    /* Locate the co-located macroblock(s) depending on whether the current
     * and the co-located MB are interlaced. */
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
            mb_xy = sl->mb_x +
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
            /* 0 marks this case; see the pointer adjustment below. */
            b8_stride = 0;
        } else {
            mb_y  += sl->col_fieldoff;
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
            /* Two vertically adjacent co-located MBs are used, starting
             * at the even row. */
            mb_y           = sl->mb_y & ~1;
            mb_xy          = sl->mb_x + (sl->mb_y & ~1) * h->mb_stride;
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */

            /* MB_TYPE_16x16_OR_INTRA is defined inside
             * pred_spatial_direct_motion(). */
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        } else {                                    //     AFR/FR    -> AFR/FR
single_col:
            /* One co-located MB; the partitioning follows its type. */
            mb_type_col[0]     =
                mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->ps.sps->direct_8x8_inference_flag) {
                    /* FIXME: save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use */
                    sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                                  MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        }
    }

    /* mb_y may have been changed above; wait for the row actually read. */
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);

    /* Co-located motion vectors and reference indices of both lists. */
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        /* Odd MB row: start two reference entries / two 4x4 MV rows in. */
        if (sl->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }

    {
        /* Frame tables by default; replaced by the per-field tables for an
         * interlaced MB in an MBAFF frame. */
        const int *map_col_to_list0[2] = { sl->map_col_to_list0[0],
                                           sl->map_col_to_list0[1] };
        const int *dist_scale_factor = sl->dist_scale_factor;
        int ref_offset;

        if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
            map_col_to_list0[0] = sl->map_col_to_list0_field[sl->mb_y & 1][0];
            map_col_to_list0[1] = sl->map_col_to_list0_field[sl->mb_y & 1][1];
            dist_scale_factor   = sl->dist_scale_factor_field[sl->mb_y & 1];
        }
        /* Offset added to co-located reference indices before the map
         * lookup. NOTE(review): evaluates to 16 when parent->mbaff is 1 and
         * the bit of mb_type_col[0] that lands on 16 after >> 3 is set;
         * presumably that bit is MB_TYPE_INTERLACED -- confirm. */
        ref_offset = (sl->ref_list[1][0].parent->mbaff << 4) & (mb_type_col[0] >> 3);

        if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
            /* Current and co-located MB differ in interlacing: the vertical
             * co-located MV component is rescaled (doubled when the current
             * MB is not interlaced, halved when it is). */
            int y_shift = 2 * !IS_INTERLACED(*mb_type);
            assert(h->ps.sps->direct_8x8_inference_flag);

            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                /* In an 8x8 MB only the sub-blocks coded as direct are
                 * filled. */
                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                    continue;
                sl->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[y8])) {
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                /* Prefer the co-located list-0 data; fall back to list 1
                 * when the list-0 reference index is negative. */
                ref0 = l1ref0[x8 + y8 * b8_stride];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else {
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
                                               ref_offset];
                    l1mv = l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
                    int my_col            = (mv_col[1] * (1 << y_shift)) / 2;
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * my_col    + 128) >> 8;
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if (IS_16X16(*mb_type)) {
            int ref, mv0, mv1;

            fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if (IS_INTRA(mb_type_col[0])) {
                ref = mv0 = mv1 = 0;
            } else {
                /* Reference and MV come from the first co-located block,
                 * list 0 if it has a reference there, else list 1. */
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref      = ref0;
                mv0      = pack16to32(mv_l0[0], mv_l0[1]);
                mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
            }
            fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        } else {
            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                    continue;
                sl->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[0])) {
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                assert(b8_stride == 2);
                ref0 = l1ref0[i8];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else {
                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
                    l1mv = l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);
                if (IS_SUB_8X8(sub_mb_type)) {
                    /* One co-located MV is scaled for the whole 8x8 block. */
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
                } else {
                    /* Each 4x4 block scales its own co-located MV. */
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
                                 pack16to32(mv_l0[0] - mv_col[0],
                                            mv_l0[1] - mv_col[1]));
                    }
                }
            }
        }
    }
}
719 
/**
 * Predict the motion of a direct macroblock, dispatching on the slice's
 * direct_spatial_mv_pred flag to spatial or temporal direct prediction.
 *
 * @param h       decoder context (read only)
 * @param sl      slice context whose caches are filled
 * @param mb_type in/out macroblock type of the current macroblock
 */
void ff_h264_pred_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                int *mb_type)
{
    if (!sl->direct_spatial_mv_pred) {
        pred_temp_direct_motion(h, sl, mb_type);
        return;
    }

    pred_spatial_direct_motion(h, sl, mb_type);
}
728