/*
 * H.26L/H.264/AVC/JVT/14496-10/... motion vector prediction
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part10 motion vector prediction.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#ifndef AVCODEC_H264_MVPRED_H
#define AVCODEC_H264_MVPRED_H

#include "h264dec.h"
#include "mpegutils.h"
#include "libavutil/avassert.h"
#include "libavutil/mem_internal.h"


static av_always_inline int fetch_diagonal_mv(const H264Context *h, H264SliceContext *sl,
                                              const int16_t **C,
                                              int i, int list, int part_width)
{
    const int topright_ref = sl->ref_cache[list][i - 8 + part_width];

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if (FRAME_MBAFF(h)) {
#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)                              \
        const int xy = XY, y4 = Y4;                                     \
        const int mb_type = mb_types[xy + (y4 >> 2) * h->mb_stride];    \
        if (!USES_LIST(mb_type, list))                                  \
            return LIST_NOT_USED;                                       \
        mv = h->cur_pic_ptr->motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \
        sl->mv_cache[list][scan8[0] - 2][0] = mv[0];                    \
        sl->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;              \
        return h->cur_pic_ptr->ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP;

        if (topright_ref == PART_NOT_AVAILABLE
            && i >= scan8[0] + 8 && (i & 7) == 4
            && sl->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
            const uint32_t *mb_types = h->cur_pic_ptr->mb_type;
            const int16_t *mv;
            AV_ZERO32(sl->mv_cache[list][scan8[0] - 2]);
            *C = sl->mv_cache[list][scan8[0] - 2];

            if (!MB_FIELD(sl) && IS_INTERLACED(sl->left_type[0])) {
                SET_DIAG_MV(* 2, >> 1, sl->left_mb_xy[0] + h->mb_stride,
                            (sl->mb_y & 1) * 2 + (i >> 5));
            }
            if (MB_FIELD(sl) && !IS_INTERLACED(sl->left_type[0])) {
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/ 2, *2, sl->left_mb_xy[i >= 36], ((i >> 2)) & 3);
            }
        }
#undef SET_DIAG_MV
    }

    if (topright_ref != PART_NOT_AVAILABLE) {
        *C = sl->mv_cache[list][i - 8 + part_width];
        return topright_ref;
    } else {
        ff_tlog(h->avctx, "topright MV not available\n");

        *C = sl->mv_cache[list][i - 8 - 1];
        return sl->ref_cache[list][i - 8 - 1];
    }
}

/**
 * Get the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_motion(const H264Context *const h,
                                         H264SliceContext *sl,
                                         int n,
                                         int part_width, int list, int ref,
                                         int *const mx, int *const my)
{
    const int index8       = scan8[n];
    const int top_ref      = sl->ref_cache[list][index8 - 8];
    const int left_ref     = sl->ref_cache[list][index8 - 1];
    const int16_t *const A = sl->mv_cache[list][index8 - 1];
    const int16_t *const B = sl->mv_cache[list][index8 - 8];
    const int16_t *C;
    int diagonal_ref, match_count;

    av_assert2(part_width == 1 || part_width == 2 || part_width == 4);

/* mv_cache
 * B . . A T T T T
 * U . . L . . , .
 * U . . L . . . .
 * U . . L . . , .
 * . . . L . . . .
 */

    diagonal_ref = fetch_diagonal_mv(h, sl, &C, index8, list, part_width);
    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
    ff_tlog(h->avctx, "pred_motion match_count=%d\n", match_count);
    if (match_count > 1) { //most common
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
        } else if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
        } else {
            *mx = C[0];
            *my = C[1];
        }
    } else {
        if (top_ref      == PART_NOT_AVAILABLE &&
            diagonal_ref == PART_NOT_AVAILABLE &&
            left_ref     != PART_NOT_AVAILABLE) {
            *mx = A[0];
            *my = A[1];
        } else {
            *mx = mid_pred(A[0], B[0], C[0]);
            *my = mid_pred(A[1], B[1], C[1]);
        }
    }

    ff_tlog(h->avctx,
            "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n",
            top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref,
            A[0], A[1], ref, *mx, *my, sl->mb_x, sl->mb_y, n, list);
}

/**
 * Get the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_16x8_motion(const H264Context *const h,
                                              H264SliceContext *sl,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int top_ref      = sl->ref_cache[list][scan8[0] - 8];
        const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];

        ff_tlog(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                top_ref, B[0], B[1], sl->mb_x, sl->mb_y, n, list);

        if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
            return;
        }
    } else {
        const int left_ref     = sl->ref_cache[list][scan8[8] - 1];
        const int16_t *const A = sl->mv_cache[list][scan8[8] - 1];

        ff_tlog(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], sl->mb_x, sl->mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    }

    //RARE
    pred_motion(h, sl, n, 4, list, ref, mx, my);
}

/**
 * Get the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_8x16_motion(const H264Context *const h,
                                              H264SliceContext *sl,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int left_ref     = sl->ref_cache[list][scan8[0] - 1];
        const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];

        ff_tlog(h->avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], sl->mb_x, sl->mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    } else {
        const int16_t *C;
        int diagonal_ref;

        diagonal_ref = fetch_diagonal_mv(h, sl, &C, scan8[4], list, 2);

        ff_tlog(h->avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                diagonal_ref, C[0], C[1], sl->mb_x, sl->mb_y, n, list);

        if (diagonal_ref == ref) {
            *mx = C[0];
            *my = C[1];
            return;
        }
    }

    //RARE
    pred_motion(h, sl, n, 2, list, ref, mx, my);
}

#define FIX_MV_MBAFF(type, refn, mvn, idx)      \
    if (FRAME_MBAFF(h)) {                       \
        if (MB_FIELD(sl)) {                     \
            if (!IS_INTERLACED(type)) {         \
                refn <<= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] /= 2;             \
                mvn = mvbuf[idx];               \
            }                                   \
        } else {                                \
            if (IS_INTERLACED(type)) {          \
                refn >>= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] *= 2;             \
                mvn = mvbuf[idx];               \
            }                                   \
        }                                       \
    }

static av_always_inline void pred_pskip_motion(const H264Context *const h,
                                               H264SliceContext *sl)
{
    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 };
    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
    int8_t *ref     = h->cur_pic.ref_index[0];
    int16_t(*mv)[2] = h->cur_pic.motion_val[0];
    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
    const int16_t *A, *B, *C;
    int b_stride = h->b_stride;

    fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);

    /* To avoid doing an entire fill_decode_caches, we inline the relevant
     * parts here.
     * FIXME: this is a partial duplicate of the logic in fill_decode_caches,
     * but it's faster this way.  Is there a way to avoid this duplication?
     */
    if (USES_LIST(sl->left_type[LTOP], 0)) {
        left_ref = ref[4 * sl->left_mb_xy[LTOP] + 1 + (sl->left_block[0] & ~1)];
        A        = mv[h->mb2b_xy[sl->left_mb_xy[LTOP]] + 3 + b_stride * sl->left_block[0]];
        FIX_MV_MBAFF(sl->left_type[LTOP], left_ref, A, 0);
        if (!(left_ref | AV_RN32A(A)))
            goto zeromv;
    } else if (sl->left_type[LTOP]) {
        left_ref = LIST_NOT_USED;
        A        = zeromv;
    } else {
        goto zeromv;
    }

    if (USES_LIST(sl->top_type, 0)) {
        top_ref = ref[4 * sl->top_mb_xy + 2];
        B       = mv[h->mb2b_xy[sl->top_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(sl->top_type, top_ref, B, 1);
        if (!(top_ref | AV_RN32A(B)))
            goto zeromv;
    } else if (sl->top_type) {
        top_ref = LIST_NOT_USED;
        B       = zeromv;
    } else {
        goto zeromv;
    }

    ff_tlog(h->avctx, "pred_pskip: (%d) (%d) at %2d %2d\n",
            top_ref, left_ref, sl->mb_x, sl->mb_y);

    if (USES_LIST(sl->topright_type, 0)) {
        diagonal_ref = ref[4 * sl->topright_mb_xy + 2];
        C            = mv[h->mb2b_xy[sl->topright_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(sl->topright_type, diagonal_ref, C, 2);
    } else if (sl->topright_type) {
        diagonal_ref = LIST_NOT_USED;
        C            = zeromv;
    } else {
        if (USES_LIST(sl->topleft_type, 0)) {
            diagonal_ref = ref[4 * sl->topleft_mb_xy + 1 +
                               (sl->topleft_partition & 2)];
            C            = mv[h->mb2b_xy[sl->topleft_mb_xy] + 3 + b_stride +
                              (sl->topleft_partition & 2 * b_stride)];
            FIX_MV_MBAFF(sl->topleft_type, diagonal_ref, C, 2);
        } else if (sl->topleft_type) {
            diagonal_ref = LIST_NOT_USED;
            C            = zeromv;
        } else {
            diagonal_ref = PART_NOT_AVAILABLE;
            C            = zeromv;
        }
    }

    match_count = !diagonal_ref + !top_ref + !left_ref;
    ff_tlog(h->avctx, "pred_pskip_motion match_count=%d\n", match_count);
    if (match_count > 1) {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (!left_ref) {
            mx = A[0];
            my = A[1];
        } else if (!top_ref) {
            mx = B[0];
            my = B[1];
        } else {
            mx = C[0];
            my = C[1];
        }
    } else {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    }

    fill_rectangle(sl->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
    return;

zeromv:
    fill_rectangle(sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
    return;
}

static void fill_decode_neighbors(const H264Context *h, H264SliceContext *sl, int mb_type)
{
    const int mb_xy = sl->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    static const uint8_t left_block_options[4][32] = {
        { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 },
        { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }
    };

    sl->topleft_partition = -1;

    top_xy = mb_xy - (h->mb_stride << MB_FIELD(sl));

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy    = top_xy - 1;
    topright_xy   = top_xy + 1;
    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    sl->left_block = left_block_options[0];
    if (FRAME_MBAFF(h)) {
        const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if (sl->mb_y & 1) {
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[LBOT] = left_xy[LTOP] = mb_xy - h->mb_stride - 1;
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += h->mb_stride;
                    sl->left_block = left_block_options[3];
                } else {
                    topleft_xy += h->mb_stride;
                    /* take top left mv from the middle of the mb, as opposed
                     * to all other modes which use the bottom right partition */
                    sl->topleft_partition = 0;
                    sl->left_block        = left_block_options[1];
                }
            }
        } else {
            if (curr_mb_field_flag) {
                topleft_xy  += h->mb_stride & (((h->cur_pic.mb_type[top_xy - 1] >> 7) & 1) - 1);
                topright_xy += h->mb_stride & (((h->cur_pic.mb_type[top_xy + 1] >> 7) & 1) - 1);
                top_xy      += h->mb_stride & (((h->cur_pic.mb_type[top_xy]     >> 7) & 1) - 1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += h->mb_stride;
                    sl->left_block = left_block_options[3];
                } else {
                    sl->left_block = left_block_options[2];
                }
            }
        }
    }

    sl->topleft_mb_xy    = topleft_xy;
    sl->top_mb_xy        = top_xy;
    sl->topright_mb_xy   = topright_xy;
    sl->left_mb_xy[LTOP] = left_xy[LTOP];
    sl->left_mb_xy[LBOT] = left_xy[LBOT];
    //FIXME do we need all in the context?

    sl->topleft_type    = h->cur_pic.mb_type[topleft_xy];
    sl->top_type        = h->cur_pic.mb_type[top_xy];
    sl->topright_type   = h->cur_pic.mb_type[topright_xy];
    sl->left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]];
    sl->left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]];

    if (FMO) {
        if (h->slice_table[topleft_xy] != sl->slice_num)
            sl->topleft_type = 0;
        if (h->slice_table[top_xy] != sl->slice_num)
            sl->top_type = 0;
        if (h->slice_table[left_xy[LTOP]] != sl->slice_num)
            sl->left_type[LTOP] = sl->left_type[LBOT] = 0;
    } else {
        if (h->slice_table[topleft_xy] != sl->slice_num) {
            sl->topleft_type = 0;
            if (h->slice_table[top_xy] != sl->slice_num)
                sl->top_type = 0;
            if (h->slice_table[left_xy[LTOP]] != sl->slice_num)
                sl->left_type[LTOP] = sl->left_type[LBOT] = 0;
        }
    }
    if (h->slice_table[topright_xy] != sl->slice_num)
        sl->topright_type = 0;
}

static void fill_decode_caches(const H264Context *h, H264SliceContext *sl, int mb_type)
{
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
    const uint8_t *left_block = sl->left_block;
    int i;
    uint8_t *nnz;
    uint8_t *nnz_cache;

    topleft_xy      = sl->topleft_mb_xy;
    top_xy          = sl->top_mb_xy;
    topright_xy     = sl->topright_mb_xy;
    left_xy[LTOP]   = sl->left_mb_xy[LTOP];
    left_xy[LBOT]   = sl->left_mb_xy[LBOT];
    topleft_type    = sl->topleft_type;
    top_type        = sl->top_type;
    topright_type   = sl->topright_type;
    left_type[LTOP] = sl->left_type[LTOP];
    left_type[LBOT] = sl->left_type[LBOT];

    if (!IS_SKIP(mb_type)) {
        if (IS_INTRA(mb_type)) {
            int type_mask = h->ps.pps->constrained_intra_pred ? IS_INTRA(-1) : -1;
            sl->topleft_samples_available      =
                sl->top_samples_available      =
                    sl->left_samples_available = 0xFFFF;
            sl->topright_samples_available     = 0xEEEA;

            if (!(top_type & type_mask)) {
                sl->topleft_samples_available  = 0xB3FF;
                sl->top_samples_available      = 0x33FF;
                sl->topright_samples_available = 0x26EA;
            }
            if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) {
                if (IS_INTERLACED(mb_type)) {
                    if (!(left_type[LTOP] & type_mask)) {
                        sl->topleft_samples_available &= 0xDFFF;
                        sl->left_samples_available    &= 0x5FFF;
                    }
                    if (!(left_type[LBOT] & type_mask)) {
                        sl->topleft_samples_available &= 0xFF5F;
                        sl->left_samples_available    &= 0xFF5F;
                    }
                } else {
                    int left_typei = h->cur_pic.mb_type[left_xy[LTOP] + h->mb_stride];

                    av_assert2(left_xy[LTOP] == left_xy[LBOT]);
                    if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) {
                        sl->topleft_samples_available &= 0xDF5F;
                        sl->left_samples_available    &= 0x5F5F;
                    }
                }
            } else {
                if (!(left_type[LTOP] & type_mask)) {
                    sl->topleft_samples_available &= 0xDF5F;
                    sl->left_samples_available    &= 0x5F5F;
                }
            }

            if (!(topleft_type & type_mask))
                sl->topleft_samples_available &= 0x7FFF;

            if (!(topright_type & type_mask))
                sl->topright_samples_available &= 0xFBFF;

            if (IS_INTRA4x4(mb_type)) {
                if (IS_INTRA4x4(top_type)) {
                    AV_COPY32(sl->intra4x4_pred_mode_cache + 4 + 8 * 0, sl->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                } else {
                    sl->intra4x4_pred_mode_cache[4 + 8 * 0] =
                    sl->intra4x4_pred_mode_cache[5 + 8 * 0] =
                    sl->intra4x4_pred_mode_cache[6 + 8 * 0] =
                    sl->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
                }
                for (i = 0; i < 2; i++) {
                    if (IS_INTRA4x4(left_type[LEFT(i)])) {
                        int8_t *mode = sl->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                        sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
                        sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
                    } else {
                        sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
                        sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
                    }
                }
            }
        }

        /*
         * 0 . T T. T T T T
         * 1 L . .L . . . .
         * 2 L . .L . . . .
         * 3 . T TL . . . .
         * 4 L . .L . . . .
         * 5 L . .. . . . .
         */
        /* FIXME: constraint_intra_pred & partitioning & nnz
         * (let us hope this is just a typo in the spec) */
        nnz_cache = sl->non_zero_count_cache;
        if (top_type) {
            nnz = h->non_zero_count[top_xy];
            AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]);
            if (!h->chroma_y_shift) {
                AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]);
            } else {
                AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]);
            }
        } else {
            uint32_t top_empty = CABAC(h) && !IS_INTRA(mb_type) ? 0 : 0x40404040;
            AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty);
        }

        for (i = 0; i < 2; i++) {
            if (left_type[LEFT(i)]) {
                nnz = h->non_zero_count[left_xy[LEFT(i)]];
                nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]];
                nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]];
                if (CHROMA444(h)) {
                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4];
                } else if (CHROMA422(h)) {
                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4];
                } else {
                    nnz_cache[3 + 8 *  6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]];
                    nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]];
                }
            } else {
                nnz_cache[3 + 8 *  1 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  2 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  6 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  7 + 2 * 8 * i] =
                nnz_cache[3 + 8 * 11 + 2 * 8 * i] =
                nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC(h) && !IS_INTRA(mb_type) ? 0 : 64;
            }
        }

        if (CABAC(h)) {
            // top_cbp
            if (top_type)
                sl->top_cbp = h->cbp_table[top_xy];
            else
                sl->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            // left_cbp
            if (left_type[LTOP]) {
                sl->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0) |
                                ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) |
                               (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2);
            } else {
                sl->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            }
        }
    }

    if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && sl->direct_spatial_mv_pred)) {
        int list;
        int b_stride = h->b_stride;
        for (list = 0; list < sl->list_count; list++) {
            int8_t *ref_cache = &sl->ref_cache[list][scan8[0]];
            int8_t *ref       = h->cur_pic.ref_index[list];
            int16_t(*mv_cache)[2] = &sl->mv_cache[list][scan8[0]];
            int16_t(*mv)[2]       = h->cur_pic.motion_val[list];
            if (!USES_LIST(mb_type, list))
                continue;
            av_assert2(!(IS_DIRECT(mb_type) && !sl->direct_spatial_mv_pred));

            if (USES_LIST(top_type, list)) {
                const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
                AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]);
                ref_cache[0 - 1 * 8] =
                ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2];
                ref_cache[2 - 1 * 8] =
                ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3];
            } else {
                AV_ZERO128(mv_cache[0 - 1 * 8]);
                AV_WN32A(&ref_cache[0 - 1 * 8],
                         ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u);
            }

            if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) {
                for (i = 0; i < 2; i++) {
                    int cache_idx = -1 + i * 2 * 8;
                    if (USES_LIST(left_type[LEFT(i)], list)) {
                        const int b_xy  = h->mb2b_xy[left_xy[LEFT(i)]] + 3;
                        const int b8_xy = 4 * left_xy[LEFT(i)] + 1;
                        AV_COPY32(mv_cache[cache_idx],
                                  mv[b_xy + b_stride * left_block[0 + i * 2]]);
                        AV_COPY32(mv_cache[cache_idx + 8],
                                  mv[b_xy + b_stride * left_block[1 + i * 2]]);
                        ref_cache[cache_idx]     = ref[b8_xy + (left_block[0 + i * 2] & ~1)];
                        ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)];
                    } else {
                        AV_ZERO32(mv_cache[cache_idx]);
                        AV_ZERO32(mv_cache[cache_idx + 8]);
                        ref_cache[cache_idx]     =
                        ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? LIST_NOT_USED
                                                                        : PART_NOT_AVAILABLE;
                    }
                }
            } else {
                if (USES_LIST(left_type[LTOP], list)) {
                    const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                    const int b8_xy = 4 * left_xy[LTOP] + 1;
                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]);
                    ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)];
                } else {
                    AV_ZERO32(mv_cache[-1]);
                    ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED
                                                    : PART_NOT_AVAILABLE;
                }
            }

            if (USES_LIST(topright_type, list)) {
                const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride;
                AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]);
                ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2];
            } else {
                AV_ZERO32(mv_cache[4 - 1 * 8]);
                ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED
                                                     : PART_NOT_AVAILABLE;
            }
            if (ref_cache[2 - 1 * 8] < 0 || ref_cache[4 - 1 * 8] < 0) {
                if (USES_LIST(topleft_type, list)) {
                    const int b_xy  = h->mb2b_xy[topleft_xy] + 3 + b_stride +
                                      (sl->topleft_partition & 2 * b_stride);
                    const int b8_xy = 4 * topleft_xy + 1 + (sl->topleft_partition & 2);
                    AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]);
                    ref_cache[-1 - 1 * 8] = ref[b8_xy];
                } else {
                    AV_ZERO32(mv_cache[-1 - 1 * 8]);
                    ref_cache[-1 - 1 * 8] = topleft_type ? LIST_NOT_USED
                                                         : PART_NOT_AVAILABLE;
                }
            }

            if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF(h))
                continue;

            if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) {
                uint8_t(*mvd_cache)[2] = &sl->mvd_cache[list][scan8[0]];
                uint8_t(*mvd)[2]       = sl->mvd_table[list];
                ref_cache[2 + 8 * 0] =
                ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE;
                AV_ZERO32(mv_cache[2 + 8 * 0]);
                AV_ZERO32(mv_cache[2 + 8 * 2]);

                if (CABAC(h)) {
                    if (USES_LIST(top_type, list)) {
                        const int b_xy = h->mb2br_xy[top_xy];
                        AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]);
                    } else {
                        AV_ZERO64(mvd_cache[0 - 1 * 8]);
                    }
                    if (USES_LIST(left_type[LTOP], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6;
                        AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]);
                        AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 0 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 1 * 8]);
                    }
                    if (USES_LIST(left_type[LBOT], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6;
                        AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]);
                        AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 2 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 3 * 8]);
                    }
                    AV_ZERO16(mvd_cache[2 + 8 * 0]);
                    AV_ZERO16(mvd_cache[2 + 8 * 2]);
                    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
                        uint8_t *direct_cache = &sl->direct_cache[scan8[0]];
                        uint8_t *direct_table = h->direct_table;
                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1);

                        if (IS_DIRECT(top_type)) {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101u * (MB_TYPE_DIRECT2 >> 1));
                        } else if (IS_8X8(top_type)) {
                            int b8_xy = 4 * top_xy;
                            direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2];
                            direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3];
                        } else {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101 * (MB_TYPE_16x16 >> 1));
                        }

                        if (IS_DIRECT(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)];
                        else
                            direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1;

                        if (IS_DIRECT(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)];
                        else
                            direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1;
                    }
                }
            }

#define MAP_MVS                                                 \
    MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type)                 \
    MAP_F2F(scan8[0] + 0 - 1 * 8, top_type)                     \
    MAP_F2F(scan8[0] + 1 - 1 * 8, top_type)                     \
    MAP_F2F(scan8[0] + 2 - 1 * 8, top_type)                     \
    MAP_F2F(scan8[0] + 3 - 1 * 8, top_type)                     \
    MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type)                \
    MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP])              \
    MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP])              \
    MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT])              \
    MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT])

            if (FRAME_MBAFF(h)) {
                if (MB_FIELD(sl)) {

#define MAP_F2F(idx, mb_type)                                           \
    if (!IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {     \
        sl->ref_cache[list][idx]    *= 2;                               \
        sl->mv_cache[list][idx][1]  /= 2;                               \
        sl->mvd_cache[list][idx][1] >>= 1;                              \
    }

                    MAP_MVS
                } else {

#undef MAP_F2F
#define MAP_F2F(idx, mb_type)                                           \
    if (IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {      \
        sl->ref_cache[list][idx]    >>= 1;                              \
        sl->mv_cache[list][idx][1]  *= 2;                               \
        sl->mvd_cache[list][idx][1] <<= 1;                              \
    }

                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }

    sl->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
}

/**
 * decodes a P_SKIP or B_SKIP macroblock
 */
static void av_unused decode_mb_skip(const H264Context *h, H264SliceContext *sl)
{
    const int mb_xy = sl->mb_xy;
    int mb_type     = 0;

    memset(h->non_zero_count[mb_xy], 0, 48);

    if (MB_FIELD(sl))
        mb_type |= MB_TYPE_INTERLACED;

    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP;
        if (sl->direct_spatial_mv_pred) {
            fill_decode_neighbors(h, sl, mb_type);
            fill_decode_caches(h, sl, mb_type); //FIXME check what is needed and what not ...
        }
        ff_h264_pred_direct_motion(h, sl, &mb_type);
        mb_type |= MB_TYPE_SKIP;
    } else {
        mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP;

        fill_decode_neighbors(h, sl, mb_type);
        pred_pskip_motion(h, sl);
    }

    write_back_motion(h, sl, mb_type);
    h->cur_pic.mb_type[mb_xy]      = mb_type;
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
    h->slice_table[mb_xy]          = sl->slice_num;
    sl->prev_mb_skipped            = 1;
}

#endif /* AVCODEC_H264_MVPRED_H */