1/* 2 * VP7/VP8 compatible video decoder 3 * 4 * Copyright (C) 2010 David Conrad 5 * Copyright (C) 2010 Ronald S. Bultje 6 * Copyright (C) 2010 Fiona Glaser 7 * Copyright (C) 2012 Daniel Kang 8 * Copyright (C) 2014 Peter Ross 9 * 10 * This file is part of FFmpeg. 11 * 12 * FFmpeg is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2.1 of the License, or (at your option) any later version. 16 * 17 * FFmpeg is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with FFmpeg; if not, write to the Free Software 24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 25 */ 26 27#include "config_components.h" 28 29#include "libavutil/imgutils.h" 30#include "libavutil/mem_internal.h" 31 32#include "avcodec.h" 33#include "codec_internal.h" 34#include "hwconfig.h" 35#include "internal.h" 36#include "mathops.h" 37#include "rectangle.h" 38#include "thread.h" 39#include "threadframe.h" 40#include "vp8.h" 41#include "vp8data.h" 42 43#if ARCH_ARM 44# include "arm/vp8.h" 45#endif 46 47#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER 48#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f) 49#elif CONFIG_VP7_DECODER 50#define VPX(vp7, f) vp7_ ## f 51#else // CONFIG_VP8_DECODER 52#define VPX(vp7, f) vp8_ ## f 53#endif 54 55static void free_buffers(VP8Context *s) 56{ 57 int i; 58 if (s->thread_data) 59 for (i = 0; i < MAX_THREADS; i++) { 60#if HAVE_THREADS 61 pthread_cond_destroy(&s->thread_data[i].cond); 62 pthread_mutex_destroy(&s->thread_data[i].lock); 63#endif 64 av_freep(&s->thread_data[i].filter_strength); 65 } 66 av_freep(&s->thread_data); 67 av_freep(&s->macroblocks_base); 68 av_freep(&s->intra4x4_pred_mode_top); 69 av_freep(&s->top_nnz); 70 av_freep(&s->top_border); 71 72 s->macroblocks = NULL; 73} 74 75static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) 76{ 77 int ret; 78 if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf, 79 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0) 80 return ret; 81 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) 82 goto fail; 83 if (s->avctx->hwaccel) { 84 const AVHWAccel *hwaccel = s->avctx->hwaccel; 85 if (hwaccel->frame_priv_data_size) { 86 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); 87 if (!f->hwaccel_priv_buf) 88 goto fail; 89 f->hwaccel_picture_private = f->hwaccel_priv_buf->data; 90 } 91 } 92 return 0; 93 94fail: 95 av_buffer_unref(&f->seg_map); 96 ff_thread_release_ext_buffer(s->avctx, &f->tf); 97 return AVERROR(ENOMEM); 98} 99 100static void vp8_release_frame(VP8Context *s, VP8Frame *f) 101{ 102 av_buffer_unref(&f->seg_map); 103 av_buffer_unref(&f->hwaccel_priv_buf); 104 f->hwaccel_picture_private = NULL; 105 ff_thread_release_ext_buffer(s->avctx, &f->tf); 106} 107 108#if CONFIG_VP8_DECODER 109static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src) 110{ 111 int ret; 112 113 vp8_release_frame(s, dst); 114 115 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) 116 return ret; 117 if (src->seg_map && 118 !(dst->seg_map = av_buffer_ref(src->seg_map))) { 119 vp8_release_frame(s, dst); 120 return AVERROR(ENOMEM); 121 } 122 if (src->hwaccel_picture_private) { 123 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); 124 if (!dst->hwaccel_priv_buf) 125 return AVERROR(ENOMEM); 126 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; 127 } 128 129 return 0; 130} 131#endif /* CONFIG_VP8_DECODER */ 132 133static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) 134{ 135 VP8Context *s = avctx->priv_data; 136 int i; 137 138 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) 139 vp8_release_frame(s, &s->frames[i]); 140 memset(s->framep, 0, sizeof(s->framep)); 141 142 if (free_mem) 143 free_buffers(s); 144} 145 146static void vp8_decode_flush(AVCodecContext *avctx) 147{ 148 vp8_decode_flush_impl(avctx, 0); 149} 150 151static VP8Frame *vp8_find_free_buffer(VP8Context *s) 152{ 153 VP8Frame *frame = NULL; 154 int i; 155 156 // find a free buffer 157 for (i = 0; i < 5; i++) 158 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] && 159 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && 160 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && 161 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { 162 frame = &s->frames[i]; 163 break; 164 } 165 if (i == 5) { 166 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); 167 abort(); 168 } 169 if (frame->tf.f->buf[0]) 170 vp8_release_frame(s, frame); 171 172 return frame; 173} 174 175static enum AVPixelFormat get_pixel_format(VP8Context *s) 176{ 177 enum AVPixelFormat pix_fmts[] = { 178#if CONFIG_VP8_VAAPI_HWACCEL 179 AV_PIX_FMT_VAAPI, 180#endif 181#if CONFIG_VP8_NVDEC_HWACCEL 182 AV_PIX_FMT_CUDA, 183#endif 184 AV_PIX_FMT_YUV420P, 185 AV_PIX_FMT_NONE, 186 }; 187 188 return ff_get_format(s->avctx, pix_fmts); 189} 190 191static av_always_inline 192int update_dimensions(VP8Context *s, int width, int height, int is_vp7) 193{ 194 AVCodecContext *avctx = s->avctx; 195 int i, ret, dim_reset = 0; 196 197 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || 198 height != s->avctx->height) { 199 vp8_decode_flush_impl(s->avctx, 1); 200 201 ret = ff_set_dimensions(s->avctx, width, height); 202 if (ret < 0) 203 return ret; 204 205 dim_reset = (s->macroblocks_base != NULL); 206 } 207 208 if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) && 209 !s->actually_webp && !is_vp7) { 210 s->pix_fmt = get_pixel_format(s); 211 if (s->pix_fmt < 0) 212 return AVERROR(EINVAL); 213 avctx->pix_fmt = s->pix_fmt; 214 } 215 216 s->mb_width = (s->avctx->coded_width + 15) / 16; 217 s->mb_height = (s->avctx->coded_height + 15) / 16; 218 219 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE && 220 avctx->thread_count > 1; 221 if (!s->mb_layout) { // Frame threading and one thread 222 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * 223 sizeof(*s->macroblocks)); 224 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); 225 } else // Sliced threading 226 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * 227 sizeof(*s->macroblocks)); 228 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz)); 229 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); 230 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); 231 232 if (!s->macroblocks_base || !s->top_nnz || !s->top_border || 233 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) { 234 free_buffers(s); 235 return AVERROR(ENOMEM); 236 } 237 238 for (i = 0; i < MAX_THREADS; i++) { 239 s->thread_data[i].filter_strength = 240 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength)); 241 if (!s->thread_data[i].filter_strength) { 242 free_buffers(s); 243 return AVERROR(ENOMEM); 244 } 245#if HAVE_THREADS 246 pthread_mutex_init(&s->thread_data[i].lock, NULL); 247 pthread_cond_init(&s->thread_data[i].cond, NULL); 248#endif 249 } 250 251 s->macroblocks = s->macroblocks_base + 1; 252 253 return 0; 254} 255 256static int vp7_update_dimensions(VP8Context *s, int width, int height) 257{ 258 return update_dimensions(s, width, height, IS_VP7); 259} 260 261static int vp8_update_dimensions(VP8Context *s, int width, int height) 262{ 263 return update_dimensions(s, width, height, IS_VP8); 264} 265 266 267static void parse_segment_info(VP8Context *s) 268{ 269 VP56RangeCoder *c = &s->c; 270 int i; 271 272 s->segmentation.update_map = vp8_rac_get(c); 273 s->segmentation.update_feature_data = vp8_rac_get(c); 274 275 if (s->segmentation.update_feature_data) { 276 s->segmentation.absolute_vals = vp8_rac_get(c); 277 278 for (i = 0; i < 4; i++) 279 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); 280 281 for (i = 0; i < 4; i++) 282 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); 283 } 284 if (s->segmentation.update_map) 285 for (i = 0; i < 3; i++) 286 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; 287} 288 289static void update_lf_deltas(VP8Context *s) 290{ 291 VP56RangeCoder *c = &s->c; 292 int i; 293 294 for (i = 0; i < 4; i++) { 295 if (vp8_rac_get(c)) { 296 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6); 297 298 if (vp8_rac_get(c)) 299 s->lf_delta.ref[i] = -s->lf_delta.ref[i]; 300 } 301 } 302 303 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) { 304 if (vp8_rac_get(c)) { 305 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6); 306 307 if (vp8_rac_get(c)) 308 s->lf_delta.mode[i] = -s->lf_delta.mode[i]; 309 } 310 } 311} 312 313static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) 314{ 315 const uint8_t *sizes = buf; 316 int i; 317 int ret; 318 319 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); 320 321 buf += 3 * (s->num_coeff_partitions - 1); 322 buf_size -= 3 * (s->num_coeff_partitions - 1); 323 if (buf_size < 0) 324 return -1; 325 326 for (i = 0; i < s->num_coeff_partitions - 1; i++) { 327 int size = AV_RL24(sizes + 3 * i); 328 if (buf_size - size < 0) 329 return -1; 330 s->coeff_partition_size[i] = size; 331 332 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); 333 if (ret < 0) 334 return ret; 335 buf += size; 336 buf_size -= size; 337 } 338 339 s->coeff_partition_size[i] = buf_size; 340 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); 341 342 return 0; 343} 344 345static void vp7_get_quants(VP8Context *s) 346{ 347 VP56RangeCoder *c = &s->c; 348 349 int yac_qi = vp8_rac_get_uint(c, 7); 350 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; 351 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; 352 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; 353 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; 354 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; 355 356 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi]; 357 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi]; 358 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi]; 359 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi]; 360 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132); 361 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi]; 362} 363 364static void vp8_get_quants(VP8Context *s) 365{ 366 VP56RangeCoder *c = &s->c; 367 int i, base_qi; 368 369 s->quant.yac_qi = vp8_rac_get_uint(c, 7); 370 s->quant.ydc_delta = vp8_rac_get_sint(c, 4); 371 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4); 372 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4); 373 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4); 374 s->quant.uvac_delta = vp8_rac_get_sint(c, 4); 375 376 for (i = 0; i < 4; i++) { 377 if (s->segmentation.enabled) { 378 base_qi = s->segmentation.base_quant[i]; 379 if (!s->segmentation.absolute_vals) 380 base_qi += s->quant.yac_qi; 381 } else 382 base_qi = s->quant.yac_qi; 383 384 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)]; 385 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; 386 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2; 387 /* 101581>>16 is equivalent to 155/100 */ 388 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16; 389 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)]; 390 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)]; 391 392 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); 393 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); 394 } 395} 396 397/** 398 * Determine which buffers golden and altref should be updated with after this frame. 399 * The spec isn't clear here, so I'm going by my understanding of what libvpx does 400 * 401 * Intra frames update all 3 references 402 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set 403 * If the update (golden|altref) flag is set, it's updated with the current frame 404 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. 405 * If the flag is not set, the number read means: 406 * 0: no update 407 * 1: VP56_FRAME_PREVIOUS 408 * 2: update golden with altref, or update altref with golden 409 */ 410static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 411{ 412 VP56RangeCoder *c = &s->c; 413 414 if (update) 415 return VP56_FRAME_CURRENT; 416 417 switch (vp8_rac_get_uint(c, 2)) { 418 case 1: 419 return VP56_FRAME_PREVIOUS; 420 case 2: 421 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; 422 } 423 return VP56_FRAME_NONE; 424} 425 426static void vp78_reset_probability_tables(VP8Context *s) 427{ 428 int i, j; 429 for (i = 0; i < 4; i++) 430 for (j = 0; j < 16; j++) 431 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], 432 sizeof(s->prob->token[i][j])); 433} 434 435static void vp78_update_probability_tables(VP8Context *s) 436{ 437 VP56RangeCoder *c = &s->c; 438 int i, j, k, l, m; 439 440 for (i = 0; i < 4; i++) 441 for (j = 0; j < 8; j++) 442 for (k = 0; k < 3; k++) 443 for (l = 0; l < NUM_DCT_TOKENS-1; l++) 444 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { 445 int prob = vp8_rac_get_uint(c, 8); 446 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) 447 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; 448 } 449} 450 451#define VP7_MVC_SIZE 17 452#define VP8_MVC_SIZE 19 453 454static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s, 455 int mvc_size) 456{ 457 VP56RangeCoder *c = &s->c; 458 int i, j; 459 460 if (vp8_rac_get(c)) 461 for (i = 0; i < 4; i++) 462 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); 463 if (vp8_rac_get(c)) 464 for (i = 0; i < 3; i++) 465 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); 466 467 // 17.2 MV probability update 468 for (i = 0; i < 2; i++) 469 for (j = 0; j < mvc_size; j++) 470 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) 471 s->prob->mvc[i][j] = vp8_rac_get_nn(c); 472} 473 474static void update_refs(VP8Context *s) 475{ 476 VP56RangeCoder *c = &s->c; 477 478 int update_golden = vp8_rac_get(c); 479 int update_altref = vp8_rac_get(c); 480 481 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); 482 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); 483} 484 485static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height) 486{ 487 int i, j; 488 489 for (j = 1; j < 3; j++) { 490 for (i = 0; i < height / 2; i++) 491 memcpy(dst->data[j] + i * dst->linesize[j], 492 src->data[j] + i * src->linesize[j], width / 2); 493 } 494} 495 496static void fade(uint8_t *dst, ptrdiff_t dst_linesize, 497 const uint8_t *src, ptrdiff_t src_linesize, 498 int width, int height, 499 int alpha, int beta) 500{ 501 int i, j; 502 for (j = 0; j < height; j++) { 503 const uint8_t *src2 = src + j * src_linesize; 504 uint8_t *dst2 = dst + j * dst_linesize; 505 for (i = 0; i < width; i++) { 506 uint8_t y = src2[i]; 507 dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha); 508 } 509 } 510} 511 512static int vp7_fade_frame(VP8Context *s, int alpha, int beta) 513{ 514 int ret; 515 516 if (!s->keyframe && (alpha || beta)) { 517 int width = s->mb_width * 16; 518 int height = s->mb_height * 16; 519 AVFrame *src, *dst; 520 521 if (!s->framep[VP56_FRAME_PREVIOUS] || 522 !s->framep[VP56_FRAME_GOLDEN]) { 523 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); 524 return AVERROR_INVALIDDATA; 525 } 526 527 dst = 528 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f; 529 530 /* preserve the golden frame, write a new previous frame */ 531 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) { 532 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s); 533 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0) 534 return ret; 535 536 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f; 537 538 copy_chroma(dst, src, width, height); 539 } 540 541 fade(dst->data[0], dst->linesize[0], 542 src->data[0], src->linesize[0], 543 width, height, alpha, beta); 544 } 545 546 return 0; 547} 548 549static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 550{ 551 VP56RangeCoder *c = &s->c; 552 int part1_size, hscale, vscale, i, j, ret; 553 int width = s->avctx->width; 554 int height = s->avctx->height; 555 int alpha = 0; 556 int beta = 0; 557 558 if (buf_size < 4) { 559 return AVERROR_INVALIDDATA; 560 } 561 562 s->profile = (buf[0] >> 1) & 7; 563 if (s->profile > 1) { 564 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile); 565 return AVERROR_INVALIDDATA; 566 } 567 568 s->keyframe = !(buf[0] & 1); 569 s->invisible = 0; 570 part1_size = AV_RL24(buf) >> 4; 571 572 if (buf_size < 4 - s->profile + part1_size) { 573 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size); 574 return AVERROR_INVALIDDATA; 575 } 576 577 buf += 4 - s->profile; 578 buf_size -= 4 - s->profile; 579 580 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); 581 582 ret = ff_vp56_init_range_decoder(c, buf, part1_size); 583 if (ret < 0) 584 return ret; 585 buf += part1_size; 586 buf_size -= part1_size; 587 588 /* A. Dimension information (keyframes only) */ 589 if (s->keyframe) { 590 width = vp8_rac_get_uint(c, 12); 591 height = vp8_rac_get_uint(c, 12); 592 hscale = vp8_rac_get_uint(c, 2); 593 vscale = vp8_rac_get_uint(c, 2); 594 if (hscale || vscale) 595 avpriv_request_sample(s->avctx, "Upscaling"); 596 597 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; 598 vp78_reset_probability_tables(s); 599 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, 600 sizeof(s->prob->pred16x16)); 601 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, 602 sizeof(s->prob->pred8x8c)); 603 for (i = 0; i < 2; i++) 604 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], 605 sizeof(vp7_mv_default_prob[i])); 606 memset(&s->segmentation, 0, sizeof(s->segmentation)); 607 memset(&s->lf_delta, 0, sizeof(s->lf_delta)); 608 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); 609 } 610 611 if (s->keyframe || s->profile > 0) 612 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred)); 613 614 /* B. Decoding information for all four macroblock-level features */ 615 for (i = 0; i < 4; i++) { 616 s->feature_enabled[i] = vp8_rac_get(c); 617 if (s->feature_enabled[i]) { 618 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8); 619 620 for (j = 0; j < 3; j++) 621 s->feature_index_prob[i][j] = 622 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; 623 624 if (vp7_feature_value_size[s->profile][i]) 625 for (j = 0; j < 4; j++) 626 s->feature_value[i][j] = 627 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0; 628 } 629 } 630 631 s->segmentation.enabled = 0; 632 s->segmentation.update_map = 0; 633 s->lf_delta.enabled = 0; 634 635 s->num_coeff_partitions = 1; 636 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size); 637 if (ret < 0) 638 return ret; 639 640 if (!s->macroblocks_base || /* first frame */ 641 width != s->avctx->width || height != s->avctx->height || 642 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) { 643 if ((ret = vp7_update_dimensions(s, width, height)) < 0) 644 return ret; 645 } 646 647 /* C. Dequantization indices */ 648 vp7_get_quants(s); 649 650 /* D. Golden frame update flag (a Flag) for interframes only */ 651 if (!s->keyframe) { 652 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE; 653 s->sign_bias[VP56_FRAME_GOLDEN] = 0; 654 } 655 656 s->update_last = 1; 657 s->update_probabilities = 1; 658 s->fade_present = 1; 659 660 if (s->profile > 0) { 661 s->update_probabilities = vp8_rac_get(c); 662 if (!s->update_probabilities) 663 s->prob[1] = s->prob[0]; 664 665 if (!s->keyframe) 666 s->fade_present = vp8_rac_get(c); 667 } 668 669 if (vpX_rac_is_end(c)) 670 return AVERROR_INVALIDDATA; 671 /* E. Fading information for previous frame */ 672 if (s->fade_present && vp8_rac_get(c)) { 673 alpha = (int8_t) vp8_rac_get_uint(c, 8); 674 beta = (int8_t) vp8_rac_get_uint(c, 8); 675 } 676 677 /* F. Loop filter type */ 678 if (!s->profile) 679 s->filter.simple = vp8_rac_get(c); 680 681 /* G. DCT coefficient ordering specification */ 682 if (vp8_rac_get(c)) 683 for (i = 1; i < 16; i++) 684 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)]; 685 686 /* H. Loop filter levels */ 687 if (s->profile > 0) 688 s->filter.simple = vp8_rac_get(c); 689 s->filter.level = vp8_rac_get_uint(c, 6); 690 s->filter.sharpness = vp8_rac_get_uint(c, 3); 691 692 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */ 693 vp78_update_probability_tables(s); 694 695 s->mbskip_enabled = 0; 696 697 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */ 698 if (!s->keyframe) { 699 s->prob->intra = vp8_rac_get_uint(c, 8); 700 s->prob->last = vp8_rac_get_uint(c, 8); 701 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE); 702 } 703 704 if (vpX_rac_is_end(c)) 705 return AVERROR_INVALIDDATA; 706 707 if ((ret = vp7_fade_frame(s, alpha, beta)) < 0) 708 return ret; 709 710 return 0; 711} 712 713static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 714{ 715 VP56RangeCoder *c = &s->c; 716 int header_size, hscale, vscale, ret; 717 int width = s->avctx->width; 718 int height = s->avctx->height; 719 720 if (buf_size < 3) { 721 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size); 722 return AVERROR_INVALIDDATA; 723 } 724 725 s->keyframe = !(buf[0] & 1); 726 s->profile = (buf[0]>>1) & 7; 727 s->invisible = !(buf[0] & 0x10); 728 header_size = AV_RL24(buf) >> 5; 729 buf += 3; 730 buf_size -= 3; 731 732 s->header_partition_size = header_size; 733 734 if (s->profile > 3) 735 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); 736 737 if (!s->profile) 738 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, 739 sizeof(s->put_pixels_tab)); 740 else // profile 1-3 use bilinear, 4+ aren't defined so whatever 741 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, 742 sizeof(s->put_pixels_tab)); 743 744 if (header_size > buf_size - 7 * s->keyframe) { 745 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); 746 return AVERROR_INVALIDDATA; 747 } 748 749 if (s->keyframe) { 750 if (AV_RL24(buf) != 0x2a019d) { 751 av_log(s->avctx, AV_LOG_ERROR, 752 "Invalid start code 0x%x\n", AV_RL24(buf)); 753 return AVERROR_INVALIDDATA; 754 } 755 width = AV_RL16(buf + 3) & 0x3fff; 756 height = AV_RL16(buf + 5) & 0x3fff; 757 hscale = buf[4] >> 6; 758 vscale = buf[6] >> 6; 759 buf += 7; 760 buf_size -= 7; 761 762 if (hscale || vscale) 763 avpriv_request_sample(s->avctx, "Upscaling"); 764 765 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; 766 vp78_reset_probability_tables(s); 767 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, 768 sizeof(s->prob->pred16x16)); 769 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, 770 sizeof(s->prob->pred8x8c)); 771 memcpy(s->prob->mvc, vp8_mv_default_prob, 772 sizeof(s->prob->mvc)); 773 memset(&s->segmentation, 0, sizeof(s->segmentation)); 774 memset(&s->lf_delta, 0, sizeof(s->lf_delta)); 775 } 776 777 ret = ff_vp56_init_range_decoder(c, buf, header_size); 778 if (ret < 0) 779 return ret; 780 buf += header_size; 781 buf_size -= header_size; 782 783 if (s->keyframe) { 784 s->colorspace = vp8_rac_get(c); 785 if (s->colorspace) 786 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); 787 s->fullrange = vp8_rac_get(c); 788 } 789 790 if ((s->segmentation.enabled = vp8_rac_get(c))) 791 parse_segment_info(s); 792 else 793 s->segmentation.update_map = 0; // FIXME: move this to some init function? 794 795 s->filter.simple = vp8_rac_get(c); 796 s->filter.level = vp8_rac_get_uint(c, 6); 797 s->filter.sharpness = vp8_rac_get_uint(c, 3); 798 799 if ((s->lf_delta.enabled = vp8_rac_get(c))) { 800 s->lf_delta.update = vp8_rac_get(c); 801 if (s->lf_delta.update) 802 update_lf_deltas(s); 803 } 804 805 if (setup_partitions(s, buf, buf_size)) { 806 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); 807 return AVERROR_INVALIDDATA; 808 } 809 810 if (!s->macroblocks_base || /* first frame */ 811 width != s->avctx->width || height != s->avctx->height || 812 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) 813 if ((ret = vp8_update_dimensions(s, width, height)) < 0) 814 return ret; 815 816 vp8_get_quants(s); 817 818 if (!s->keyframe) { 819 update_refs(s); 820 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); 821 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); 822 } 823 824 // if we aren't saving this frame's probabilities for future frames, 825 // make a copy of the current probabilities 826 if (!(s->update_probabilities = vp8_rac_get(c))) 827 s->prob[1] = s->prob[0]; 828 829 s->update_last = s->keyframe || vp8_rac_get(c); 830 831 vp78_update_probability_tables(s); 832 833 if ((s->mbskip_enabled = vp8_rac_get(c))) 834 s->prob->mbskip = vp8_rac_get_uint(c, 8); 835 836 if (!s->keyframe) { 837 s->prob->intra = vp8_rac_get_uint(c, 8); 838 s->prob->last = vp8_rac_get_uint(c, 8); 839 s->prob->golden = vp8_rac_get_uint(c, 8); 840 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE); 841 } 842 843 // Record the entropy coder state here so that hwaccels can use it. 844 s->c.code_word = vp56_rac_renorm(&s->c); 845 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8); 846 s->coder_state_at_header_end.range = s->c.high; 847 s->coder_state_at_header_end.value = s->c.code_word >> 16; 848 s->coder_state_at_header_end.bit_count = -s->c.bits % 8; 849 850 return 0; 851} 852 853static av_always_inline 854void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src) 855{ 856 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), 857 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); 858 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX), 859 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX)); 860} 861 862/** 863 * Motion vector coding, 17.1. 864 */ 865static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7) 866{ 867 int bit, x = 0; 868 869 if (vp56_rac_get_prob_branchy(c, p[0])) { 870 int i; 871 872 for (i = 0; i < 3; i++) 873 x += vp56_rac_get_prob(c, p[9 + i]) << i; 874 for (i = (vp7 ? 7 : 9); i > 3; i--) 875 x += vp56_rac_get_prob(c, p[9 + i]) << i; 876 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12])) 877 x += 8; 878 } else { 879 // small_mvtree 880 const uint8_t *ps = p + 2; 881 bit = vp56_rac_get_prob(c, *ps); 882 ps += 1 + 3 * bit; 883 x += 4 * bit; 884 bit = vp56_rac_get_prob(c, *ps); 885 ps += 1 + bit; 886 x += 2 * bit; 887 x += vp56_rac_get_prob(c, *ps); 888 } 889 890 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; 891} 892 893static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p) 894{ 895 return read_mv_component(c, p, 1); 896} 897 898static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p) 899{ 900 return read_mv_component(c, p, 0); 901} 902 903static av_always_inline 904const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7) 905{ 906 if (is_vp7) 907 return vp7_submv_prob; 908 909 if (left == top) 910 return vp8_submv_prob[4 - !!left]; 911 if (!top) 912 return vp8_submv_prob[2]; 913 return vp8_submv_prob[1 - !!left]; 914} 915 916/** 917 * Split motion vector prediction, 16.4. 918 * @returns the number of motion vectors parsed (2, 4 or 16) 919 */ 920static av_always_inline 921int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 922 int layout, int is_vp7) 923{ 924 int part_idx; 925 int n, num; 926 VP8Macroblock *top_mb; 927 VP8Macroblock *left_mb = &mb[-1]; 928 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning]; 929 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx; 930 VP56mv *top_mv; 931 VP56mv *left_mv = left_mb->bmv; 932 VP56mv *cur_mv = mb->bmv; 933 934 if (!layout) // layout is inlined, s->mb_layout is not 935 top_mb = &mb[2]; 936 else 937 top_mb = &mb[-s->mb_width - 1]; 938 mbsplits_top = vp8_mbsplits[top_mb->partitioning]; 939 top_mv = top_mb->bmv; 940 941 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { 942 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) 943 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); 944 else 945 part_idx = VP8_SPLITMVMODE_8x8; 946 } else { 947 part_idx = VP8_SPLITMVMODE_4x4; 948 } 949 950 num = vp8_mbsplit_count[part_idx]; 951 mbsplits_cur = vp8_mbsplits[part_idx], 952 firstidx = vp8_mbfirstidx[part_idx]; 953 mb->partitioning = part_idx; 954 955 for (n = 0; n < num; n++) { 956 int k = firstidx[n]; 957 uint32_t left, above; 958 const uint8_t *submv_prob; 959 960 if (!(k & 3)) 961 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); 962 else 963 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); 964 if (k <= 3) 965 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); 966 else 967 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); 968 969 submv_prob = get_submv_prob(left, above, is_vp7); 970 971 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { 972 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { 973 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { 974 mb->bmv[n].y = mb->mv.y + 975 read_mv_component(c, s->prob->mvc[0], is_vp7); 976 mb->bmv[n].x = mb->mv.x + 977 read_mv_component(c, s->prob->mvc[1], is_vp7); 978 } else { 979 AV_ZERO32(&mb->bmv[n]); 980 } 981 } else { 982 AV_WN32A(&mb->bmv[n], above); 983 } 984 } else { 985 AV_WN32A(&mb->bmv[n], left); 986 } 987 } 988 989 return num; 990} 991 992/** 993 * The vp7 reference decoder uses a padding macroblock column (added to right 994 * edge of the frame) to guard against illegal macroblock offsets. The 995 * algorithm has bugs that permit offsets to straddle the padding column. 996 * This function replicates those bugs. 997 * 998 * @param[out] edge_x macroblock x address 999 * @param[out] edge_y macroblock y address 1000 * 1001 * @return macroblock offset legal (boolean) 1002 */ 1003static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width, 1004 int xoffset, int yoffset, int boundary, 1005 int *edge_x, int *edge_y) 1006{ 1007 int vwidth = mb_width + 1; 1008 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset; 1009 if (new < boundary || new % vwidth == vwidth - 1) 1010 return 0; 1011 *edge_y = new / vwidth; 1012 *edge_x = new % vwidth; 1013 return 1; 1014} 1015 1016static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock) 1017{ 1018 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0]; 1019} 1020 1021static av_always_inline 1022void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, 1023 int mb_x, int mb_y, int layout) 1024{ 1025 VP8Macroblock *mb_edge[12]; 1026 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR }; 1027 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; 1028 int idx = CNT_ZERO; 1029 VP56mv near_mv[3]; 1030 uint8_t cnt[3] = { 0 }; 1031 VP56RangeCoder *c = &s->c; 1032 int i; 1033 1034 AV_ZERO32(&near_mv[0]); 1035 AV_ZERO32(&near_mv[1]); 1036 AV_ZERO32(&near_mv[2]); 1037 1038 for (i = 0; i < VP7_MV_PRED_COUNT; i++) { 1039 const VP7MVPred * pred = &vp7_mv_pred[i]; 1040 int edge_x, edge_y; 1041 1042 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset, 1043 pred->yoffset, !s->profile, &edge_x, &edge_y)) { 1044 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1) 1045 ? s->macroblocks_base + 1 + edge_x + 1046 (s->mb_width + 1) * (edge_y + 1) 1047 : s->macroblocks + edge_x + 1048 (s->mb_height - edge_y - 1) * 2; 1049 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock)); 1050 if (mv) { 1051 if (AV_RN32A(&near_mv[CNT_NEAREST])) { 1052 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) { 1053 idx = CNT_NEAREST; 1054 } else if (AV_RN32A(&near_mv[CNT_NEAR])) { 1055 if (mv != AV_RN32A(&near_mv[CNT_NEAR])) 1056 continue; 1057 idx = CNT_NEAR; 1058 } else { 1059 AV_WN32A(&near_mv[CNT_NEAR], mv); 1060 idx = CNT_NEAR; 1061 } 1062 } else { 1063 AV_WN32A(&near_mv[CNT_NEAREST], mv); 1064 idx = CNT_NEAREST; 1065 } 1066 } else { 1067 idx = CNT_ZERO; 1068 } 1069 } else { 1070 idx = CNT_ZERO; 1071 } 1072 cnt[idx] += vp7_mv_pred[i].score; 1073 } 1074 1075 mb->partitioning = VP8_SPLITMVMODE_NONE; 1076 1077 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) { 1078 mb->mode = VP8_MVMODE_MV; 1079 1080 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) { 1081 1082 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) { 1083 1084 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR]) 1085 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST])); 1086 else 1087 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR])); 1088 1089 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) { 1090 mb->mode = VP8_MVMODE_SPLIT; 1091 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1]; 1092 } else { 1093 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]); 1094 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]); 1095 mb->bmv[0] = mb->mv; 1096 } 1097 } else { 1098 mb->mv = near_mv[CNT_NEAR]; 1099 mb->bmv[0] = mb->mv; 1100 } 1101 } else { 1102 mb->mv = near_mv[CNT_NEAREST]; 1103 mb->bmv[0] = mb->mv; 1104 } 1105 } else { 1106 mb->mode = VP8_MVMODE_ZERO; 1107 AV_ZERO32(&mb->mv); 1108 mb->bmv[0] = mb->mv; 1109 } 1110} 1111 1112static av_always_inline 1113void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb, 1114 int mb_x, int mb_y, int layout) 1115{ 1116 VP8Macroblock *mb_edge[3] = { 0 /* top */, 1117 mb - 1 /* left */, 1118 0 /* top-left */ }; 1119 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; 1120 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; 1121 int idx = CNT_ZERO; 1122 int cur_sign_bias = s->sign_bias[mb->ref_frame]; 1123 int8_t *sign_bias = s->sign_bias; 1124 VP56mv near_mv[4]; 1125 uint8_t cnt[4] = { 0 }; 1126 VP56RangeCoder *c = &s->c; 1127 1128 if (!layout) { // layout is inlined (s->mb_layout is not) 1129 mb_edge[0] = mb + 2; 1130 mb_edge[2] = mb + 1; 1131 } else { 1132 mb_edge[0] = mb - s->mb_width - 1; 1133 mb_edge[2] = mb - s->mb_width - 2; 1134 } 1135 1136 AV_ZERO32(&near_mv[0]); 1137 AV_ZERO32(&near_mv[1]); 1138 AV_ZERO32(&near_mv[2]); 1139 1140 /* Process MB on top, left and top-left */ 1141#define MV_EDGE_CHECK(n) \ 1142 { \ 1143 VP8Macroblock *edge = mb_edge[n]; \ 1144 int edge_ref = edge->ref_frame; \ 1145 if (edge_ref != VP56_FRAME_CURRENT) { \ 1146 uint32_t mv = AV_RN32A(&edge->mv); \ 1147 if (mv) { \ 1148 if (cur_sign_bias != sign_bias[edge_ref]) { \ 1149 /* SWAR negate of the values in mv. */ \ 1150 mv = ~mv; \ 1151 mv = ((mv & 0x7fff7fff) + \ 1152 0x00010001) ^ (mv & 0x80008000); \ 1153 } \ 1154 if (!n || mv != AV_RN32A(&near_mv[idx])) \ 1155 AV_WN32A(&near_mv[++idx], mv); \ 1156 cnt[idx] += 1 + (n != 2); \ 1157 } else \ 1158 cnt[CNT_ZERO] += 1 + (n != 2); \ 1159 } \ 1160 } 1161 1162 MV_EDGE_CHECK(0) 1163 MV_EDGE_CHECK(1) 1164 MV_EDGE_CHECK(2) 1165 1166 mb->partitioning = VP8_SPLITMVMODE_NONE; 1167 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { 1168 mb->mode = VP8_MVMODE_MV; 1169 1170 /* If we have three distinct MVs, merge first and last if they're the same */ 1171 if (cnt[CNT_SPLITMV] && 1172 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) 1173 cnt[CNT_NEAREST] += 1; 1174 1175 /* Swap near and nearest if necessary */ 1176 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { 1177 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 1178 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 1179 } 1180 1181 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { 1182 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { 1183 /* Choose the best mv out of 0,0 and the nearest mv */ 1184 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); 1185 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + 1186 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + 1187 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); 1188 1189 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { 1190 mb->mode = VP8_MVMODE_SPLIT; 1191 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1]; 1192 } else { 1193 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]); 1194 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]); 1195 mb->bmv[0] = mb->mv; 1196 } 1197 } else { 1198 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]); 1199 mb->bmv[0] = mb->mv; 1200 } 1201 } else { 1202 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]); 1203 mb->bmv[0] = mb->mv; 1204 } 1205 } else { 1206 mb->mode = VP8_MVMODE_ZERO; 1207 AV_ZERO32(&mb->mv); 1208 mb->bmv[0] = mb->mv; 1209 } 1210} 1211 1212static av_always_inline 1213void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 1214 int mb_x, int keyframe, int layout) 1215{ 1216 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; 1217 1218 if (layout) { 1219 VP8Macroblock *mb_top = mb - s->mb_width - 1; 1220 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4); 1221 } 1222 if (keyframe) { 1223 int x, y; 1224 uint8_t *top; 1225 uint8_t *const left = s->intra4x4_pred_mode_left; 1226 if (layout) 1227 top = mb->intra4x4_pred_mode_top; 1228 else 1229 top = s->intra4x4_pred_mode_top + 4 * mb_x; 1230 for (y = 0; y < 4; y++) { 1231 for (x = 0; x < 4; x++) { 1232 const uint8_t *ctx; 1233 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; 1234 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 1235 left[y] = top[x] = *intra4x4; 1236 intra4x4++; 1237 } 1238 } 1239 } else { 1240 int i; 1241 for (i = 0; i < 16; i++) 1242 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, 1243 vp8_pred4x4_prob_inter); 1244 } 1245} 1246 1247static av_always_inline 1248void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds, 1249 VP8Macroblock *mb, int mb_x, int mb_y, 1250 uint8_t *segment, uint8_t *ref, int layout, int is_vp7) 1251{ 1252 VP56RangeCoder *c = &s->c; 1253 static const char * const vp7_feature_name[] = { "q-index", 1254 "lf-delta", 1255 "partial-golden-update", 1256 "blit-pitch" }; 1257 if (is_vp7) { 1258 int i; 1259 *segment = 0; 1260 for (i = 0; i < 4; i++) { 1261 if (s->feature_enabled[i]) { 1262 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) { 1263 int index = vp8_rac_get_tree(c, vp7_feature_index_tree, 1264 s->feature_index_prob[i]); 1265 av_log(s->avctx, AV_LOG_WARNING, 1266 "Feature %s present in macroblock (value 0x%x)\n", 1267 vp7_feature_name[i], s->feature_value[i][index]); 1268 } 1269 } 1270 } 1271 } else if (s->segmentation.update_map) { 1272 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]); 1273 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; 1274 } else if (s->segmentation.enabled) 1275 *segment = ref ? *ref : *segment; 1276 mb->segment = *segment; 1277 1278 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; 1279 1280 if (s->keyframe) { 1281 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, 1282 vp8_pred16x16_prob_intra); 1283 1284 if (mb->mode == MODE_I4x4) { 1285 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); 1286 } else { 1287 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode 1288 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u; 1289 if (s->mb_layout) 1290 AV_WN32A(mb->intra4x4_pred_mode_top, modes); 1291 else 1292 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); 1293 AV_WN32A(s->intra4x4_pred_mode_left, modes); 1294 } 1295 1296 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, 1297 vp8_pred8x8c_prob_intra); 1298 mb->ref_frame = VP56_FRAME_CURRENT; 1299 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { 1300 // inter MB, 16.2 1301 if (vp56_rac_get_prob_branchy(c, s->prob->last)) 1302 mb->ref_frame = 1303 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */ 1304 : VP56_FRAME_GOLDEN; 1305 else 1306 mb->ref_frame = VP56_FRAME_PREVIOUS; 1307 s->ref_count[mb->ref_frame - 1]++; 1308 1309 // motion vectors, 16.3 1310 if (is_vp7) 1311 vp7_decode_mvs(s, mb, mb_x, mb_y, layout); 1312 else 1313 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout); 1314 } else { 1315 // intra MB, 16.1 1316 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); 1317 1318 if (mb->mode == MODE_I4x4) 1319 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); 1320 1321 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, 1322 s->prob->pred8x8c); 1323 mb->ref_frame = VP56_FRAME_CURRENT; 1324 mb->partitioning = VP8_SPLITMVMODE_NONE; 1325 AV_ZERO32(&mb->bmv[0]); 1326 } 1327} 1328 1329/** 1330 * @param r arithmetic bitstream reader context 1331 * @param block destination for block coefficients 1332 * @param probs probabilities to use when reading trees from the bitstream 1333 * @param i initial coeff index, 0 unless a separate DC block is coded 1334 * @param qmul array holding the dc/ac dequant factor at position 0/1 1335 * 1336 * @return 0 if no coeffs were decoded 1337 * otherwise, the index of the last coeff decoded plus one 1338 */ 1339static av_always_inline 1340int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16], 1341 uint8_t probs[16][3][NUM_DCT_TOKENS - 1], 1342 int i, uint8_t *token_prob, int16_t qmul[2], 1343 const uint8_t scan[16], int vp7) 1344{ 1345 VP56RangeCoder c = *r; 1346 goto skip_eob; 1347 do { 1348 int coeff; 1349restart: 1350 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB 1351 break; 1352 1353skip_eob: 1354 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0 1355 if (++i == 16) 1356 break; // invalid input; blocks should end with EOB 1357 token_prob = probs[i][0]; 1358 if (vp7) 1359 goto restart; 1360 goto skip_eob; 1361 } 1362 1363 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1 1364 coeff = 1; 1365 token_prob = probs[i + 1][1]; 1366 } else { 1367 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 1368 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]); 1369 if (coeff) 1370 coeff += vp56_rac_get_prob(&c, token_prob[5]); 1371 coeff += 2; 1372 } else { 1373 // DCT_CAT* 1374 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) { 1375 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 1376 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]); 1377 } else { // DCT_CAT2 1378 coeff = 7; 1379 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; 1380 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]); 1381 } 1382 } else { // DCT_CAT3 and up 1383 int a = vp56_rac_get_prob(&c, token_prob[8]); 1384 int b = vp56_rac_get_prob(&c, token_prob[9 + a]); 1385 int cat = (a << 1) + b; 1386 coeff = 3 + (8 << cat); 1387 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); 1388 } 1389 } 1390 token_prob = probs[i + 1][2]; 1391 } 1392 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; 1393 } while (++i < 16); 1394 1395 *r = c; 1396 return i; 1397} 1398 1399static av_always_inline 1400int inter_predict_dc(int16_t block[16], int16_t pred[2]) 1401{ 1402 int16_t dc = block[0]; 1403 int ret = 0; 1404 1405 if (pred[1] > 3) { 1406 dc += pred[0]; 1407 ret = 1; 1408 } 1409 1410 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { 1411 block[0] = pred[0] = dc; 1412 pred[1] = 0; 1413 } else { 1414 if (pred[0] == dc) 1415 pred[1]++; 1416 block[0] = pred[0] = dc; 1417 } 1418 1419 return ret; 1420} 1421 1422static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r, 1423 int16_t block[16], 1424 uint8_t probs[16][3][NUM_DCT_TOKENS - 1], 1425 int i, uint8_t *token_prob, 1426 int16_t qmul[2], 1427 const uint8_t scan[16]) 1428{ 1429 return decode_block_coeffs_internal(r, block, probs, i, 1430 token_prob, qmul, scan, IS_VP7); 1431} 1432 1433#ifndef vp8_decode_block_coeffs_internal 1434static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r, 1435 int16_t block[16], 1436 uint8_t probs[16][3][NUM_DCT_TOKENS - 1], 1437 int i, uint8_t *token_prob, 1438 int16_t qmul[2]) 1439{ 1440 return decode_block_coeffs_internal(r, block, probs, i, 1441 token_prob, qmul, ff_zigzag_scan, IS_VP8); 1442} 1443#endif 1444 1445/** 1446 * @param c arithmetic bitstream reader context 1447 * @param block destination for block coefficients 1448 * @param probs probabilities to use when reading trees from the bitstream 1449 * @param i initial coeff index, 0 unless a separate DC block is coded 1450 * @param zero_nhood the initial prediction context for number of surrounding 1451 * all-zero blocks (only left/top, so 0-2) 1452 * @param qmul array holding the dc/ac dequant factor at position 0/1 1453 * @param scan scan pattern (VP7 only) 1454 * 1455 * @return 0 if no coeffs were decoded 1456 * otherwise, the index of the last coeff decoded plus one 1457 */ 1458static av_always_inline 1459int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], 1460 uint8_t probs[16][3][NUM_DCT_TOKENS - 1], 1461 int i, int zero_nhood, int16_t qmul[2], 1462 const uint8_t scan[16], int vp7) 1463{ 1464 uint8_t *token_prob = probs[i][zero_nhood]; 1465 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 1466 return 0; 1467 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, 1468 token_prob, qmul, scan) 1469 : vp8_decode_block_coeffs_internal(c, block, probs, i, 1470 token_prob, qmul); 1471} 1472 1473static av_always_inline 1474void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, 1475 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9], 1476 int is_vp7) 1477{ 1478 int i, x, y, luma_start = 0, luma_ctx = 3; 1479 int nnz_pred, nnz, nnz_total = 0; 1480 int segment = mb->segment; 1481 int block_dc = 0; 1482 1483 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) { 1484 nnz_pred = t_nnz[8] + l_nnz[8]; 1485 1486 // decode DC values and do hadamard 1487 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, 1488 nnz_pred, s->qmat[segment].luma_dc_qmul, 1489 ff_zigzag_scan, is_vp7); 1490 l_nnz[8] = t_nnz[8] = !!nnz; 1491 1492 if (is_vp7 && mb->mode > MODE_I4x4) { 1493 nnz |= inter_predict_dc(td->block_dc, 1494 s->inter_dc_pred[mb->ref_frame - 1]); 1495 } 1496 1497 if (nnz) { 1498 nnz_total += nnz; 1499 block_dc = 1; 1500 if (nnz == 1) 1501 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); 1502 else 1503 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc); 1504 } 1505 luma_start = 1; 1506 luma_ctx = 0; 1507 } 1508 1509 // luma blocks 1510 for (y = 0; y < 4; y++) 1511 for (x = 0; x < 4; x++) { 1512 nnz_pred = l_nnz[y] + t_nnz[x]; 1513 nnz = decode_block_coeffs(c, td->block[y][x], 1514 s->prob->token[luma_ctx], 1515 luma_start, nnz_pred, 1516 s->qmat[segment].luma_qmul, 1517 s->prob[0].scan, is_vp7); 1518 /* nnz+block_dc may be one more than the actual last index, 1519 * but we don't care */ 1520 td->non_zero_count_cache[y][x] = nnz + block_dc; 1521 t_nnz[x] = l_nnz[y] = !!nnz; 1522 nnz_total += nnz; 1523 } 1524 1525 // chroma blocks 1526 // TODO: what to do about dimensions? 2nd dim for luma is x, 1527 // but for chroma it's (y<<1)|x 1528 for (i = 4; i < 6; i++) 1529 for (y = 0; y < 2; y++) 1530 for (x = 0; x < 2; x++) { 1531 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x]; 1532 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], 1533 s->prob->token[2], 0, nnz_pred, 1534 s->qmat[segment].chroma_qmul, 1535 s->prob[0].scan, is_vp7); 1536 td->non_zero_count_cache[i][(y << 1) + x] = nnz; 1537 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz; 1538 nnz_total += nnz; 1539 } 1540 1541 // if there were no coded coeffs despite the macroblock not being marked skip, 1542 // we MUST not do the inner loop filter and should not do IDCT 1543 // Since skip isn't used for bitstream prediction, just manually set it. 1544 if (!nnz_total) 1545 mb->skip = 1; 1546} 1547 1548static av_always_inline 1549void backup_mb_border(uint8_t *top_border, uint8_t *src_y, 1550 uint8_t *src_cb, uint8_t *src_cr, 1551 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple) 1552{ 1553 AV_COPY128(top_border, src_y + 15 * linesize); 1554 if (!simple) { 1555 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); 1556 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); 1557 } 1558} 1559 1560static av_always_inline 1561void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, 1562 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x, 1563 int mb_y, int mb_width, int simple, int xchg) 1564{ 1565 uint8_t *top_border_m1 = top_border - 32; // for TL prediction 1566 src_y -= linesize; 1567 src_cb -= uvlinesize; 1568 src_cr -= uvlinesize; 1569 1570#define XCHG(a, b, xchg) \ 1571 do { \ 1572 if (xchg) \ 1573 AV_SWAP64(b, a); \ 1574 else \ 1575 AV_COPY64(b, a); \ 1576 } while (0) 1577 1578 XCHG(top_border_m1 + 8, src_y - 8, xchg); 1579 XCHG(top_border, src_y, xchg); 1580 XCHG(top_border + 8, src_y + 8, 1); 1581 if (mb_x < mb_width - 1) 1582 XCHG(top_border + 32, src_y + 16, 1); 1583 1584 // only copy chroma for normal loop filter 1585 // or to initialize the top row to 127 1586 if (!simple || !mb_y) { 1587 XCHG(top_border_m1 + 16, src_cb - 8, xchg); 1588 XCHG(top_border_m1 + 24, src_cr - 8, xchg); 1589 XCHG(top_border + 16, src_cb, 1); 1590 XCHG(top_border + 24, src_cr, 1); 1591 } 1592} 1593 1594static av_always_inline 1595int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) 1596{ 1597 if (!mb_x) 1598 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 1599 else 1600 return mb_y ? mode : LEFT_DC_PRED8x8; 1601} 1602 1603static av_always_inline 1604int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7) 1605{ 1606 if (!mb_x) 1607 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8); 1608 else 1609 return mb_y ? mode : HOR_PRED8x8; 1610} 1611 1612static av_always_inline 1613int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7) 1614{ 1615 switch (mode) { 1616 case DC_PRED8x8: 1617 return check_dc_pred8x8_mode(mode, mb_x, mb_y); 1618 case VERT_PRED8x8: 1619 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode; 1620 case HOR_PRED8x8: 1621 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode; 1622 case PLANE_PRED8x8: /* TM */ 1623 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7); 1624 } 1625 return mode; 1626} 1627 1628static av_always_inline 1629int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7) 1630{ 1631 if (!mb_x) { 1632 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED); 1633 } else { 1634 return mb_y ? mode : HOR_VP8_PRED; 1635 } 1636} 1637 1638static av_always_inline 1639int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, 1640 int *copy_buf, int vp7) 1641{ 1642 switch (mode) { 1643 case VERT_PRED: 1644 if (!mb_x && mb_y) { 1645 *copy_buf = 1; 1646 return mode; 1647 } 1648 /* fall-through */ 1649 case DIAG_DOWN_LEFT_PRED: 1650 case VERT_LEFT_PRED: 1651 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode; 1652 case HOR_PRED: 1653 if (!mb_y) { 1654 *copy_buf = 1; 1655 return mode; 1656 } 1657 /* fall-through */ 1658 case HOR_UP_PRED: 1659 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode; 1660 case TM_VP8_PRED: 1661 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7); 1662 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions 1663 * as 16x16/8x8 DC */ 1664 case DIAG_DOWN_RIGHT_PRED: 1665 case VERT_RIGHT_PRED: 1666 case HOR_DOWN_PRED: 1667 if (!mb_y || !mb_x) 1668 *copy_buf = 1; 1669 return mode; 1670 } 1671 return mode; 1672} 1673 1674static av_always_inline 1675void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], 1676 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7) 1677{ 1678 int x, y, mode, nnz; 1679 uint32_t tr; 1680 1681 /* for the first row, we need to run xchg_mb_border to init the top edge 1682 * to 127 otherwise, skip it if we aren't going to deblock */ 1683 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) 1684 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], 1685 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, 1686 s->filter.simple, 1); 1687 1688 if (mb->mode < MODE_I4x4) { 1689 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7); 1690 s->hpc.pred16x16[mode](dst[0], s->linesize); 1691 } else { 1692 uint8_t *ptr = dst[0]; 1693 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; 1694 const uint8_t lo = is_vp7 ? 128 : 127; 1695 const uint8_t hi = is_vp7 ? 128 : 129; 1696 uint8_t tr_top[4] = { lo, lo, lo, lo }; 1697 1698 // all blocks on the right edge of the macroblock use bottom edge 1699 // the top macroblock for their topright edge 1700 uint8_t *tr_right = ptr - s->linesize + 16; 1701 1702 // if we're on the right edge of the frame, said edge is extended 1703 // from the top macroblock 1704 if (mb_y && mb_x == s->mb_width - 1) { 1705 tr = tr_right[-1] * 0x01010101u; 1706 tr_right = (uint8_t *) &tr; 1707 } 1708 1709 if (mb->skip) 1710 AV_ZERO128(td->non_zero_count_cache); 1711 1712 for (y = 0; y < 4; y++) { 1713 uint8_t *topright = ptr + 4 - s->linesize; 1714 for (x = 0; x < 4; x++) { 1715 int copy = 0; 1716 ptrdiff_t linesize = s->linesize; 1717 uint8_t *dst = ptr + 4 * x; 1718 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]); 1719 1720 if ((y == 0 || x == 3) && mb_y == 0) { 1721 topright = tr_top; 1722 } else if (x == 3) 1723 topright = tr_right; 1724 1725 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, 1726 mb_y + y, ©, is_vp7); 1727 if (copy) { 1728 dst = copy_dst + 12; 1729 linesize = 8; 1730 if (!(mb_y + y)) { 1731 copy_dst[3] = lo; 1732 AV_WN32A(copy_dst + 4, lo * 0x01010101U); 1733 } else { 1734 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize); 1735 if (!(mb_x + x)) { 1736 copy_dst[3] = hi; 1737 } else { 1738 copy_dst[3] = ptr[4 * x - s->linesize - 1]; 1739 } 1740 } 1741 if (!(mb_x + x)) { 1742 copy_dst[11] = 1743 copy_dst[19] = 1744 copy_dst[27] = 1745 copy_dst[35] = hi; 1746 } else { 1747 copy_dst[11] = ptr[4 * x - 1]; 1748 copy_dst[19] = ptr[4 * x + s->linesize - 1]; 1749 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1]; 1750 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1]; 1751 } 1752 } 1753 s->hpc.pred4x4[mode](dst, topright, linesize); 1754 if (copy) { 1755 AV_COPY32(ptr + 4 * x, copy_dst + 12); 1756 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20); 1757 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28); 1758 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36); 1759 } 1760 1761 nnz = td->non_zero_count_cache[y][x]; 1762 if (nnz) { 1763 if (nnz == 1) 1764 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, 1765 td->block[y][x], s->linesize); 1766 else 1767 s->vp8dsp.vp8_idct_add(ptr + 4 * x, 1768 td->block[y][x], s->linesize); 1769 } 1770 topright += 4; 1771 } 1772 1773 ptr += 4 * s->linesize; 1774 intra4x4 += 4; 1775 } 1776 } 1777 1778 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, 1779 mb_x, mb_y, is_vp7); 1780 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); 1781 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); 1782 1783 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) 1784 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], 1785 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, 1786 s->filter.simple, 0); 1787} 1788 1789static const uint8_t subpel_idx[3][8] = { 1790 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, 1791 // also function pointer index 1792 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required 1793 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels 1794}; 1795 1796/** 1797 * luma MC function 1798 * 1799 * @param s VP8 decoding context 1800 * @param dst target buffer for block data at block position 1801 * @param ref reference picture buffer at origin (0, 0) 1802 * @param mv motion vector (relative to block position) to get pixel data from 1803 * @param x_off horizontal position of block from origin (0, 0) 1804 * @param y_off vertical position of block from origin (0, 0) 1805 * @param block_w width of block (16, 8 or 4) 1806 * @param block_h height of block (always same as block_w) 1807 * @param width width of src/dst plane data 1808 * @param height height of src/dst plane data 1809 * @param linesize size of a single line of plane data, including padding 1810 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) 1811 */ 1812static av_always_inline 1813void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, 1814 ThreadFrame *ref, const VP56mv *mv, 1815 int x_off, int y_off, int block_w, int block_h, 1816 int width, int height, ptrdiff_t linesize, 1817 vp8_mc_func mc_func[3][3]) 1818{ 1819 uint8_t *src = ref->f->data[0]; 1820 1821 if (AV_RN32A(mv)) { 1822 ptrdiff_t src_linesize = linesize; 1823 1824 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx]; 1825 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my]; 1826 1827 x_off += mv->x >> 2; 1828 y_off += mv->y >> 2; 1829 1830 // edge emulation 1831 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0); 1832 src += y_off * linesize + x_off; 1833 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || 1834 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { 1835 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 1836 src - my_idx * linesize - mx_idx, 1837 EDGE_EMU_LINESIZE, linesize, 1838 block_w + subpel_idx[1][mx], 1839 block_h + subpel_idx[1][my], 1840 x_off - mx_idx, y_off - my_idx, 1841 width, height); 1842 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; 1843 src_linesize = EDGE_EMU_LINESIZE; 1844 } 1845 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my); 1846 } else { 1847 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0); 1848 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, 1849 linesize, block_h, 0, 0); 1850 } 1851} 1852 1853/** 1854 * chroma MC function 1855 * 1856 * @param s VP8 decoding context 1857 * @param dst1 target buffer for block data at block position (U plane) 1858 * @param dst2 target buffer for block data at block position (V plane) 1859 * @param ref reference picture buffer at origin (0, 0) 1860 * @param mv motion vector (relative to block position) to get pixel data from 1861 * @param x_off horizontal position of block from origin (0, 0) 1862 * @param y_off vertical position of block from origin (0, 0) 1863 * @param block_w width of block (16, 8 or 4) 1864 * @param block_h height of block (always same as block_w) 1865 * @param width width of src/dst plane data 1866 * @param height height of src/dst plane data 1867 * @param linesize size of a single line of plane data, including padding 1868 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) 1869 */ 1870static av_always_inline 1871void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, 1872 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv, 1873 int x_off, int y_off, int block_w, int block_h, 1874 int width, int height, ptrdiff_t linesize, 1875 vp8_mc_func mc_func[3][3]) 1876{ 1877 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2]; 1878 1879 if (AV_RN32A(mv)) { 1880 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx]; 1881 int my = mv->y & 7, my_idx = subpel_idx[0][my]; 1882 1883 x_off += mv->x >> 3; 1884 y_off += mv->y >> 3; 1885 1886 // edge emulation 1887 src1 += y_off * linesize + x_off; 1888 src2 += y_off * linesize + x_off; 1889 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); 1890 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || 1891 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { 1892 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 1893 src1 - my_idx * linesize - mx_idx, 1894 EDGE_EMU_LINESIZE, linesize, 1895 block_w + subpel_idx[1][mx], 1896 block_h + subpel_idx[1][my], 1897 x_off - mx_idx, y_off - my_idx, width, height); 1898 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; 1899 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my); 1900 1901 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 1902 src2 - my_idx * linesize - mx_idx, 1903 EDGE_EMU_LINESIZE, linesize, 1904 block_w + subpel_idx[1][mx], 1905 block_h + subpel_idx[1][my], 1906 x_off - mx_idx, y_off - my_idx, width, height); 1907 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; 1908 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); 1909 } else { 1910 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); 1911 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); 1912 } 1913 } else { 1914 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0); 1915 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1916 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1917 } 1918} 1919 1920static av_always_inline 1921void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], 1922 ThreadFrame *ref_frame, int x_off, int y_off, 1923 int bx_off, int by_off, int block_w, int block_h, 1924 int width, int height, VP56mv *mv) 1925{ 1926 VP56mv uvmv = *mv; 1927 1928 /* Y */ 1929 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, 1930 ref_frame, mv, x_off + bx_off, y_off + by_off, 1931 block_w, block_h, width, height, s->linesize, 1932 s->put_pixels_tab[block_w == 8]); 1933 1934 /* U/V */ 1935 if (s->profile == 3) { 1936 /* this block only applies VP8; it is safe to check 1937 * only the profile, as VP7 profile <= 1 */ 1938 uvmv.x &= ~7; 1939 uvmv.y &= ~7; 1940 } 1941 x_off >>= 1; 1942 y_off >>= 1; 1943 bx_off >>= 1; 1944 by_off >>= 1; 1945 width >>= 1; 1946 height >>= 1; 1947 block_w >>= 1; 1948 block_h >>= 1; 1949 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, 1950 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, 1951 &uvmv, x_off + bx_off, y_off + by_off, 1952 block_w, block_h, width, height, s->uvlinesize, 1953 s->put_pixels_tab[1 + (block_w == 4)]); 1954} 1955 1956/* Fetch pixels for estimated mv 4 macroblocks ahead. 1957 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ 1958static av_always_inline 1959void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 1960 int mb_xy, int ref) 1961{ 1962 /* Don't prefetch refs that haven't been used very often this frame. */ 1963 if (s->ref_count[ref - 1] > (mb_xy >> 5)) { 1964 int x_off = mb_x << 4, y_off = mb_y << 4; 1965 int mx = (mb->mv.x >> 2) + x_off + 8; 1966 int my = (mb->mv.y >> 2) + y_off; 1967 uint8_t **src = s->framep[ref]->tf.f->data; 1968 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; 1969 /* For threading, a ff_thread_await_progress here might be useful, but 1970 * it actually slows down the decoder. Since a bad prefetch doesn't 1971 * generate bad decoder output, we don't run it here. */ 1972 s->vdsp.prefetch(src[0] + off, s->linesize, 4); 1973 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64; 1974 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); 1975 } 1976} 1977 1978/** 1979 * Apply motion vectors to prediction buffer, chapter 18. 1980 */ 1981static av_always_inline 1982void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], 1983 VP8Macroblock *mb, int mb_x, int mb_y) 1984{ 1985 int x_off = mb_x << 4, y_off = mb_y << 4; 1986 int width = 16 * s->mb_width, height = 16 * s->mb_height; 1987 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf; 1988 VP56mv *bmv = mb->bmv; 1989 1990 switch (mb->partitioning) { 1991 case VP8_SPLITMVMODE_NONE: 1992 vp8_mc_part(s, td, dst, ref, x_off, y_off, 1993 0, 0, 16, 16, width, height, &mb->mv); 1994 break; 1995 case VP8_SPLITMVMODE_4x4: { 1996 int x, y; 1997 VP56mv uvmv; 1998 1999 /* Y */ 2000 for (y = 0; y < 4; y++) { 2001 for (x = 0; x < 4; x++) { 2002 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4, 2003 ref, &bmv[4 * y + x], 2004 4 * x + x_off, 4 * y + y_off, 4, 4, 2005 width, height, s->linesize, 2006 s->put_pixels_tab[2]); 2007 } 2008 } 2009 2010 /* U/V */ 2011 x_off >>= 1; 2012 y_off >>= 1; 2013 width >>= 1; 2014 height >>= 1; 2015 for (y = 0; y < 2; y++) { 2016 for (x = 0; x < 2; x++) { 2017 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x + 2018 mb->bmv[2 * y * 4 + 2 * x + 1].x + 2019 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x + 2020 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x; 2021 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y + 2022 mb->bmv[2 * y * 4 + 2 * x + 1].y + 2023 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y + 2024 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y; 2025 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2; 2026 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2; 2027 if (s->profile == 3) { 2028 uvmv.x &= ~7; 2029 uvmv.y &= ~7; 2030 } 2031 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4, 2032 dst[2] + 4 * y * s->uvlinesize + x * 4, ref, 2033 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4, 2034 width, height, s->uvlinesize, 2035 s->put_pixels_tab[2]); 2036 } 2037 } 2038 break; 2039 } 2040 case VP8_SPLITMVMODE_16x8: 2041 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2042 0, 0, 16, 8, width, height, &bmv[0]); 2043 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2044 0, 8, 16, 8, width, height, &bmv[1]); 2045 break; 2046 case VP8_SPLITMVMODE_8x16: 2047 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2048 0, 0, 8, 16, width, height, &bmv[0]); 2049 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2050 8, 0, 8, 16, width, height, &bmv[1]); 2051 break; 2052 case VP8_SPLITMVMODE_8x8: 2053 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2054 0, 0, 8, 8, width, height, &bmv[0]); 2055 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2056 8, 0, 8, 8, width, height, &bmv[1]); 2057 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2058 0, 8, 8, 8, width, height, &bmv[2]); 2059 vp8_mc_part(s, td, dst, ref, x_off, y_off, 2060 8, 8, 8, 8, width, height, &bmv[3]); 2061 break; 2062 } 2063} 2064 2065static av_always_inline 2066void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb) 2067{ 2068 int x, y, ch; 2069 2070 if (mb->mode != MODE_I4x4) { 2071 uint8_t *y_dst = dst[0]; 2072 for (y = 0; y < 4; y++) { 2073 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]); 2074 if (nnz4) { 2075 if (nnz4 & ~0x01010101) { 2076 for (x = 0; x < 4; x++) { 2077 if ((uint8_t) nnz4 == 1) 2078 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, 2079 td->block[y][x], 2080 s->linesize); 2081 else if ((uint8_t) nnz4 > 1) 2082 s->vp8dsp.vp8_idct_add(y_dst + 4 * x, 2083 td->block[y][x], 2084 s->linesize); 2085 nnz4 >>= 8; 2086 if (!nnz4) 2087 break; 2088 } 2089 } else { 2090 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize); 2091 } 2092 } 2093 y_dst += 4 * s->linesize; 2094 } 2095 } 2096 2097 for (ch = 0; ch < 2; ch++) { 2098 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]); 2099 if (nnz4) { 2100 uint8_t *ch_dst = dst[1 + ch]; 2101 if (nnz4 & ~0x01010101) { 2102 for (y = 0; y < 2; y++) { 2103 for (x = 0; x < 2; x++) { 2104 if ((uint8_t) nnz4 == 1) 2105 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, 2106 td->block[4 + ch][(y << 1) + x], 2107 s->uvlinesize); 2108 else if ((uint8_t) nnz4 > 1) 2109 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, 2110 td->block[4 + ch][(y << 1) + x], 2111 s->uvlinesize); 2112 nnz4 >>= 8; 2113 if (!nnz4) 2114 goto chroma_idct_end; 2115 } 2116 ch_dst += 4 * s->uvlinesize; 2117 } 2118 } else { 2119 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize); 2120 } 2121 } 2122chroma_idct_end: 2123 ; 2124 } 2125} 2126 2127static av_always_inline 2128void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, 2129 VP8FilterStrength *f, int is_vp7) 2130{ 2131 int interior_limit, filter_level; 2132 2133 if (s->segmentation.enabled) { 2134 filter_level = s->segmentation.filter_level[mb->segment]; 2135 if (!s->segmentation.absolute_vals) 2136 filter_level += s->filter.level; 2137 } else 2138 filter_level = s->filter.level; 2139 2140 if (s->lf_delta.enabled) { 2141 filter_level += s->lf_delta.ref[mb->ref_frame]; 2142 filter_level += s->lf_delta.mode[mb->mode]; 2143 } 2144 2145 filter_level = av_clip_uintp2(filter_level, 6); 2146 2147 interior_limit = filter_level; 2148 if (s->filter.sharpness) { 2149 interior_limit >>= (s->filter.sharpness + 3) >> 2; 2150 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); 2151 } 2152 interior_limit = FFMAX(interior_limit, 1); 2153 2154 f->filter_level = filter_level; 2155 f->inner_limit = interior_limit; 2156 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 || 2157 mb->mode == VP8_MVMODE_SPLIT; 2158} 2159 2160static av_always_inline 2161void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, 2162 int mb_x, int mb_y, int is_vp7) 2163{ 2164 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh; 2165 int filter_level = f->filter_level; 2166 int inner_limit = f->inner_limit; 2167 int inner_filter = f->inner_filter; 2168 ptrdiff_t linesize = s->linesize; 2169 ptrdiff_t uvlinesize = s->uvlinesize; 2170 static const uint8_t hev_thresh_lut[2][64] = { 2171 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2174 3, 3, 3, 3 }, 2175 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2178 2, 2, 2, 2 } 2179 }; 2180 2181 if (!filter_level) 2182 return; 2183 2184 if (is_vp7) { 2185 bedge_lim_y = filter_level; 2186 bedge_lim_uv = filter_level * 2; 2187 mbedge_lim = filter_level + 2; 2188 } else { 2189 bedge_lim_y = 2190 bedge_lim_uv = filter_level * 2 + inner_limit; 2191 mbedge_lim = bedge_lim_y + 4; 2192 } 2193 2194 hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; 2195 2196 if (mb_x) { 2197 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, 2198 mbedge_lim, inner_limit, hev_thresh); 2199 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, 2200 mbedge_lim, inner_limit, hev_thresh); 2201 } 2202 2203#define H_LOOP_FILTER_16Y_INNER(cond) \ 2204 if (cond && inner_filter) { \ 2205 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \ 2206 bedge_lim_y, inner_limit, \ 2207 hev_thresh); \ 2208 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \ 2209 bedge_lim_y, inner_limit, \ 2210 hev_thresh); \ 2211 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \ 2212 bedge_lim_y, inner_limit, \ 2213 hev_thresh); \ 2214 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \ 2215 uvlinesize, bedge_lim_uv, \ 2216 inner_limit, hev_thresh); \ 2217 } 2218 2219 H_LOOP_FILTER_16Y_INNER(!is_vp7) 2220 2221 if (mb_y) { 2222 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, 2223 mbedge_lim, inner_limit, hev_thresh); 2224 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, 2225 mbedge_lim, inner_limit, hev_thresh); 2226 } 2227 2228 if (inner_filter) { 2229 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize, 2230 linesize, bedge_lim_y, 2231 inner_limit, hev_thresh); 2232 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize, 2233 linesize, bedge_lim_y, 2234 inner_limit, hev_thresh); 2235 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize, 2236 linesize, bedge_lim_y, 2237 inner_limit, hev_thresh); 2238 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, 2239 dst[2] + 4 * uvlinesize, 2240 uvlinesize, bedge_lim_uv, 2241 inner_limit, hev_thresh); 2242 } 2243 2244 H_LOOP_FILTER_16Y_INNER(is_vp7) 2245} 2246 2247static av_always_inline 2248void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, 2249 int mb_x, int mb_y) 2250{ 2251 int mbedge_lim, bedge_lim; 2252 int filter_level = f->filter_level; 2253 int inner_limit = f->inner_limit; 2254 int inner_filter = f->inner_filter; 2255 ptrdiff_t linesize = s->linesize; 2256 2257 if (!filter_level) 2258 return; 2259 2260 bedge_lim = 2 * filter_level + inner_limit; 2261 mbedge_lim = bedge_lim + 4; 2262 2263 if (mb_x) 2264 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); 2265 if (inner_filter) { 2266 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); 2267 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); 2268 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); 2269 } 2270 2271 if (mb_y) 2272 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); 2273 if (inner_filter) { 2274 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); 2275 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); 2276 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); 2277 } 2278} 2279 2280#define MARGIN (16 << 2) 2281static av_always_inline 2282int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, 2283 VP8Frame *prev_frame, int is_vp7) 2284{ 2285 VP8Context *s = avctx->priv_data; 2286 int mb_x, mb_y; 2287 2288 s->mv_bounds.mv_min.y = -MARGIN; 2289 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; 2290 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { 2291 VP8Macroblock *mb = s->macroblocks_base + 2292 ((s->mb_width + 1) * (mb_y + 1) + 1); 2293 int mb_xy = mb_y * s->mb_width; 2294 2295 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); 2296 2297 s->mv_bounds.mv_min.x = -MARGIN; 2298 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; 2299 2300 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 2301 if (vpX_rac_is_end(&s->c)) { 2302 return AVERROR_INVALIDDATA; 2303 } 2304 if (mb_y == 0) 2305 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, 2306 DC_PRED * 0x01010101); 2307 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, 2308 prev_frame && prev_frame->seg_map ? 2309 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7); 2310 s->mv_bounds.mv_min.x -= 64; 2311 s->mv_bounds.mv_max.x -= 64; 2312 } 2313 s->mv_bounds.mv_min.y -= 64; 2314 s->mv_bounds.mv_max.y -= 64; 2315 } 2316 return 0; 2317} 2318 2319static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, 2320 VP8Frame *prev_frame) 2321{ 2322 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7); 2323} 2324 2325static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, 2326 VP8Frame *prev_frame) 2327{ 2328 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8); 2329} 2330 2331#if HAVE_THREADS 2332#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \ 2333 do { \ 2334 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \ 2335 if (atomic_load(&otd->thread_mb_pos) < tmp) { \ 2336 pthread_mutex_lock(&otd->lock); \ 2337 atomic_store(&td->wait_mb_pos, tmp); \ 2338 do { \ 2339 if (atomic_load(&otd->thread_mb_pos) >= tmp) \ 2340 break; \ 2341 pthread_cond_wait(&otd->cond, &otd->lock); \ 2342 } while (1); \ 2343 atomic_store(&td->wait_mb_pos, INT_MAX); \ 2344 pthread_mutex_unlock(&otd->lock); \ 2345 } \ 2346 } while (0) 2347 2348#define update_pos(td, mb_y, mb_x) \ 2349 do { \ 2350 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \ 2351 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \ 2352 (num_jobs > 1); \ 2353 int is_null = !next_td || !prev_td; \ 2354 int pos_check = (is_null) ? 1 : \ 2355 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \ 2356 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \ 2357 atomic_store(&td->thread_mb_pos, pos); \ 2358 if (sliced_threading && pos_check) { \ 2359 pthread_mutex_lock(&td->lock); \ 2360 pthread_cond_broadcast(&td->cond); \ 2361 pthread_mutex_unlock(&td->lock); \ 2362 } \ 2363 } while (0) 2364#else 2365#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0) 2366#define update_pos(td, mb_y, mb_x) while(0) 2367#endif 2368 2369static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, 2370 int jobnr, int threadnr, int is_vp7) 2371{ 2372 VP8Context *s = avctx->priv_data; 2373 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; 2374 int mb_y = atomic_load(&td->thread_mb_pos) >> 16; 2375 int mb_x, mb_xy = mb_y * s->mb_width; 2376 int num_jobs = s->num_jobs; 2377 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame; 2378 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)]; 2379 VP8Macroblock *mb; 2380 uint8_t *dst[3] = { 2381 curframe->tf.f->data[0] + 16 * mb_y * s->linesize, 2382 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize, 2383 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize 2384 }; 2385 2386 if (vpX_rac_is_end(c)) 2387 return AVERROR_INVALIDDATA; 2388 2389 if (mb_y == 0) 2390 prev_td = td; 2391 else 2392 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; 2393 if (mb_y == s->mb_height - 1) 2394 next_td = td; 2395 else 2396 next_td = &s->thread_data[(jobnr + 1) % num_jobs]; 2397 if (s->mb_layout == 1) 2398 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); 2399 else { 2400 // Make sure the previous frame has read its segmentation map, 2401 // if we re-use the same map. 2402 if (prev_frame && s->segmentation.enabled && 2403 !s->segmentation.update_map) 2404 ff_thread_await_progress(&prev_frame->tf, mb_y, 0); 2405 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; 2406 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock 2407 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); 2408 } 2409 2410 if (!is_vp7 || mb_y == 0) 2411 memset(td->left_nnz, 0, sizeof(td->left_nnz)); 2412 2413 td->mv_bounds.mv_min.x = -MARGIN; 2414 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; 2415 2416 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 2417 if (vpX_rac_is_end(c)) 2418 return AVERROR_INVALIDDATA; 2419 // Wait for previous thread to read mb_x+2, and reach mb_y-1. 2420 if (prev_td != td) { 2421 if (threadnr != 0) { 2422 check_thread_pos(td, prev_td, 2423 mb_x + (is_vp7 ? 2 : 1), 2424 mb_y - (is_vp7 ? 2 : 1)); 2425 } else { 2426 check_thread_pos(td, prev_td, 2427 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3, 2428 mb_y - (is_vp7 ? 2 : 1)); 2429 } 2430 } 2431 2432 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, 2433 s->linesize, 4); 2434 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, 2435 dst[2] - dst[1], 2); 2436 2437 if (!s->mb_layout) 2438 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, 2439 prev_frame && prev_frame->seg_map ? 2440 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7); 2441 2442 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 2443 2444 if (!mb->skip) 2445 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7); 2446 2447 if (mb->mode <= MODE_I4x4) 2448 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7); 2449 else 2450 inter_predict(s, td, dst, mb, mb_x, mb_y); 2451 2452 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 2453 2454 if (!mb->skip) { 2455 idct_mb(s, td, dst, mb); 2456 } else { 2457 AV_ZERO64(td->left_nnz); 2458 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned 2459 2460 /* Reset DC block predictors if they would exist 2461 * if the mb had coefficients */ 2462 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { 2463 td->left_nnz[8] = 0; 2464 s->top_nnz[mb_x][8] = 0; 2465 } 2466 } 2467 2468 if (s->deblock_filter) 2469 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7); 2470 2471 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) { 2472 if (s->filter.simple) 2473 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2474 NULL, NULL, s->linesize, 0, 1); 2475 else 2476 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2477 dst[1], dst[2], s->linesize, s->uvlinesize, 0); 2478 } 2479 2480 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 2481 2482 dst[0] += 16; 2483 dst[1] += 8; 2484 dst[2] += 8; 2485 td->mv_bounds.mv_min.x -= 64; 2486 td->mv_bounds.mv_max.x -= 64; 2487 2488 if (mb_x == s->mb_width + 1) { 2489 update_pos(td, mb_y, s->mb_width + 3); 2490 } else { 2491 update_pos(td, mb_y, mb_x); 2492 } 2493 } 2494 return 0; 2495} 2496 2497static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, 2498 int jobnr, int threadnr) 2499{ 2500 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1); 2501} 2502 2503static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, 2504 int jobnr, int threadnr) 2505{ 2506 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0); 2507} 2508 2509static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata, 2510 int jobnr, int threadnr, int is_vp7) 2511{ 2512 VP8Context *s = avctx->priv_data; 2513 VP8ThreadData *td = &s->thread_data[threadnr]; 2514 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs; 2515 AVFrame *curframe = s->curframe->tf.f; 2516 VP8Macroblock *mb; 2517 VP8ThreadData *prev_td, *next_td; 2518 uint8_t *dst[3] = { 2519 curframe->data[0] + 16 * mb_y * s->linesize, 2520 curframe->data[1] + 8 * mb_y * s->uvlinesize, 2521 curframe->data[2] + 8 * mb_y * s->uvlinesize 2522 }; 2523 2524 if (s->mb_layout == 1) 2525 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); 2526 else 2527 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; 2528 2529 if (mb_y == 0) 2530 prev_td = td; 2531 else 2532 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; 2533 if (mb_y == s->mb_height - 1) 2534 next_td = td; 2535 else 2536 next_td = &s->thread_data[(jobnr + 1) % num_jobs]; 2537 2538 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) { 2539 VP8FilterStrength *f = &td->filter_strength[mb_x]; 2540 if (prev_td != td) 2541 check_thread_pos(td, prev_td, 2542 (mb_x + 1) + (s->mb_width + 3), mb_y - 1); 2543 if (next_td != td) 2544 if (next_td != &s->thread_data[0]) 2545 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1); 2546 2547 if (num_jobs == 1) { 2548 if (s->filter.simple) 2549 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2550 NULL, NULL, s->linesize, 0, 1); 2551 else 2552 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2553 dst[1], dst[2], s->linesize, s->uvlinesize, 0); 2554 } 2555 2556 if (s->filter.simple) 2557 filter_mb_simple(s, dst[0], f, mb_x, mb_y); 2558 else 2559 filter_mb(s, dst, f, mb_x, mb_y, is_vp7); 2560 dst[0] += 16; 2561 dst[1] += 8; 2562 dst[2] += 8; 2563 2564 update_pos(td, mb_y, (s->mb_width + 3) + mb_x); 2565 } 2566} 2567 2568static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata, 2569 int jobnr, int threadnr) 2570{ 2571 filter_mb_row(avctx, tdata, jobnr, threadnr, 1); 2572} 2573 2574static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, 2575 int jobnr, int threadnr) 2576{ 2577 filter_mb_row(avctx, tdata, jobnr, threadnr, 0); 2578} 2579 2580static av_always_inline 2581int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, 2582 int threadnr, int is_vp7) 2583{ 2584 VP8Context *s = avctx->priv_data; 2585 VP8ThreadData *td = &s->thread_data[jobnr]; 2586 VP8ThreadData *next_td = NULL, *prev_td = NULL; 2587 VP8Frame *curframe = s->curframe; 2588 int mb_y, num_jobs = s->num_jobs; 2589 int ret; 2590 2591 td->thread_nr = threadnr; 2592 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; 2593 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; 2594 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { 2595 atomic_store(&td->thread_mb_pos, mb_y << 16); 2596 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); 2597 if (ret < 0) { 2598 update_pos(td, s->mb_height, INT_MAX & 0xFFFF); 2599 return ret; 2600 } 2601 if (s->deblock_filter) 2602 s->filter_mb_row(avctx, tdata, jobnr, threadnr); 2603 update_pos(td, mb_y, INT_MAX & 0xFFFF); 2604 2605 td->mv_bounds.mv_min.y -= 64 * num_jobs; 2606 td->mv_bounds.mv_max.y -= 64 * num_jobs; 2607 2608 if (avctx->active_thread_type == FF_THREAD_FRAME) 2609 ff_thread_report_progress(&curframe->tf, mb_y, 0); 2610 } 2611 2612 return 0; 2613} 2614 2615static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, 2616 int jobnr, int threadnr) 2617{ 2618 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7); 2619} 2620 2621static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, 2622 int jobnr, int threadnr) 2623{ 2624 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8); 2625} 2626 2627static av_always_inline 2628int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame, 2629 const AVPacket *avpkt, int is_vp7) 2630{ 2631 VP8Context *s = avctx->priv_data; 2632 int ret, i, referenced, num_jobs; 2633 enum AVDiscard skip_thresh; 2634 VP8Frame *av_uninit(curframe), *prev_frame; 2635 2636 if (is_vp7) 2637 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size); 2638 else 2639 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size); 2640 2641 if (ret < 0) 2642 goto err; 2643 2644 if (s->actually_webp) { 2645 // avctx->pix_fmt already set in caller. 2646 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { 2647 s->pix_fmt = get_pixel_format(s); 2648 if (s->pix_fmt < 0) { 2649 ret = AVERROR(EINVAL); 2650 goto err; 2651 } 2652 avctx->pix_fmt = s->pix_fmt; 2653 } 2654 2655 prev_frame = s->framep[VP56_FRAME_CURRENT]; 2656 2657 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT || 2658 s->update_altref == VP56_FRAME_CURRENT; 2659 2660 skip_thresh = !referenced ? AVDISCARD_NONREF 2661 : !s->keyframe ? AVDISCARD_NONKEY 2662 : AVDISCARD_ALL; 2663 2664 if (avctx->skip_frame >= skip_thresh) { 2665 s->invisible = 1; 2666 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); 2667 goto skip_decode; 2668 } 2669 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; 2670 2671 // release no longer referenced frames 2672 for (i = 0; i < 5; i++) 2673 if (s->frames[i].tf.f->buf[0] && 2674 &s->frames[i] != prev_frame && 2675 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && 2676 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && 2677 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) 2678 vp8_release_frame(s, &s->frames[i]); 2679 2680 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s); 2681 2682 if (!s->colorspace) 2683 avctx->colorspace = AVCOL_SPC_BT470BG; 2684 if (s->fullrange) 2685 avctx->color_range = AVCOL_RANGE_JPEG; 2686 else 2687 avctx->color_range = AVCOL_RANGE_MPEG; 2688 2689 /* Given that arithmetic probabilities are updated every frame, it's quite 2690 * likely that the values we have on a random interframe are complete 2691 * junk if we didn't start decode on a keyframe. So just don't display 2692 * anything rather than junk. */ 2693 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || 2694 !s->framep[VP56_FRAME_GOLDEN] || 2695 !s->framep[VP56_FRAME_GOLDEN2])) { 2696 av_log(avctx, AV_LOG_WARNING, 2697 "Discarding interframe without a prior keyframe!\n"); 2698 ret = AVERROR_INVALIDDATA; 2699 goto err; 2700 } 2701 2702 curframe->tf.f->key_frame = s->keyframe; 2703 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I 2704 : AV_PICTURE_TYPE_P; 2705 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0) 2706 goto err; 2707 2708 // check if golden and altref are swapped 2709 if (s->update_altref != VP56_FRAME_NONE) 2710 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; 2711 else 2712 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; 2713 2714 if (s->update_golden != VP56_FRAME_NONE) 2715 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; 2716 else 2717 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; 2718 2719 if (s->update_last) 2720 s->next_framep[VP56_FRAME_PREVIOUS] = curframe; 2721 else 2722 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS]; 2723 2724 s->next_framep[VP56_FRAME_CURRENT] = curframe; 2725 2726 if (ffcodec(avctx->codec)->update_thread_context) 2727 ff_thread_finish_setup(avctx); 2728 2729 if (avctx->hwaccel) { 2730 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size); 2731 if (ret < 0) 2732 goto err; 2733 2734 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); 2735 if (ret < 0) 2736 goto err; 2737 2738 ret = avctx->hwaccel->end_frame(avctx); 2739 if (ret < 0) 2740 goto err; 2741 2742 } else { 2743 s->linesize = curframe->tf.f->linesize[0]; 2744 s->uvlinesize = curframe->tf.f->linesize[1]; 2745 2746 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); 2747 /* Zero macroblock structures for top/top-left prediction 2748 * from outside the frame. */ 2749 if (!s->mb_layout) 2750 memset(s->macroblocks + s->mb_height * 2 - 1, 0, 2751 (s->mb_width + 1) * sizeof(*s->macroblocks)); 2752 if (!s->mb_layout && s->keyframe) 2753 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); 2754 2755 memset(s->ref_count, 0, sizeof(s->ref_count)); 2756 2757 if (s->mb_layout == 1) { 2758 // Make sure the previous frame has read its segmentation map, 2759 // if we re-use the same map. 2760 if (prev_frame && s->segmentation.enabled && 2761 !s->segmentation.update_map) 2762 ff_thread_await_progress(&prev_frame->tf, 1, 0); 2763 if (is_vp7) 2764 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); 2765 else 2766 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); 2767 if (ret < 0) 2768 goto err; 2769 } 2770 2771 if (avctx->active_thread_type == FF_THREAD_FRAME) 2772 num_jobs = 1; 2773 else 2774 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); 2775 s->num_jobs = num_jobs; 2776 s->curframe = curframe; 2777 s->prev_frame = prev_frame; 2778 s->mv_bounds.mv_min.y = -MARGIN; 2779 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; 2780 for (i = 0; i < MAX_THREADS; i++) { 2781 VP8ThreadData *td = &s->thread_data[i]; 2782 atomic_init(&td->thread_mb_pos, 0); 2783 atomic_init(&td->wait_mb_pos, INT_MAX); 2784 } 2785 if (is_vp7) 2786 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, 2787 num_jobs); 2788 else 2789 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, 2790 num_jobs); 2791 } 2792 2793 ff_thread_report_progress(&curframe->tf, INT_MAX, 0); 2794 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); 2795 2796skip_decode: 2797 // if future frames don't use the updated probabilities, 2798 // reset them to the values we saved 2799 if (!s->update_probabilities) 2800 s->prob[0] = s->prob[1]; 2801 2802 if (!s->invisible) { 2803 if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0) 2804 return ret; 2805 *got_frame = 1; 2806 } 2807 2808 return avpkt->size; 2809err: 2810 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); 2811 return ret; 2812} 2813 2814int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame, 2815 int *got_frame, AVPacket *avpkt) 2816{ 2817 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8); 2818} 2819 2820#if CONFIG_VP7_DECODER 2821static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame, 2822 int *got_frame, AVPacket *avpkt) 2823{ 2824 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7); 2825} 2826#endif /* CONFIG_VP7_DECODER */ 2827 2828av_cold int ff_vp8_decode_free(AVCodecContext *avctx) 2829{ 2830 VP8Context *s = avctx->priv_data; 2831 int i; 2832 2833 vp8_decode_flush_impl(avctx, 1); 2834 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) 2835 av_frame_free(&s->frames[i].tf.f); 2836 2837 return 0; 2838} 2839 2840static av_cold int vp8_init_frames(VP8Context *s) 2841{ 2842 int i; 2843 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) { 2844 s->frames[i].tf.f = av_frame_alloc(); 2845 if (!s->frames[i].tf.f) 2846 return AVERROR(ENOMEM); 2847 } 2848 return 0; 2849} 2850 2851static av_always_inline 2852int vp78_decode_init(AVCodecContext *avctx, int is_vp7) 2853{ 2854 VP8Context *s = avctx->priv_data; 2855 int ret; 2856 2857 s->avctx = avctx; 2858 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7; 2859 s->pix_fmt = AV_PIX_FMT_NONE; 2860 avctx->pix_fmt = AV_PIX_FMT_YUV420P; 2861 2862 ff_videodsp_init(&s->vdsp, 8); 2863 2864 ff_vp78dsp_init(&s->vp8dsp); 2865 if (CONFIG_VP7_DECODER && is_vp7) { 2866 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1); 2867 ff_vp7dsp_init(&s->vp8dsp); 2868 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter; 2869 s->filter_mb_row = vp7_filter_mb_row; 2870 } else if (CONFIG_VP8_DECODER && !is_vp7) { 2871 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); 2872 ff_vp8dsp_init(&s->vp8dsp); 2873 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter; 2874 s->filter_mb_row = vp8_filter_mb_row; 2875 } 2876 2877 /* does not change for VP8 */ 2878 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); 2879 2880 if ((ret = vp8_init_frames(s)) < 0) { 2881 ff_vp8_decode_free(avctx); 2882 return ret; 2883 } 2884 2885 return 0; 2886} 2887 2888#if CONFIG_VP7_DECODER 2889static int vp7_decode_init(AVCodecContext *avctx) 2890{ 2891 return vp78_decode_init(avctx, IS_VP7); 2892} 2893#endif /* CONFIG_VP7_DECODER */ 2894 2895av_cold int ff_vp8_decode_init(AVCodecContext *avctx) 2896{ 2897 return vp78_decode_init(avctx, IS_VP8); 2898} 2899 2900#if CONFIG_VP8_DECODER 2901#if HAVE_THREADS 2902#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL) 2903 2904static int vp8_decode_update_thread_context(AVCodecContext *dst, 2905 const AVCodecContext *src) 2906{ 2907 VP8Context *s = dst->priv_data, *s_src = src->priv_data; 2908 int i; 2909 2910 if (s->macroblocks_base && 2911 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { 2912 free_buffers(s); 2913 s->mb_width = s_src->mb_width; 2914 s->mb_height = s_src->mb_height; 2915 } 2916 2917 s->pix_fmt = s_src->pix_fmt; 2918 s->prob[0] = s_src->prob[!s_src->update_probabilities]; 2919 s->segmentation = s_src->segmentation; 2920 s->lf_delta = s_src->lf_delta; 2921 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); 2922 2923 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) { 2924 if (s_src->frames[i].tf.f->buf[0]) { 2925 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]); 2926 if (ret < 0) 2927 return ret; 2928 } 2929 } 2930 2931 s->framep[0] = REBASE(s_src->next_framep[0]); 2932 s->framep[1] = REBASE(s_src->next_framep[1]); 2933 s->framep[2] = REBASE(s_src->next_framep[2]); 2934 s->framep[3] = REBASE(s_src->next_framep[3]); 2935 2936 return 0; 2937} 2938#endif /* HAVE_THREADS */ 2939#endif /* CONFIG_VP8_DECODER */ 2940 2941#if CONFIG_VP7_DECODER 2942const FFCodec ff_vp7_decoder = { 2943 .p.name = "vp7", 2944 .p.long_name = NULL_IF_CONFIG_SMALL("On2 VP7"), 2945 .p.type = AVMEDIA_TYPE_VIDEO, 2946 .p.id = AV_CODEC_ID_VP7, 2947 .priv_data_size = sizeof(VP8Context), 2948 .init = vp7_decode_init, 2949 .close = ff_vp8_decode_free, 2950 FF_CODEC_DECODE_CB(vp7_decode_frame), 2951 .p.capabilities = AV_CODEC_CAP_DR1, 2952 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, 2953 .flush = vp8_decode_flush, 2954}; 2955#endif /* CONFIG_VP7_DECODER */ 2956 2957#if CONFIG_VP8_DECODER 2958const FFCodec ff_vp8_decoder = { 2959 .p.name = "vp8", 2960 .p.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), 2961 .p.type = AVMEDIA_TYPE_VIDEO, 2962 .p.id = AV_CODEC_ID_VP8, 2963 .priv_data_size = sizeof(VP8Context), 2964 .init = ff_vp8_decode_init, 2965 .close = ff_vp8_decode_free, 2966 FF_CODEC_DECODE_CB(ff_vp8_decode_frame), 2967 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | 2968 AV_CODEC_CAP_SLICE_THREADS, 2969 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | 2970 FF_CODEC_CAP_ALLOCATE_PROGRESS, 2971 .flush = vp8_decode_flush, 2972 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context), 2973 .hw_configs = (const AVCodecHWConfigInternal *const []) { 2974#if CONFIG_VP8_VAAPI_HWACCEL 2975 HWACCEL_VAAPI(vp8), 2976#endif 2977#if CONFIG_VP8_NVDEC_HWACCEL 2978 HWACCEL_NVDEC(vp8), 2979#endif 2980 NULL 2981 }, 2982}; 2983#endif /* CONFIG_VP7_DECODER */ 2984