/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config_components.h"

#include "libavutil/imgutils.h"
#include "libavutil/mem_internal.h"

#include "avcodec.h"
#include "codec_internal.h"
#include "hwconfig.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "threadframe.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
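
/* Illustrative expansion (not part of the original source): with both
 * decoders configured, VPX(is_vp7, decode_mvs) selects vp7_decode_mvs or
 * vp8_decode_mvs at run time; with only one decoder built, the macro
 * resolves to that decoder's function at compile time. */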

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf,
                                        ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_ext_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_ext_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}

static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret, dim_reset = 0;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;

        dim_reset = (s->macroblocks_base != NULL);
    }

    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
        !s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}


static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
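
/* Bitstream layout (an informal note): for num_coeff_partitions = N, the
 * sizes of the first N - 1 token partitions are stored up front as 24-bit
 * little-endian values; the final partition uses whatever data remains. */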

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
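
/* A quick check of the Y2 scaling used below (illustrative): the DC factor
 * is doubled, and the AC factor is scaled by 155/100 in integer arithmetic
 * as x * 101581 >> 16, since 1.55 * 65536 is approximately 101581. */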

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   0: no update
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
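
/* fade() applies a per-pixel luma ramp: dst = clip_uint8(y + y * beta / 256
 * + alpha). As a purely illustrative example, alpha = -16 with beta = 0
 * darkens every pixel by 16. */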

static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n",
               buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}

static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}

/**
 * Motion vector coding, 17.1.
 */
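/* An informal sketch of the large-magnitude path below (not normative):
 * bits 0-2 of the magnitude are read first, then the high bits down to
 * bit 4; bit 3 is forced to 1 when all higher bits are zero (this path
 * implies a magnitude >= 8) and is coded explicitly otherwise. A final
 * sign bit turns the magnitude into a signed component. */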
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to the
 * right edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
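/* Worked example (illustrative): with mb_width = 10, vwidth is 11 and
 * column 10 is the padding column; candidate offsets whose linear address
 * falls in that column or below 'boundary' are rejected, everything else
 * maps back to (edge_x, edge_y) = (new % vwidth, new / vwidth). */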
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }
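
    /* The SWAR negate above flips both packed int16 MV components at once:
     * per 16-bit lane it computes ~v + 1, adding 1 to the low 15 bits and
     * folding any carry into the sign bit via the XOR. For example
     * (illustrative), a lane holding +1: ~0x0001 = 0xFFFE, then
     * (0x7FFE + 1) ^ 0x8000 = 0xFFFF = -1. */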

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
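                    /* cat 0..3 here correspond to DCT_CAT3..DCT_CAT6, so the
                     * bases 3 + (8 << cat) work out to 11, 19, 35 and 67
                     * (a quick derivation, noted for the reader's benefit). */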
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }
    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT.
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
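
/* Layout note (derived from the copies below): each macroblock's saved top
 * border occupies 32 bytes, 16 luma pixels at offset 0, 8 Cb at offset 16
 * and 8 Cr at offset 24. */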

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
1673
1674 static av_always_inline
intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)1675 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1676 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1677 {
1678 int x, y, mode, nnz;
1679 uint32_t tr;
1680
1681 /* for the first row, we need to run xchg_mb_border to init the top edge
1682 * to 127 otherwise, skip it if we aren't going to deblock */
1683 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1684 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1685 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1686 s->filter.simple, 1);
1687
1688 if (mb->mode < MODE_I4x4) {
1689 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1690 s->hpc.pred16x16[mode](dst[0], s->linesize);
1691 } else {
1692 uint8_t *ptr = dst[0];
1693 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1694 const uint8_t lo = is_vp7 ? 128 : 127;
1695 const uint8_t hi = is_vp7 ? 128 : 129;
1696 uint8_t tr_top[4] = { lo, lo, lo, lo };
1697
1698 // all blocks on the right edge of the macroblock use the bottom edge of
1699 // the top macroblock for their topright edge
1700 uint8_t *tr_right = ptr - s->linesize + 16;
1701
1702 // if we're on the right edge of the frame, said edge is extended
1703 // from the top macroblock
1704 if (mb_y && mb_x == s->mb_width - 1) {
1705 tr = tr_right[-1] * 0x01010101u;
1706 tr_right = (uint8_t *) &tr;
1707 }
1708
1709 if (mb->skip)
1710 AV_ZERO128(td->non_zero_count_cache);
1711
1712 for (y = 0; y < 4; y++) {
1713 uint8_t *topright = ptr + 4 - s->linesize;
1714 for (x = 0; x < 4; x++) {
1715 int copy = 0;
1716 ptrdiff_t linesize = s->linesize;
1717 uint8_t *dst = ptr + 4 * x;
1718 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
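/* When copy is set below, the 4x4 block is predicted into this small
 * scratch buffer instead of the frame, with its borders filled in first,
 * then copied back. Layout (stride 8, block starts at offset 12):
 *   copy_dst[3]             top-left pixel
 *   copy_dst[4..7]          top edge
 *   copy_dst[11,19,27,35]   left edge (one byte before each block row)
 */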
1719
1720 if ((y == 0 || x == 3) && mb_y == 0) {
1721 topright = tr_top;
1722 } else if (x == 3)
1723 topright = tr_right;
1724
1725 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1726 mb_y + y, &copy, is_vp7);
1727 if (copy) {
1728 dst = copy_dst + 12;
1729 linesize = 8;
1730 if (!(mb_y + y)) {
1731 copy_dst[3] = lo;
1732 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1733 } else {
1734 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1735 if (!(mb_x + x)) {
1736 copy_dst[3] = hi;
1737 } else {
1738 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1739 }
1740 }
1741 if (!(mb_x + x)) {
1742 copy_dst[11] =
1743 copy_dst[19] =
1744 copy_dst[27] =
1745 copy_dst[35] = hi;
1746 } else {
1747 copy_dst[11] = ptr[4 * x - 1];
1748 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1749 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1750 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1751 }
1752 }
1753 s->hpc.pred4x4[mode](dst, topright, linesize);
1754 if (copy) {
1755 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1756 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1757 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1758 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1759 }
1760
1761 nnz = td->non_zero_count_cache[y][x];
1762 if (nnz) {
1763 if (nnz == 1)
1764 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1765 td->block[y][x], s->linesize);
1766 else
1767 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1768 td->block[y][x], s->linesize);
1769 }
1770 topright += 4;
1771 }
1772
1773 ptr += 4 * s->linesize;
1774 intra4x4 += 4;
1775 }
1776 }
1777
1778 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1779 mb_x, mb_y, is_vp7);
1780 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1781 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1782
1783 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1784 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1785 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1786 s->filter.simple, 0);
1787 }
1788
1789 static const uint8_t subpel_idx[3][8] = {
1790 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1791 // also function pointer index
1792 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1793 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1794 };
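/* Example of how these rows relate to VP8's subpel filters: the kernels for
 * the odd eighth-pel phases (1, 3, 5, 7) have zero outer taps, so they only
 * need 1 extra pixel on the left and 2 on the right (3 total); the even
 * phases (2, 4, 6) use the full 6-tap kernel and need 2 left / 3 right
 * (5 total). Phase 0 is a plain copy and needs no extra pixels. */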
1795
1796 /**
1797 * luma MC function
1798 *
1799 * @param s VP8 decoding context
1800 * @param dst target buffer for block data at block position
1801 * @param ref reference picture buffer at origin (0, 0)
1802 * @param mv motion vector (relative to block position) to get pixel data from
1803 * @param x_off horizontal position of block from origin (0, 0)
1804 * @param y_off vertical position of block from origin (0, 0)
1805 * @param block_w width of block (16, 8 or 4)
1806 * @param block_h height of block (16, 8 or 4)
1807 * @param width width of src/dst plane data
1808 * @param height height of src/dst plane data
1809 * @param linesize size of a single line of plane data, including padding
1810 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1811 */
1812 static av_always_inline
1813 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1814 ThreadFrame *ref, const VP56mv *mv,
1815 int x_off, int y_off, int block_w, int block_h,
1816 int width, int height, ptrdiff_t linesize,
1817 vp8_mc_func mc_func[3][3])
1818 {
1819 uint8_t *src = ref->f->data[0];
1820
1821 if (AV_RN32A(mv)) {
1822 ptrdiff_t src_linesize = linesize;
1823
1824 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1825 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1826
1827 x_off += mv->x >> 2;
1828 y_off += mv->y >> 2;
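/* Luma motion vectors are in quarter-pel units: mv->x * 2 re-expresses
 * them in eighth-pel so that (& 7) yields the subpel phase, while
 * mv->x >> 2 is the integer pixel offset. E.g. mv->x = 7 (1.75 px)
 * gives mx = 14 & 7 = 6 (phase 6/8) and an integer offset of 1. */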
1829
1830 // edge emulation
1831 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1832 src += y_off * linesize + x_off;
1833 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1834 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1835 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1836 src - my_idx * linesize - mx_idx,
1837 EDGE_EMU_LINESIZE, linesize,
1838 block_w + subpel_idx[1][mx],
1839 block_h + subpel_idx[1][my],
1840 x_off - mx_idx, y_off - my_idx,
1841 width, height);
1842 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1843 src_linesize = EDGE_EMU_LINESIZE;
1844 }
1845 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1846 } else {
1847 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1848 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1849 linesize, block_h, 0, 0);
1850 }
1851 }
1852
1853 /**
1854 * chroma MC function
1855 *
1856 * @param s VP8 decoding context
1857 * @param dst1 target buffer for block data at block position (U plane)
1858 * @param dst2 target buffer for block data at block position (V plane)
1859 * @param ref reference picture buffer at origin (0, 0)
1860 * @param mv motion vector (relative to block position) to get pixel data from
1861 * @param x_off horizontal position of block from origin (0, 0)
1862 * @param y_off vertical position of block from origin (0, 0)
1863 * @param block_w width of block (8 or 4)
1864 * @param block_h height of block (8 or 4)
1865 * @param width width of src/dst plane data
1866 * @param height height of src/dst plane data
1867 * @param linesize size of a single line of plane data, including padding
1868 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1869 */
1870 static av_always_inline
1871 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1872 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1873 int x_off, int y_off, int block_w, int block_h,
1874 int width, int height, ptrdiff_t linesize,
1875 vp8_mc_func mc_func[3][3])
1876 {
1877 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1878
1879 if (AV_RN32A(mv)) {
1880 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1881 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1882
1883 x_off += mv->x >> 3;
1884 y_off += mv->y >> 3;
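/* The same vector applied to the 2x2-subsampled chroma planes is already
 * in eighth-pel units, so the phase is mv->x & 7 and the integer offset
 * mv->x >> 3. E.g. mv->x = 7 corresponds to 0.875 chroma pixels:
 * phase 7, integer offset 0. */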
1885
1886 // edge emulation
1887 src1 += y_off * linesize + x_off;
1888 src2 += y_off * linesize + x_off;
1889 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1890 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1891 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1892 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1893 src1 - my_idx * linesize - mx_idx,
1894 EDGE_EMU_LINESIZE, linesize,
1895 block_w + subpel_idx[1][mx],
1896 block_h + subpel_idx[1][my],
1897 x_off - mx_idx, y_off - my_idx, width, height);
1898 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1899 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1900
1901 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1902 src2 - my_idx * linesize - mx_idx,
1903 EDGE_EMU_LINESIZE, linesize,
1904 block_w + subpel_idx[1][mx],
1905 block_h + subpel_idx[1][my],
1906 x_off - mx_idx, y_off - my_idx, width, height);
1907 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1908 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1909 } else {
1910 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1911 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1912 }
1913 } else {
1914 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1915 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1916 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1917 }
1918 }
1919
1920 static av_always_inline
1921 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1922 ThreadFrame *ref_frame, int x_off, int y_off,
1923 int bx_off, int by_off, int block_w, int block_h,
1924 int width, int height, VP56mv *mv)
1925 {
1926 VP56mv uvmv = *mv;
1927
1928 /* Y */
1929 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1930 ref_frame, mv, x_off + bx_off, y_off + by_off,
1931 block_w, block_h, width, height, s->linesize,
1932 s->put_pixels_tab[block_w == 8]);
1933
1934 /* U/V */
1935 if (s->profile == 3) {
1936 /* this block only applies to VP8; it is safe to check
1937 * only the profile, as VP7 profile <= 1 */
1938 uvmv.x &= ~7;
1939 uvmv.y &= ~7;
1940 }
1941 x_off >>= 1;
1942 y_off >>= 1;
1943 bx_off >>= 1;
1944 by_off >>= 1;
1945 width >>= 1;
1946 height >>= 1;
1947 block_w >>= 1;
1948 block_h >>= 1;
1949 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1950 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1951 &uvmv, x_off + bx_off, y_off + by_off,
1952 block_w, block_h, width, height, s->uvlinesize,
1953 s->put_pixels_tab[1 + (block_w == 4)]);
1954 }
1955
1956 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1957 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1958 static av_always_inline
1959 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1960 int mb_xy, int ref)
1961 {
1962 /* Don't prefetch refs that haven't been used very often this frame. */
1963 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1964 int x_off = mb_x << 4, y_off = mb_y << 4;
1965 int mx = (mb->mv.x >> 2) + x_off + 8;
1966 int my = (mb->mv.y >> 2) + y_off;
1967 uint8_t **src = s->framep[ref]->tf.f->data;
1968 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1969 /* For threading, a ff_thread_await_progress here might be useful, but
1970 * it actually slows down the decoder. Since a bad prefetch doesn't
1971 * generate bad decoder output, we don't run it here. */
1972 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1973 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1974 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1975 }
1976 }
1977
1978 /**
1979 * Apply motion vectors to prediction buffer, chapter 18.
1980 */
1981 static av_always_inline
1982 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1983 VP8Macroblock *mb, int mb_x, int mb_y)
1984 {
1985 int x_off = mb_x << 4, y_off = mb_y << 4;
1986 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1987 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1988 VP56mv *bmv = mb->bmv;
1989
1990 switch (mb->partitioning) {
1991 case VP8_SPLITMVMODE_NONE:
1992 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1993 0, 0, 16, 16, width, height, &mb->mv);
1994 break;
1995 case VP8_SPLITMVMODE_4x4: {
1996 int x, y;
1997 VP56mv uvmv;
1998
1999 /* Y */
2000 for (y = 0; y < 4; y++) {
2001 for (x = 0; x < 4; x++) {
2002 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
2003 ref, &bmv[4 * y + x],
2004 4 * x + x_off, 4 * y + y_off, 4, 4,
2005 width, height, s->linesize,
2006 s->put_pixels_tab[2]);
2007 }
2008 }
2009
2010 /* U/V */
2011 x_off >>= 1;
2012 y_off >>= 1;
2013 width >>= 1;
2014 height >>= 1;
2015 for (y = 0; y < 2; y++) {
2016 for (x = 0; x < 2; x++) {
2017 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2018 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2019 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2020 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2021 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2022 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2023 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2024 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2025 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2026 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
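/* The chroma vector is the sum of the four covered luma vectors divided
 * by 4, rounding half away from zero: FF_SIGNBIT() evaluates to 0 for
 * non-negative and -1 for negative values, so e.g. a sum of 6 becomes
 * (6 + 2) >> 2 = 2, and -6 becomes (-6 + 2 - 1) >> 2 = -2. */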
2027 if (s->profile == 3) {
2028 uvmv.x &= ~7;
2029 uvmv.y &= ~7;
2030 }
2031 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2032 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2033 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2034 width, height, s->uvlinesize,
2035 s->put_pixels_tab[2]);
2036 }
2037 }
2038 break;
2039 }
2040 case VP8_SPLITMVMODE_16x8:
2041 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042 0, 0, 16, 8, width, height, &bmv[0]);
2043 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044 0, 8, 16, 8, width, height, &bmv[1]);
2045 break;
2046 case VP8_SPLITMVMODE_8x16:
2047 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2048 0, 0, 8, 16, width, height, &bmv[0]);
2049 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2050 8, 0, 8, 16, width, height, &bmv[1]);
2051 break;
2052 case VP8_SPLITMVMODE_8x8:
2053 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2054 0, 0, 8, 8, width, height, &bmv[0]);
2055 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2056 8, 0, 8, 8, width, height, &bmv[1]);
2057 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2058 0, 8, 8, 8, width, height, &bmv[2]);
2059 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2060 8, 8, 8, 8, width, height, &bmv[3]);
2061 break;
2062 }
2063 }
2064
2065 static av_always_inline
2066 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2067 {
2068 int x, y, ch;
2069
2070 if (mb->mode != MODE_I4x4) {
2071 uint8_t *y_dst = dst[0];
2072 for (y = 0; y < 4; y++) {
2073 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
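/* nnz4 packs this row's four per-block coefficient counts into one
 * little-endian word, one count per byte. If no byte exceeds 1
 * (nnz4 & ~0x01010101 == 0), no block needs more than the DC-only
 * transform, so the whole row takes the dc_add4y fast path; otherwise
 * each block is dispatched separately to the DC-only or full IDCT. */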
2074 if (nnz4) {
2075 if (nnz4 & ~0x01010101) {
2076 for (x = 0; x < 4; x++) {
2077 if ((uint8_t) nnz4 == 1)
2078 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2079 td->block[y][x],
2080 s->linesize);
2081 else if ((uint8_t) nnz4 > 1)
2082 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2083 td->block[y][x],
2084 s->linesize);
2085 nnz4 >>= 8;
2086 if (!nnz4)
2087 break;
2088 }
2089 } else {
2090 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2091 }
2092 }
2093 y_dst += 4 * s->linesize;
2094 }
2095 }
2096
2097 for (ch = 0; ch < 2; ch++) {
2098 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2099 if (nnz4) {
2100 uint8_t *ch_dst = dst[1 + ch];
2101 if (nnz4 & ~0x01010101) {
2102 for (y = 0; y < 2; y++) {
2103 for (x = 0; x < 2; x++) {
2104 if ((uint8_t) nnz4 == 1)
2105 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2106 td->block[4 + ch][(y << 1) + x],
2107 s->uvlinesize);
2108 else if ((uint8_t) nnz4 > 1)
2109 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2110 td->block[4 + ch][(y << 1) + x],
2111 s->uvlinesize);
2112 nnz4 >>= 8;
2113 if (!nnz4)
2114 goto chroma_idct_end;
2115 }
2116 ch_dst += 4 * s->uvlinesize;
2117 }
2118 } else {
2119 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2120 }
2121 }
2122 chroma_idct_end:
2123 ;
2124 }
2125 }
2126
2127 static av_always_inline
2128 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2129 VP8FilterStrength *f, int is_vp7)
2130 {
2131 int interior_limit, filter_level;
2132
2133 if (s->segmentation.enabled) {
2134 filter_level = s->segmentation.filter_level[mb->segment];
2135 if (!s->segmentation.absolute_vals)
2136 filter_level += s->filter.level;
2137 } else
2138 filter_level = s->filter.level;
2139
2140 if (s->lf_delta.enabled) {
2141 filter_level += s->lf_delta.ref[mb->ref_frame];
2142 filter_level += s->lf_delta.mode[mb->mode];
2143 }
2144
2145 filter_level = av_clip_uintp2(filter_level, 6);
2146
2147 interior_limit = filter_level;
2148 if (s->filter.sharpness) {
2149 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2150 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2151 }
2152 interior_limit = FFMAX(interior_limit, 1);
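/* Worked example of the arithmetic above: filter_level 32 (after the
 * av_clip_uintp2 clamp to [0, 63]) with sharpness 4 gives
 * interior_limit = FFMIN(32 >> ((4 + 3) >> 2), 9 - 4)
 *                = FFMIN(16, 5) = 5, kept at least 1 by the FFMAX. */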
2153
2154 f->filter_level = filter_level;
2155 f->inner_limit = interior_limit;
2156 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2157 mb->mode == VP8_MVMODE_SPLIT;
2158 }
2159
2160 static av_always_inline
2161 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2162 int mb_x, int mb_y, int is_vp7)
2163 {
2164 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2165 int filter_level = f->filter_level;
2166 int inner_limit = f->inner_limit;
2167 int inner_filter = f->inner_filter;
2168 ptrdiff_t linesize = s->linesize;
2169 ptrdiff_t uvlinesize = s->uvlinesize;
2170 static const uint8_t hev_thresh_lut[2][64] = {
2171 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2174 3, 3, 3, 3 },
2175 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2178 2, 2, 2, 2 }
2179 };
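/* hev_thresh_lut maps the final filter level (0..63) to the
 * high-edge-variance threshold; row [1] is selected on keyframes, where
 * the threshold stays lower (at most 2, vs. at most 3 on inter frames). */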
2180
2181 if (!filter_level)
2182 return;
2183
2184 if (is_vp7) {
2185 bedge_lim_y = filter_level;
2186 bedge_lim_uv = filter_level * 2;
2187 mbedge_lim = filter_level + 2;
2188 } else {
2189 bedge_lim_y =
2190 bedge_lim_uv = filter_level * 2 + inner_limit;
2191 mbedge_lim = bedge_lim_y + 4;
2192 }
2193
2194 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2195
2196 if (mb_x) {
2197 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2198 mbedge_lim, inner_limit, hev_thresh);
2199 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2200 mbedge_lim, inner_limit, hev_thresh);
2201 }
2202
2203 #define H_LOOP_FILTER_16Y_INNER(cond) \
2204 if (cond && inner_filter) { \
2205 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2206 bedge_lim_y, inner_limit, \
2207 hev_thresh); \
2208 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2209 bedge_lim_y, inner_limit, \
2210 hev_thresh); \
2211 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2212 bedge_lim_y, inner_limit, \
2213 hev_thresh); \
2214 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2215 uvlinesize, bedge_lim_uv, \
2216 inner_limit, hev_thresh); \
2217 }
2218
2219 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2220
2221 if (mb_y) {
2222 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2223 mbedge_lim, inner_limit, hev_thresh);
2224 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2225 mbedge_lim, inner_limit, hev_thresh);
2226 }
2227
2228 if (inner_filter) {
2229 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2230 linesize, bedge_lim_y,
2231 inner_limit, hev_thresh);
2232 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2233 linesize, bedge_lim_y,
2234 inner_limit, hev_thresh);
2235 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2236 linesize, bedge_lim_y,
2237 inner_limit, hev_thresh);
2238 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2239 dst[2] + 4 * uvlinesize,
2240 uvlinesize, bedge_lim_uv,
2241 inner_limit, hev_thresh);
2242 }
2243
2244 H_LOOP_FILTER_16Y_INNER(is_vp7)
2245 }
2246
2247 static av_always_inline
2248 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2249 int mb_x, int mb_y)
2250 {
2251 int mbedge_lim, bedge_lim;
2252 int filter_level = f->filter_level;
2253 int inner_limit = f->inner_limit;
2254 int inner_filter = f->inner_filter;
2255 ptrdiff_t linesize = s->linesize;
2256
2257 if (!filter_level)
2258 return;
2259
2260 bedge_lim = 2 * filter_level + inner_limit;
2261 mbedge_lim = bedge_lim + 4;
2262
2263 if (mb_x)
2264 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2265 if (inner_filter) {
2266 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2267 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2268 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2269 }
2270
2271 if (mb_y)
2272 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2273 if (inner_filter) {
2274 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2275 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2276 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2277 }
2278 }
2279
2280 #define MARGIN (16 << 2)
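/* Motion-vector clamp bounds are kept in quarter-pel units (the units
 * vp8_mc_luma consumes): one macroblock is 16 px = 64 units, hence the
 * per-MB steps of 64 below. MARGIN (16 << 2 = 64 units = 16 px) lets
 * clamped vectors point up to one MB outside the visible frame, where
 * edge emulation supplies the missing pixels. */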
2281 static av_always_inline
2282 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2283 VP8Frame *prev_frame, int is_vp7)
2284 {
2285 VP8Context *s = avctx->priv_data;
2286 int mb_x, mb_y;
2287
2288 s->mv_bounds.mv_min.y = -MARGIN;
2289 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2290 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2291 VP8Macroblock *mb = s->macroblocks_base +
2292 ((s->mb_width + 1) * (mb_y + 1) + 1);
2293 int mb_xy = mb_y * s->mb_width;
2294
2295 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2296
2297 s->mv_bounds.mv_min.x = -MARGIN;
2298 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2299
2300 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2301 if (vpX_rac_is_end(&s->c)) {
2302 return AVERROR_INVALIDDATA;
2303 }
2304 if (mb_y == 0)
2305 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2306 DC_PRED * 0x01010101);
2307 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2308 prev_frame && prev_frame->seg_map ?
2309 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2310 s->mv_bounds.mv_min.x -= 64;
2311 s->mv_bounds.mv_max.x -= 64;
2312 }
2313 s->mv_bounds.mv_min.y -= 64;
2314 s->mv_bounds.mv_max.y -= 64;
2315 }
2316 return 0;
2317 }
2318
2319 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2320 VP8Frame *prev_frame)
2321 {
2322 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2323 }
2324
2325 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2326 VP8Frame *prev_frame)
2327 {
2328 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2329 }
2330
2331 #if HAVE_THREADS
2332 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2333 do { \
2334 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2335 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2336 pthread_mutex_lock(&otd->lock); \
2337 atomic_store(&td->wait_mb_pos, tmp); \
2338 do { \
2339 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2340 break; \
2341 pthread_cond_wait(&otd->cond, &otd->lock); \
2342 } while (1); \
2343 atomic_store(&td->wait_mb_pos, INT_MAX); \
2344 pthread_mutex_unlock(&otd->lock); \
2345 } \
2346 } while (0)
2347
2348 #define update_pos(td, mb_y, mb_x) \
2349 do { \
2350 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2351 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2352 (num_jobs > 1); \
2353 int is_null = !next_td || !prev_td; \
2354 int pos_check = (is_null) ? 1 : \
2355 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2356 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2357 atomic_store(&td->thread_mb_pos, pos); \
2358 if (sliced_threading && pos_check) { \
2359 pthread_mutex_lock(&td->lock); \
2360 pthread_cond_broadcast(&td->cond); \
2361 pthread_mutex_unlock(&td->lock); \
2362 } \
2363 } while (0)
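/* Both macros encode a macroblock position as (mb_y << 16) | mb_x, so a
 * single atomic int orders positions row-major. check_thread_pos blocks
 * until the other thread's published decode position reaches the
 * requested (x, y); update_pos publishes our own position and wakes any
 * neighbouring thread whose registered wait position has been reached. */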
2364 #else
2365 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2366 #define update_pos(td, mb_y, mb_x) while(0)
2367 #endif
2368
2369 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2370 int jobnr, int threadnr, int is_vp7)
2371 {
2372 VP8Context *s = avctx->priv_data;
2373 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2374 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2375 int mb_x, mb_xy = mb_y * s->mb_width;
2376 int num_jobs = s->num_jobs;
2377 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2378 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2379 VP8Macroblock *mb;
2380 uint8_t *dst[3] = {
2381 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2382 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2383 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2384 };
2385
2386 if (vpX_rac_is_end(c))
2387 return AVERROR_INVALIDDATA;
2388
2389 if (mb_y == 0)
2390 prev_td = td;
2391 else
2392 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2393 if (mb_y == s->mb_height - 1)
2394 next_td = td;
2395 else
2396 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2397 if (s->mb_layout == 1)
2398 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2399 else {
2400 // Make sure the previous frame has read its segmentation map,
2401 // if we re-use the same map.
2402 if (prev_frame && s->segmentation.enabled &&
2403 !s->segmentation.update_map)
2404 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2405 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2406 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2407 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2408 }
2409
2410 if (!is_vp7 || mb_y == 0)
2411 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2412
2413 td->mv_bounds.mv_min.x = -MARGIN;
2414 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2415
2416 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2417 if (vpX_rac_is_end(c))
2418 return AVERROR_INVALIDDATA;
2419 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2420 if (prev_td != td) {
2421 if (threadnr != 0) {
2422 check_thread_pos(td, prev_td,
2423 mb_x + (is_vp7 ? 2 : 1),
2424 mb_y - (is_vp7 ? 2 : 1));
2425 } else {
2426 check_thread_pos(td, prev_td,
2427 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2428 mb_y - (is_vp7 ? 2 : 1));
2429 }
2430 }
2431
2432 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2433 s->linesize, 4);
2434 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2435 dst[2] - dst[1], 2);
2436
2437 if (!s->mb_layout)
2438 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2439 prev_frame && prev_frame->seg_map ?
2440 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2441
2442 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2443
2444 if (!mb->skip)
2445 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2446
2447 if (mb->mode <= MODE_I4x4)
2448 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2449 else
2450 inter_predict(s, td, dst, mb, mb_x, mb_y);
2451
2452 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2453
2454 if (!mb->skip) {
2455 idct_mb(s, td, dst, mb);
2456 } else {
2457 AV_ZERO64(td->left_nnz);
2458 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2459
2460 /* Reset DC block predictors if they would exist
2461 * if the mb had coefficients */
2462 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2463 td->left_nnz[8] = 0;
2464 s->top_nnz[mb_x][8] = 0;
2465 }
2466 }
2467
2468 if (s->deblock_filter)
2469 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2470
2471 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2472 if (s->filter.simple)
2473 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2474 NULL, NULL, s->linesize, 0, 1);
2475 else
2476 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2477 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2478 }
2479
2480 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2481
2482 dst[0] += 16;
2483 dst[1] += 8;
2484 dst[2] += 8;
2485 td->mv_bounds.mv_min.x -= 64;
2486 td->mv_bounds.mv_max.x -= 64;
2487
2488 if (mb_x == s->mb_width + 1) {
2489 update_pos(td, mb_y, s->mb_width + 3);
2490 } else {
2491 update_pos(td, mb_y, mb_x);
2492 }
2493 }
2494 return 0;
2495 }
2496
2497 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2498 int jobnr, int threadnr)
2499 {
2500 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2501 }
2502
2503 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2504 int jobnr, int threadnr)
2505 {
2506 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2507 }
2508
2509 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2510 int jobnr, int threadnr, int is_vp7)
2511 {
2512 VP8Context *s = avctx->priv_data;
2513 VP8ThreadData *td = &s->thread_data[threadnr];
2514 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2515 AVFrame *curframe = s->curframe->tf.f;
2516 VP8Macroblock *mb;
2517 VP8ThreadData *prev_td, *next_td;
2518 uint8_t *dst[3] = {
2519 curframe->data[0] + 16 * mb_y * s->linesize,
2520 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2521 curframe->data[2] + 8 * mb_y * s->uvlinesize
2522 };
2523
2524 if (s->mb_layout == 1)
2525 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2526 else
2527 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2528
2529 if (mb_y == 0)
2530 prev_td = td;
2531 else
2532 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2533 if (mb_y == s->mb_height - 1)
2534 next_td = td;
2535 else
2536 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2537
2538 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2539 VP8FilterStrength *f = &td->filter_strength[mb_x];
2540 if (prev_td != td)
2541 check_thread_pos(td, prev_td,
2542 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2543 if (next_td != td)
2544 if (next_td != &s->thread_data[0])
2545 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2546
2547 if (num_jobs == 1) {
2548 if (s->filter.simple)
2549 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2550 NULL, NULL, s->linesize, 0, 1);
2551 else
2552 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2553 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2554 }
2555
2556 if (s->filter.simple)
2557 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2558 else
2559 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2560 dst[0] += 16;
2561 dst[1] += 8;
2562 dst[2] += 8;
2563
2564 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2565 }
2566 }
2567
2568 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2569 int jobnr, int threadnr)
2570 {
2571 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2572 }
2573
2574 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2575 int jobnr, int threadnr)
2576 {
2577 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2578 }
2579
2580 static av_always_inline
2581 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2582 int threadnr, int is_vp7)
2583 {
2584 VP8Context *s = avctx->priv_data;
2585 VP8ThreadData *td = &s->thread_data[jobnr];
2586 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2587 VP8Frame *curframe = s->curframe;
2588 int mb_y, num_jobs = s->num_jobs;
2589 int ret;
2590
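/* Rows are interleaved across sliced-threading jobs: job n decodes mb
 * rows n, n + num_jobs, n + 2 * num_jobs, ... The vertical mv clamp
 * window therefore starts offset by 64 units (one MB row) per row this
 * job skips, and advances by 64 * num_jobs per iteration below. */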
2591 td->thread_nr = threadnr;
2592 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2593 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2594 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2595 atomic_store(&td->thread_mb_pos, mb_y << 16);
2596 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2597 if (ret < 0) {
2598 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2599 return ret;
2600 }
2601 if (s->deblock_filter)
2602 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2603 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2604
2605 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2606 td->mv_bounds.mv_max.y -= 64 * num_jobs;
2607
2608 if (avctx->active_thread_type == FF_THREAD_FRAME)
2609 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2610 }
2611
2612 return 0;
2613 }
2614
2615 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2616 int jobnr, int threadnr)
2617 {
2618 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2619 }
2620
2621 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2622 int jobnr, int threadnr)
2623 {
2624 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2625 }
2626
2627 static av_always_inline
2628 int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
2629 const AVPacket *avpkt, int is_vp7)
2630 {
2631 VP8Context *s = avctx->priv_data;
2632 int ret, i, referenced, num_jobs;
2633 enum AVDiscard skip_thresh;
2634 VP8Frame *av_uninit(curframe), *prev_frame;
2635
2636 if (is_vp7)
2637 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2638 else
2639 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2640
2641 if (ret < 0)
2642 goto err;
2643
2644 if (s->actually_webp) {
2645 // avctx->pix_fmt already set in caller.
2646 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2647 s->pix_fmt = get_pixel_format(s);
2648 if (s->pix_fmt < 0) {
2649 ret = AVERROR(EINVAL);
2650 goto err;
2651 }
2652 avctx->pix_fmt = s->pix_fmt;
2653 }
2654
2655 prev_frame = s->framep[VP56_FRAME_CURRENT];
2656
2657 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2658 s->update_altref == VP56_FRAME_CURRENT;
2659
2660 skip_thresh = !referenced ? AVDISCARD_NONREF
2661 : !s->keyframe ? AVDISCARD_NONKEY
2662 : AVDISCARD_ALL;
2663
2664 if (avctx->skip_frame >= skip_thresh) {
2665 s->invisible = 1;
2666 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2667 goto skip_decode;
2668 }
2669 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2670
2671 // release no longer referenced frames
2672 for (i = 0; i < 5; i++)
2673 if (s->frames[i].tf.f->buf[0] &&
2674 &s->frames[i] != prev_frame &&
2675 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2676 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2677 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2678 vp8_release_frame(s, &s->frames[i]);
2679
2680 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2681
2682 if (!s->colorspace)
2683 avctx->colorspace = AVCOL_SPC_BT470BG;
2684 if (s->fullrange)
2685 avctx->color_range = AVCOL_RANGE_JPEG;
2686 else
2687 avctx->color_range = AVCOL_RANGE_MPEG;
2688
2689 /* Given that arithmetic probabilities are updated every frame, it's quite
2690 * likely that the values we have on a random interframe are complete
2691 * junk if we didn't start decode on a keyframe. So just don't display
2692 * anything rather than junk. */
2693 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2694 !s->framep[VP56_FRAME_GOLDEN] ||
2695 !s->framep[VP56_FRAME_GOLDEN2])) {
2696 av_log(avctx, AV_LOG_WARNING,
2697 "Discarding interframe without a prior keyframe!\n");
2698 ret = AVERROR_INVALIDDATA;
2699 goto err;
2700 }
2701
2702 curframe->tf.f->key_frame = s->keyframe;
2703 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2704 : AV_PICTURE_TYPE_P;
2705 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2706 goto err;
2707
2708 // check if golden and altref are swapped
2709 if (s->update_altref != VP56_FRAME_NONE)
2710 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2711 else
2712 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2713
2714 if (s->update_golden != VP56_FRAME_NONE)
2715 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2716 else
2717 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2718
2719 if (s->update_last)
2720 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2721 else
2722 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2723
2724 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2725
2726 if (ffcodec(avctx->codec)->update_thread_context)
2727 ff_thread_finish_setup(avctx);
2728
2729 if (avctx->hwaccel) {
2730 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2731 if (ret < 0)
2732 goto err;
2733
2734 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2735 if (ret < 0)
2736 goto err;
2737
2738 ret = avctx->hwaccel->end_frame(avctx);
2739 if (ret < 0)
2740 goto err;
2741
2742 } else {
2743 s->linesize = curframe->tf.f->linesize[0];
2744 s->uvlinesize = curframe->tf.f->linesize[1];
2745
2746 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2747 /* Zero macroblock structures for top/top-left prediction
2748 * from outside the frame. */
2749 if (!s->mb_layout)
2750 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2751 (s->mb_width + 1) * sizeof(*s->macroblocks));
2752 if (!s->mb_layout && s->keyframe)
2753 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2754
2755 memset(s->ref_count, 0, sizeof(s->ref_count));
2756
2757 if (s->mb_layout == 1) {
2758 // Make sure the previous frame has read its segmentation map,
2759 // if we re-use the same map.
2760 if (prev_frame && s->segmentation.enabled &&
2761 !s->segmentation.update_map)
2762 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2763 if (is_vp7)
2764 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2765 else
2766 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2767 if (ret < 0)
2768 goto err;
2769 }
2770
2771 if (avctx->active_thread_type == FF_THREAD_FRAME)
2772 num_jobs = 1;
2773 else
2774 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2775 s->num_jobs = num_jobs;
2776 s->curframe = curframe;
2777 s->prev_frame = prev_frame;
2778 s->mv_bounds.mv_min.y = -MARGIN;
2779 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2780 for (i = 0; i < MAX_THREADS; i++) {
2781 VP8ThreadData *td = &s->thread_data[i];
2782 atomic_init(&td->thread_mb_pos, 0);
2783 atomic_init(&td->wait_mb_pos, INT_MAX);
2784 }
2785 if (is_vp7)
2786 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2787 num_jobs);
2788 else
2789 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2790 num_jobs);
2791 }
2792
2793 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2794 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2795
2796 skip_decode:
2797 // if future frames don't use the updated probabilities,
2798 // reset them to the values we saved
2799 if (!s->update_probabilities)
2800 s->prob[0] = s->prob[1];
2801
2802 if (!s->invisible) {
2803 if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0)
2804 return ret;
2805 *got_frame = 1;
2806 }
2807
2808 return avpkt->size;
2809 err:
2810 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2811 return ret;
2812 }
2813
2814 int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
2815 int *got_frame, AVPacket *avpkt)
2816 {
2817 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8);
2818 }
2819
2820 #if CONFIG_VP7_DECODER
2821 static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
2822 int *got_frame, AVPacket *avpkt)
2823 {
2824 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7);
2825 }
2826 #endif /* CONFIG_VP7_DECODER */
2827
2828 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2829 {
2830 VP8Context *s = avctx->priv_data;
2831 int i;
2832
2833 vp8_decode_flush_impl(avctx, 1);
2834 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2835 av_frame_free(&s->frames[i].tf.f);
2836
2837 return 0;
2838 }
2839
2840 static av_cold int vp8_init_frames(VP8Context *s)
2841 {
2842 int i;
2843 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2844 s->frames[i].tf.f = av_frame_alloc();
2845 if (!s->frames[i].tf.f)
2846 return AVERROR(ENOMEM);
2847 }
2848 return 0;
2849 }
2850
2851 static av_always_inline
2852 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2853 {
2854 VP8Context *s = avctx->priv_data;
2855 int ret;
2856
2857 s->avctx = avctx;
2858 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2859 s->pix_fmt = AV_PIX_FMT_NONE;
2860 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2861
2862 ff_videodsp_init(&s->vdsp, 8);
2863
2864 ff_vp78dsp_init(&s->vp8dsp);
2865 if (CONFIG_VP7_DECODER && is_vp7) {
2866 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2867 ff_vp7dsp_init(&s->vp8dsp);
2868 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2869 s->filter_mb_row = vp7_filter_mb_row;
2870 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2871 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2872 ff_vp8dsp_init(&s->vp8dsp);
2873 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2874 s->filter_mb_row = vp8_filter_mb_row;
2875 }
2876
2877 /* does not change for VP8 */
2878 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2879
2880 if ((ret = vp8_init_frames(s)) < 0) {
2881 ff_vp8_decode_free(avctx);
2882 return ret;
2883 }
2884
2885 return 0;
2886 }
2887
2888 #if CONFIG_VP7_DECODER
2889 static int vp7_decode_init(AVCodecContext *avctx)
2890 {
2891 return vp78_decode_init(avctx, IS_VP7);
2892 }
2893 #endif /* CONFIG_VP7_DECODER */
2894
2895 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2896 {
2897 return vp78_decode_init(avctx, IS_VP8);
2898 }
2899
2900 #if CONFIG_VP8_DECODER
2901 #if HAVE_THREADS
2902 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
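/* REBASE translates a frame pointer from the source decoder context into
 * the slot with the same index in this context's frames[] array,
 * preserving NULL; the two arrays are kept index-compatible by the
 * per-element vp8_ref_frame loop below. */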
2903
2904 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2905 const AVCodecContext *src)
2906 {
2907 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2908 int i;
2909
2910 if (s->macroblocks_base &&
2911 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2912 free_buffers(s);
2913 s->mb_width = s_src->mb_width;
2914 s->mb_height = s_src->mb_height;
2915 }
2916
2917 s->pix_fmt = s_src->pix_fmt;
2918 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2919 s->segmentation = s_src->segmentation;
2920 s->lf_delta = s_src->lf_delta;
2921 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2922
2923 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2924 if (s_src->frames[i].tf.f->buf[0]) {
2925 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2926 if (ret < 0)
2927 return ret;
2928 }
2929 }
2930
2931 s->framep[0] = REBASE(s_src->next_framep[0]);
2932 s->framep[1] = REBASE(s_src->next_framep[1]);
2933 s->framep[2] = REBASE(s_src->next_framep[2]);
2934 s->framep[3] = REBASE(s_src->next_framep[3]);
2935
2936 return 0;
2937 }
2938 #endif /* HAVE_THREADS */
2939 #endif /* CONFIG_VP8_DECODER */
2940
2941 #if CONFIG_VP7_DECODER
2942 const FFCodec ff_vp7_decoder = {
2943 .p.name = "vp7",
2944 .p.long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2945 .p.type = AVMEDIA_TYPE_VIDEO,
2946 .p.id = AV_CODEC_ID_VP7,
2947 .priv_data_size = sizeof(VP8Context),
2948 .init = vp7_decode_init,
2949 .close = ff_vp8_decode_free,
2950 FF_CODEC_DECODE_CB(vp7_decode_frame),
2951 .p.capabilities = AV_CODEC_CAP_DR1,
2952 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
2953 .flush = vp8_decode_flush,
2954 };
2955 #endif /* CONFIG_VP7_DECODER */
2956
2957 #if CONFIG_VP8_DECODER
2958 const FFCodec ff_vp8_decoder = {
2959 .p.name = "vp8",
2960 .p.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2961 .p.type = AVMEDIA_TYPE_VIDEO,
2962 .p.id = AV_CODEC_ID_VP8,
2963 .priv_data_size = sizeof(VP8Context),
2964 .init = ff_vp8_decode_init,
2965 .close = ff_vp8_decode_free,
2966 FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
2967 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2968 AV_CODEC_CAP_SLICE_THREADS,
2969 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE |
2970 FF_CODEC_CAP_ALLOCATE_PROGRESS,
2971 .flush = vp8_decode_flush,
2972 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2973 .hw_configs = (const AVCodecHWConfigInternal *const []) {
2974 #if CONFIG_VP8_VAAPI_HWACCEL
2975 HWACCEL_VAAPI(vp8),
2976 #endif
2977 #if CONFIG_VP8_NVDEC_HWACCEL
2978 HWACCEL_NVDEC(vp8),
2979 #endif
2980 NULL
2981 },
2982 };
2983 #endif /* CONFIG_VP8_DECODER */
2984