1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config_components.h"
25
26 #include "avcodec.h"
27 #include "codec_internal.h"
28 #include "get_bits.h"
29 #include "hwconfig.h"
30 #include "internal.h"
31 #include "profiles.h"
32 #include "thread.h"
33 #include "threadframe.h"
34 #include "pthread_internal.h"
35
36 #include "videodsp.h"
37 #include "vp56.h"
38 #include "vp9.h"
39 #include "vp9data.h"
40 #include "vp9dec.h"
41 #include "libavutil/avassert.h"
42 #include "libavutil/pixdesc.h"
43 #include "libavutil/video_enc_params.h"
44
45 #define VP9_SYNCCODE 0x498342
46
47 #if HAVE_THREADS
/* Table of offsets of the pthread objects embedded in VP9Context, consumed
 * by the generic ff_pthread_init()/ff_pthread_free() helpers so the mutex
 * and condition variable are (de)initialized without per-codec boilerplate. */
DEFINE_OFFSET_ARRAY(VP9Context, vp9_context, pthread_init_cnt,
                    (offsetof(VP9Context, progress_mutex)),
                    (offsetof(VP9Context, progress_cond)));
51
/* (Re)allocate the per-superblock-row progress counters used for slice
 * threading and reset them all to zero.
 *
 * @param avctx codec context (priv_data is the VP9Context)
 * @param n     number of counters, one per superblock row
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 *
 * Only does work when slice threading is actually active; otherwise the
 * counters are unused and s->entries stays as-is. */
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        // av_freep() is a no-op on a NULL pointer, so no guard is needed;
        // it also clears s->entries, avoiding a dangling pointer on the
        // allocation-failure path below.
        av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));
        if (!s->entries)
            return AVERROR(ENOMEM);

        // atomic_init() rather than memset: atomic_int may not be
        // zero-initializable by plain memory writes on all platforms.
        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);
    }
    return 0;
}
69
/* Advance the decode-progress counter of tile column 'field' by n rows and
 * wake any thread waiting on it.
 *
 * The release increment pairs with the acquire load in
 * vp9_await_tile_progress(); both the increment and the signal happen under
 * progress_mutex so a waiter that re-checks the counter inside the lock
 * cannot miss the wakeup. */
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}
76
/* Block until at least n rows of tile column 'field' have been decoded.
 *
 * Fast path: a lock-free acquire load that pairs with the release
 * increment in vp9_report_tile_progress(). Slow path: wait on the shared
 * condition variable; the relaxed re-load is safe because the mutex
 * already orders it against the producer's increment. */
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    // Use '< n' to match the '>= n' fast path above: the counter only ever
    // grows, and a producer that reports several rows at once may step past
    // n, which with an exact '!= n' comparison would leave this waiter
    // blocked forever.
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) < n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
86 #else
/* No-op stub: without thread support there are no progress counters. */
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
88 #endif
89
/* Release the per-tile scratch allocations (block array, coefficient/eob
 * base buffer and the optional exported block-structure array); av_freep()
 * also NULLs the pointers so the function is safe to call repeatedly. */
static void vp9_tile_data_free(VP9TileData *td)
{
    av_freep(&td->b_base);
    av_freep(&td->block_base);
    av_freep(&td->block_structure);
}
96
/* Drop all references held by a VP9Frame: the threaded frame buffer, the
 * pooled extradata buffer and the hwaccel private buffer. */
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_ext_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    // These pointers aliased into the buffers released above; clear them so
    // nothing dangles.
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
105
/* Allocate everything a new VP9Frame needs: the picture buffer itself, a
 * pooled extradata buffer holding the segmentation map plus per-block
 * motion vectors, and (if a hwaccel is active) the hwaccel's private data.
 *
 * @return 0 on success, a negative AVERROR on failure (all partially
 *         acquired resources are released via vp9_frame_unref()). */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_ext_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    // One segmentation-map byte per 8x8 block = 64 bytes per 64x64
    // superblock; the same count of VP9mvrefPair entries follows it.
    sz = 64 * s->sb_cols * s->sb_rows;
    if (sz != s->frame_extradata_pool_size) {
        // Frame size changed: the pooled buffers no longer fit, so rebuild
        // the pool at the new size.
        av_buffer_pool_uninit(&s->frame_extradata_pool);
        s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
        if (!s->frame_extradata_pool) {
            s->frame_extradata_pool_size = 0;
            goto fail;
        }
        s->frame_extradata_pool_size = sz;
    }
    f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
    if (!f->extradata) {
        goto fail;
    }
    // Pooled buffers are recycled, so clear any stale contents.
    memset(f->extradata->data, 0, f->extradata->size);

    // Segmentation map first, MV array right behind it in the same buffer.
    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
151
/* Make dst a new reference to src: ref the frame buffer, the extradata
 * buffer and (if present) the hwaccel private buffer, and copy the plain
 * pointers/flags that alias into those buffers.
 *
 * @return 0 on success, a negative AVERROR on failure (dst is cleaned up
 *         via vp9_frame_unref() on any partial failure). */
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret;

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        return ret;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    // These point into src->extradata's data, which dst now also references.
    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
181
/* React to a (possible) change of coded frame size or pixel format:
 * renegotiate the output pixel format (offering hwaccel formats first),
 * and reallocate all width-dependent per-superblock-column context
 * buffers. Returns 0 on success or a negative AVERROR. */
static int update_size(AVCodecContext *avctx, int w, int h)
{
/* Worst-case number of hwaccel pixel formats that can be offered below
 * (D3D11VA contributes two entries). */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
    // +2: one slot for the software format, one for the AV_PIX_FMT_NONE
    // terminator.
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    // Only renegotiate the format when dimensions or sw pixel format
    // actually changed since the last successful negotiation.
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        // Offer the hwaccel formats supported for this sw format; the
        // caller's get_format() picks one (or the sw fallback appended
        // below).
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
            *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
            break;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
            break;
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    // Dimensions in 8x8 blocks.
    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    // Context buffers already allocated for these dimensions/format:
    // nothing more to do.
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    // Dimensions in 64x64 superblocks.
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
    // With slice threading every superblock row needs its own loopfilter
    // level struct; otherwise one shared struct suffices.
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

// Carve consecutive sub-arrays (each n entries per superblock column) out
// of one big allocation, advancing p past each assignment.
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
    assign(s->above_mode_ctx,      uint8_t *,             16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
    assign(s->above_partition_ctx, uint8_t *,              8);
    assign(s->above_skip_ctx,      uint8_t *,              8);
    assign(s->above_txfm_ctx,      uint8_t *,              8);
    assign(s->above_segpred_ctx,   uint8_t *,              8);
    assign(s->above_intra_ctx,     uint8_t *,              8);
    assign(s->above_comp_ctx,      uint8_t *,              8);
    assign(s->above_ref_ctx,       uint8_t *,              8);
    assign(s->above_filter_ctx,    uint8_t *,              8);
    assign(s->lflvl,               VP9Filter *,            lflvl_len);
#undef assign

    // Per-tile scratch buffers are sized for the old dimensions; free them
    // so update_block_buffers() reallocates on the next frame.
    if (s->td) {
        for (i = 0; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);
    }

    // DSP function tables depend on bit depth only; reinit on change.
    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
304
/* (Re)allocate the per-tile block/coefficient scratch buffers.
 *
 * Two layouts exist: in 2-pass (frame-threaded) decoding, tile 0 holds
 * buffers for the whole frame (block structs for every 8x8 block and
 * coefficients for every superblock); in 1-pass decoding each active tile
 * column holds a single superblock's worth of scratch that is reused as
 * decoding advances. Returns 0 or AVERROR(ENOMEM). */
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // Buffers exist and were allocated for the same 1-pass/2-pass mode:
    // keep them.
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    vp9_tile_data_free(td);
    // Chroma sample counts per 64x64 superblock, reduced by the horizontal
    // and vertical subsampling shifts.
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        // Whole-frame allocation: luma coefficients, both chroma planes,
        // then luma and chroma eob arrays, all in one buffer.
        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
            if (!td->block_structure)
                return AVERROR(ENOMEM);
        }
    } else {
        // 1-pass mode: tiles beyond the first may hold stale 2-pass-sized
        // buffers; drop them before reallocating everything per-tile.
        for (i = 1; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);

        for (i = 0; i < s->active_tile_cols; i++) {
            // Same layout as above but sized for a single superblock.
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                           16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;

            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
                if (!s->td[i].block_structure)
                    return AVERROR(ENOMEM);
            }
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
364
365 // The sign bit is at the end, not the start, of a bit sequence
get_sbits_inv(GetBitContext *gb, int n)366 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
367 {
368 int v = get_bits(gb, n);
369 return get_bits1(gb) ? -v : v;
370 }
371
/* Undo the "recentering" of a non-negative code v around pivot m: small
 * codes alternate below (odd) and above (even) the pivot, while codes
 * beyond the symmetric window around m map to themselves. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v <= 2 * m) {
        if (v & 1)
            return m - ((v + 1) >> 1); /* odd code: step below the pivot */
        return m + (v >> 1);           /* even code: step above the pivot */
    }
    /* Outside the window the code is the value itself. */
    return v;
}
380
381 // differential forward probability updates
// differential forward probability updates
/* Read a differentially-coded probability update from the range coder and
 * apply it to the current probability p (in [1, 255]), returning the new
 * probability. The delta is read as a 4-level VLC (4/4/5/7+ bits) and
 * mapped through inv_map_table[] before being recentered around p. */
static int update_prob(VP56RangeCoder *c, int p)
{
    /* Maps the VLC-decoded delta index back to the actual delta magnitude:
     * the first 20 entries are coarse steps of 13, the rest fill in the
     * remaining fine values in increasing order. */
    static const uint8_t inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // Up to three prefix bits select the VLC level; each level covers a
    // successively higher range of delta indices.
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        // Values >= 65 get one extra bit of precision (escape extension).
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // Recenter around p, mirroring for p > 128 so the result stays in
    // [1, 255].
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
439
/* Parse the colorspace/bit-depth/subsampling portion of the uncompressed
 * frame header and derive s->pix_fmt, s->ss_h/ss_v, s->s.h.bpp and the
 * avctx colorspace/range fields from it. Must be called with s->gb
 * positioned right after the sync code. Returns 0 or AVERROR_INVALIDDATA. */
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    // Profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit with
    // one extra bit.
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB is never subsampled and always full range.
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            // RGB is only valid in odd profiles (1 and 3).
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        // Pixel format indexed by [bit-depth][vertical ss][horizontal ss].
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // Odd profiles carry explicit subsampling bits, but 4:2:0 is
            // reserved there (it belongs to the even profiles).
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // Even profiles are always 4:2:0.
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
501
decode_frame_header(AVCodecContext *avctx, const uint8_t *data, int size, int *ref)502 static int decode_frame_header(AVCodecContext *avctx,
503 const uint8_t *data, int size, int *ref)
504 {
505 VP9Context *s = avctx->priv_data;
506 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
507 int last_invisible;
508 const uint8_t *data2;
509
510 /* general header */
511 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
512 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
513 return ret;
514 }
515 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
516 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
517 return AVERROR_INVALIDDATA;
518 }
519 avctx->profile = get_bits1(&s->gb);
520 avctx->profile |= get_bits1(&s->gb) << 1;
521 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
522 if (avctx->profile > 3) {
523 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
524 return AVERROR_INVALIDDATA;
525 }
526 s->s.h.profile = avctx->profile;
527 if (get_bits1(&s->gb)) {
528 *ref = get_bits(&s->gb, 3);
529 return 0;
530 }
531
532 s->last_keyframe = s->s.h.keyframe;
533 s->s.h.keyframe = !get_bits1(&s->gb);
534
535 last_invisible = s->s.h.invisible;
536 s->s.h.invisible = !get_bits1(&s->gb);
537 s->s.h.errorres = get_bits1(&s->gb);
538 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
539
540 if (s->s.h.keyframe) {
541 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
542 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
543 return AVERROR_INVALIDDATA;
544 }
545 if ((ret = read_colorspace_details(avctx)) < 0)
546 return ret;
547 // for profile 1, here follows the subsampling bits
548 s->s.h.refreshrefmask = 0xff;
549 w = get_bits(&s->gb, 16) + 1;
550 h = get_bits(&s->gb, 16) + 1;
551 if (get_bits1(&s->gb)) // display size
552 skip_bits(&s->gb, 32);
553 } else {
554 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
555 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
556 if (s->s.h.intraonly) {
557 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
558 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
559 return AVERROR_INVALIDDATA;
560 }
561 if (avctx->profile >= 1) {
562 if ((ret = read_colorspace_details(avctx)) < 0)
563 return ret;
564 } else {
565 s->ss_h = s->ss_v = 1;
566 s->s.h.bpp = 8;
567 s->bpp_index = 0;
568 s->bytesperpixel = 1;
569 s->pix_fmt = AV_PIX_FMT_YUV420P;
570 avctx->colorspace = AVCOL_SPC_BT470BG;
571 avctx->color_range = AVCOL_RANGE_MPEG;
572 }
573 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
574 w = get_bits(&s->gb, 16) + 1;
575 h = get_bits(&s->gb, 16) + 1;
576 if (get_bits1(&s->gb)) // display size
577 skip_bits(&s->gb, 32);
578 } else {
579 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
580 s->s.h.refidx[0] = get_bits(&s->gb, 3);
581 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
582 s->s.h.refidx[1] = get_bits(&s->gb, 3);
583 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
584 s->s.h.refidx[2] = get_bits(&s->gb, 3);
585 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
586 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
587 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
588 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
589 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
590 return AVERROR_INVALIDDATA;
591 }
592 if (get_bits1(&s->gb)) {
593 w = s->s.refs[s->s.h.refidx[0]].f->width;
594 h = s->s.refs[s->s.h.refidx[0]].f->height;
595 } else if (get_bits1(&s->gb)) {
596 w = s->s.refs[s->s.h.refidx[1]].f->width;
597 h = s->s.refs[s->s.h.refidx[1]].f->height;
598 } else if (get_bits1(&s->gb)) {
599 w = s->s.refs[s->s.h.refidx[2]].f->width;
600 h = s->s.refs[s->s.h.refidx[2]].f->height;
601 } else {
602 w = get_bits(&s->gb, 16) + 1;
603 h = get_bits(&s->gb, 16) + 1;
604 }
605 // Note that in this code, "CUR_FRAME" is actually before we
606 // have formally allocated a frame, and thus actually represents
607 // the _last_ frame
608 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
609 s->s.frames[CUR_FRAME].tf.f->height == h;
610 if (get_bits1(&s->gb)) // display size
611 skip_bits(&s->gb, 32);
612 s->s.h.highprecisionmvs = get_bits1(&s->gb);
613 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
614 get_bits(&s->gb, 2);
615 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
616 s->s.h.signbias[0] != s->s.h.signbias[2];
617 if (s->s.h.allowcompinter) {
618 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
619 s->s.h.fixcompref = 2;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 1;
622 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
623 s->s.h.fixcompref = 1;
624 s->s.h.varcompref[0] = 0;
625 s->s.h.varcompref[1] = 2;
626 } else {
627 s->s.h.fixcompref = 0;
628 s->s.h.varcompref[0] = 1;
629 s->s.h.varcompref[1] = 2;
630 }
631 }
632 }
633 }
634 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
635 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
636 s->s.h.framectxid = c = get_bits(&s->gb, 2);
637 if (s->s.h.keyframe || s->s.h.intraonly)
638 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
639
640 /* loopfilter header data */
641 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
642 // reset loopfilter defaults
643 s->s.h.lf_delta.ref[0] = 1;
644 s->s.h.lf_delta.ref[1] = 0;
645 s->s.h.lf_delta.ref[2] = -1;
646 s->s.h.lf_delta.ref[3] = -1;
647 s->s.h.lf_delta.mode[0] = 0;
648 s->s.h.lf_delta.mode[1] = 0;
649 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
650 }
651 s->s.h.filter.level = get_bits(&s->gb, 6);
652 sharp = get_bits(&s->gb, 3);
653 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
654 // the old cache values since they are still valid
655 if (s->s.h.filter.sharpness != sharp) {
656 for (i = 1; i <= 63; i++) {
657 int limit = i;
658
659 if (sharp > 0) {
660 limit >>= (sharp + 3) >> 2;
661 limit = FFMIN(limit, 9 - sharp);
662 }
663 limit = FFMAX(limit, 1);
664
665 s->filter_lut.lim_lut[i] = limit;
666 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
667 }
668 }
669 s->s.h.filter.sharpness = sharp;
670 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
671 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
672 for (i = 0; i < 4; i++)
673 if (get_bits1(&s->gb))
674 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
675 for (i = 0; i < 2; i++)
676 if (get_bits1(&s->gb))
677 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
678 }
679 }
680
681 /* quantization header data */
682 s->s.h.yac_qi = get_bits(&s->gb, 8);
683 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
684 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
685 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
686 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
687 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
688 if (s->s.h.lossless)
689 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
690
691 /* segmentation header info */
692 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
693 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
694 for (i = 0; i < 7; i++)
695 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
696 get_bits(&s->gb, 8) : 255;
697 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
698 for (i = 0; i < 3; i++)
699 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
700 get_bits(&s->gb, 8) : 255;
701 }
702
703 if (get_bits1(&s->gb)) {
704 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
705 for (i = 0; i < 8; i++) {
706 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
707 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
708 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
709 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
710 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
711 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
712 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
713 }
714 }
715 }
716
717 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
718 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
719 int qyac, qydc, quvac, quvdc, lflvl, sh;
720
721 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
722 if (s->s.h.segmentation.absolute_vals)
723 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
724 else
725 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
726 } else {
727 qyac = s->s.h.yac_qi;
728 }
729 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
730 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
731 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
732 qyac = av_clip_uintp2(qyac, 8);
733
734 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
735 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
736 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
737 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
738
739 sh = s->s.h.filter.level >= 32;
740 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
741 if (s->s.h.segmentation.absolute_vals)
742 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
743 else
744 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
745 } else {
746 lflvl = s->s.h.filter.level;
747 }
748 if (s->s.h.lf_delta.enabled) {
749 s->s.h.segmentation.feat[i].lflvl[0][0] =
750 s->s.h.segmentation.feat[i].lflvl[0][1] =
751 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
752 for (j = 1; j < 4; j++) {
753 s->s.h.segmentation.feat[i].lflvl[j][0] =
754 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
755 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
756 s->s.h.segmentation.feat[i].lflvl[j][1] =
757 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
758 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
759 }
760 } else {
761 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
762 sizeof(s->s.h.segmentation.feat[i].lflvl));
763 }
764 }
765
766 /* tiling info */
767 if ((ret = update_size(avctx, w, h)) < 0) {
768 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
769 w, h, s->pix_fmt);
770 return ret;
771 }
772 for (s->s.h.tiling.log2_tile_cols = 0;
773 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
774 s->s.h.tiling.log2_tile_cols++) ;
775 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
776 max = FFMAX(0, max - 1);
777 while (max > s->s.h.tiling.log2_tile_cols) {
778 if (get_bits1(&s->gb))
779 s->s.h.tiling.log2_tile_cols++;
780 else
781 break;
782 }
783 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
784 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
785 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
786 int n_range_coders;
787 VP56RangeCoder *rc;
788
789 if (s->td) {
790 for (i = 0; i < s->active_tile_cols; i++)
791 vp9_tile_data_free(&s->td[i]);
792 av_freep(&s->td);
793 }
794
795 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
796 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
797 s->s.h.tiling.tile_cols : 1;
798 vp9_alloc_entries(avctx, s->sb_rows);
799 if (avctx->active_thread_type == FF_THREAD_SLICE) {
800 n_range_coders = 4; // max_tile_rows
801 } else {
802 n_range_coders = s->s.h.tiling.tile_cols;
803 }
804 s->td = av_calloc(s->active_tile_cols, sizeof(VP9TileData) +
805 n_range_coders * sizeof(VP56RangeCoder));
806 if (!s->td)
807 return AVERROR(ENOMEM);
808 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
809 for (i = 0; i < s->active_tile_cols; i++) {
810 s->td[i].s = s;
811 s->td[i].c_b = rc;
812 rc += n_range_coders;
813 }
814 }
815
816 /* check reference frames */
817 if (!s->s.h.keyframe && !s->s.h.intraonly) {
818 int valid_ref_frame = 0;
819 for (i = 0; i < 3; i++) {
820 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
821 int refw = ref->width, refh = ref->height;
822
823 if (ref->format != avctx->pix_fmt) {
824 av_log(avctx, AV_LOG_ERROR,
825 "Ref pixfmt (%s) did not match current frame (%s)",
826 av_get_pix_fmt_name(ref->format),
827 av_get_pix_fmt_name(avctx->pix_fmt));
828 return AVERROR_INVALIDDATA;
829 } else if (refw == w && refh == h) {
830 s->mvscale[i][0] = s->mvscale[i][1] = 0;
831 } else {
832 /* Check to make sure at least one of frames that */
833 /* this frame references has valid dimensions */
834 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
835 av_log(avctx, AV_LOG_WARNING,
836 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
837 refw, refh, w, h);
838 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
839 continue;
840 }
841 s->mvscale[i][0] = (refw << 14) / w;
842 s->mvscale[i][1] = (refh << 14) / h;
843 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
844 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
845 }
846 valid_ref_frame++;
847 }
848 if (!valid_ref_frame) {
849 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
850 return AVERROR_INVALIDDATA;
851 }
852 }
853
854 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
855 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
856 s->prob_ctx[3].p = ff_vp9_default_probs;
857 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
858 sizeof(ff_vp9_default_coef_probs));
859 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
860 sizeof(ff_vp9_default_coef_probs));
861 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
862 sizeof(ff_vp9_default_coef_probs));
863 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
864 sizeof(ff_vp9_default_coef_probs));
865 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
866 s->prob_ctx[c].p = ff_vp9_default_probs;
867 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
868 sizeof(ff_vp9_default_coef_probs));
869 }
870
871 // next 16 bits is size of the rest of the header (arith-coded)
872 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
873 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
874
875 data2 = align_get_bits(&s->gb);
876 if (size2 > size - (data2 - data)) {
877 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
878 return AVERROR_INVALIDDATA;
879 }
880 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
881 if (ret < 0)
882 return ret;
883
884 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
885 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
886 return AVERROR_INVALIDDATA;
887 }
888
889 for (i = 0; i < s->active_tile_cols; i++) {
890 if (s->s.h.keyframe || s->s.h.intraonly) {
891 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
892 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
893 } else {
894 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
895 }
896 s->td[i].nb_block_structure = 0;
897 }
898
899 /* FIXME is it faster to not copy here, but do it down in the fw updates
900 * as explicit copies if the fw update is missing (and skip the copy upon
901 * fw update)? */
902 s->prob.p = s->prob_ctx[c].p;
903
904 // txfm updates
905 if (s->s.h.lossless) {
906 s->s.h.txfmmode = TX_4X4;
907 } else {
908 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
909 if (s->s.h.txfmmode == 3)
910 s->s.h.txfmmode += vp8_rac_get(&s->c);
911
912 if (s->s.h.txfmmode == TX_SWITCHABLE) {
913 for (i = 0; i < 2; i++)
914 if (vp56_rac_get_prob_branchy(&s->c, 252))
915 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
916 for (i = 0; i < 2; i++)
917 for (j = 0; j < 2; j++)
918 if (vp56_rac_get_prob_branchy(&s->c, 252))
919 s->prob.p.tx16p[i][j] =
920 update_prob(&s->c, s->prob.p.tx16p[i][j]);
921 for (i = 0; i < 2; i++)
922 for (j = 0; j < 3; j++)
923 if (vp56_rac_get_prob_branchy(&s->c, 252))
924 s->prob.p.tx32p[i][j] =
925 update_prob(&s->c, s->prob.p.tx32p[i][j]);
926 }
927 }
928
929 // coef updates
930 for (i = 0; i < 4; i++) {
931 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
932 if (vp8_rac_get(&s->c)) {
933 for (j = 0; j < 2; j++)
934 for (k = 0; k < 2; k++)
935 for (l = 0; l < 6; l++)
936 for (m = 0; m < 6; m++) {
937 uint8_t *p = s->prob.coef[i][j][k][l][m];
938 uint8_t *r = ref[j][k][l][m];
939 if (m >= 3 && l == 0) // dc only has 3 pt
940 break;
941 for (n = 0; n < 3; n++) {
942 if (vp56_rac_get_prob_branchy(&s->c, 252))
943 p[n] = update_prob(&s->c, r[n]);
944 else
945 p[n] = r[n];
946 }
947 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
948 }
949 } else {
950 for (j = 0; j < 2; j++)
951 for (k = 0; k < 2; k++)
952 for (l = 0; l < 6; l++)
953 for (m = 0; m < 6; m++) {
954 uint8_t *p = s->prob.coef[i][j][k][l][m];
955 uint8_t *r = ref[j][k][l][m];
956 if (m > 3 && l == 0) // dc only has 3 pt
957 break;
958 memcpy(p, r, 3);
959 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
960 }
961 }
962 if (s->s.h.txfmmode == i)
963 break;
964 }
965
966 // mode updates
967 for (i = 0; i < 3; i++)
968 if (vp56_rac_get_prob_branchy(&s->c, 252))
969 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
970 if (!s->s.h.keyframe && !s->s.h.intraonly) {
971 for (i = 0; i < 7; i++)
972 for (j = 0; j < 3; j++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
974 s->prob.p.mv_mode[i][j] =
975 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
976
977 if (s->s.h.filtermode == FILTER_SWITCHABLE)
978 for (i = 0; i < 4; i++)
979 for (j = 0; j < 2; j++)
980 if (vp56_rac_get_prob_branchy(&s->c, 252))
981 s->prob.p.filter[i][j] =
982 update_prob(&s->c, s->prob.p.filter[i][j]);
983
984 for (i = 0; i < 4; i++)
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
987
988 if (s->s.h.allowcompinter) {
989 s->s.h.comppredmode = vp8_rac_get(&s->c);
990 if (s->s.h.comppredmode)
991 s->s.h.comppredmode += vp8_rac_get(&s->c);
992 if (s->s.h.comppredmode == PRED_SWITCHABLE)
993 for (i = 0; i < 5; i++)
994 if (vp56_rac_get_prob_branchy(&s->c, 252))
995 s->prob.p.comp[i] =
996 update_prob(&s->c, s->prob.p.comp[i]);
997 } else {
998 s->s.h.comppredmode = PRED_SINGLEREF;
999 }
1000
1001 if (s->s.h.comppredmode != PRED_COMPREF) {
1002 for (i = 0; i < 5; i++) {
1003 if (vp56_rac_get_prob_branchy(&s->c, 252))
1004 s->prob.p.single_ref[i][0] =
1005 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1006 if (vp56_rac_get_prob_branchy(&s->c, 252))
1007 s->prob.p.single_ref[i][1] =
1008 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1009 }
1010 }
1011
1012 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1013 for (i = 0; i < 5; i++)
1014 if (vp56_rac_get_prob_branchy(&s->c, 252))
1015 s->prob.p.comp_ref[i] =
1016 update_prob(&s->c, s->prob.p.comp_ref[i]);
1017 }
1018
1019 for (i = 0; i < 4; i++)
1020 for (j = 0; j < 9; j++)
1021 if (vp56_rac_get_prob_branchy(&s->c, 252))
1022 s->prob.p.y_mode[i][j] =
1023 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1024
1025 for (i = 0; i < 4; i++)
1026 for (j = 0; j < 4; j++)
1027 for (k = 0; k < 3; k++)
1028 if (vp56_rac_get_prob_branchy(&s->c, 252))
1029 s->prob.p.partition[3 - i][j][k] =
1030 update_prob(&s->c,
1031 s->prob.p.partition[3 - i][j][k]);
1032
1033 // mv fields don't use the update_prob subexp model for some reason
1034 for (i = 0; i < 3; i++)
1035 if (vp56_rac_get_prob_branchy(&s->c, 252))
1036 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1037
1038 for (i = 0; i < 2; i++) {
1039 if (vp56_rac_get_prob_branchy(&s->c, 252))
1040 s->prob.p.mv_comp[i].sign =
1041 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1042
1043 for (j = 0; j < 10; j++)
1044 if (vp56_rac_get_prob_branchy(&s->c, 252))
1045 s->prob.p.mv_comp[i].classes[j] =
1046 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1047
1048 if (vp56_rac_get_prob_branchy(&s->c, 252))
1049 s->prob.p.mv_comp[i].class0 =
1050 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1051
1052 for (j = 0; j < 10; j++)
1053 if (vp56_rac_get_prob_branchy(&s->c, 252))
1054 s->prob.p.mv_comp[i].bits[j] =
1055 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1056 }
1057
1058 for (i = 0; i < 2; i++) {
1059 for (j = 0; j < 2; j++)
1060 for (k = 0; k < 3; k++)
1061 if (vp56_rac_get_prob_branchy(&s->c, 252))
1062 s->prob.p.mv_comp[i].class0_fp[j][k] =
1063 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1064
1065 for (j = 0; j < 3; j++)
1066 if (vp56_rac_get_prob_branchy(&s->c, 252))
1067 s->prob.p.mv_comp[i].fp[j] =
1068 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1069 }
1070
1071 if (s->s.h.highprecisionmvs) {
1072 for (i = 0; i < 2; i++) {
1073 if (vp56_rac_get_prob_branchy(&s->c, 252))
1074 s->prob.p.mv_comp[i].class0_hp =
1075 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1076
1077 if (vp56_rac_get_prob_branchy(&s->c, 252))
1078 s->prob.p.mv_comp[i].hp =
1079 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1080 }
1081 }
1082 }
1083
1084 return (data2 - data) + size2;
1085 }
1086
/**
 * Recursively decode the partition tree of one superblock region.
 *
 * Reads the partition symbol for the block at (row, col) — both in units
 * of 8x8 blocks — from the tile's range coder, choosing the probability
 * set from the above/left partition contexts, then either decodes a leaf
 * block via ff_vp9_decode_block() or recurses into the quadrants for
 * PARTITION_SPLIT. At the right/bottom frame edge only the partition
 * choices that fit are coded; the rest of the tree is implied.
 *
 * @param td    per-tile decoding state (range coder in td->c)
 * @param row   block row in 8x8-block units
 * @param col   block column in 8x8-block units
 * @param lflvl loop-filter level/mask state for the current sb64
 * @param yoff  byte offset of this block into the luma plane
 * @param uvoff byte offset of this block into the chroma planes
 * @param bl    current block level (BL_64X64 down to BL_8X8)
 */
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    /* partition context: bit 0 from the above neighbour, bit 1 from the left */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    /* keyframes/intra-only frames use the fixed default partition probs */
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                             s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half block size at this level, in 8x8 units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        /* smallest level: the partition symbol is always fully coded */
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            /* block fully inside the frame: read the full partition symbol */
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                /* top half, then bottom half */
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                /* left half, then right half */
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants in raster order */
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            /* bottom half is outside the frame: only split-vs-horizontal
             * is coded; the missing bottom blocks are implied */
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            /* right half outside the frame: split-vs-vertical is coded */
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        /* both halves outside the frame: split is implied, nothing coded */
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    td->counts.partition[bl][c][bp]++;
}
1165
/**
 * Second-pass variant of decode_sb(): instead of reading partition
 * symbols from a range coder, it replays the block structure recorded
 * during pass 1 (td->b, advanced by ff_vp9_decode_block) and descends
 * until the stored block level b->bl is reached. Used for the
 * reconstruction pass of two-pass (frame-threaded) decoding.
 */
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl; /* half block size at this level, in 8x8 units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        /* stored leaf at this level: reconstruct it (and, for H/V
         * partitions, its in-frame second half) */
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        /* split: recurse into the quadrants that lie inside the frame */
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
1214
/**
 * Compute the pixel-row/column range [*start, *end) covered by tile
 * number idx, given 2^log2_n tiles over n superblocks. Superblock
 * indices are clamped to n and converted to 8-pixel units.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_first = (idx       * n) >> log2_n;
    int sb_last  = ((idx + 1) * n) >> log2_n;

    if (sb_first > n)
        sb_first = n;
    if (sb_last > n)
        sb_last = n;
    *start = sb_first << 3;
    *end   = sb_last  << 3;
}
1222
/* Release the intra-prediction backup buffer and every per-tile
 * scratch allocation. */
static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    for (int tile = 0; tile < s->active_tile_cols; tile++)
        vp9_tile_data_free(&s->td[tile]);
}
1231
vp9_decode_free(AVCodecContext *avctx)1232 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1233 {
1234 VP9Context *s = avctx->priv_data;
1235 int i;
1236
1237 for (i = 0; i < 3; i++) {
1238 vp9_frame_unref(avctx, &s->s.frames[i]);
1239 av_frame_free(&s->s.frames[i].tf.f);
1240 }
1241 av_buffer_pool_uninit(&s->frame_extradata_pool);
1242 for (i = 0; i < 8; i++) {
1243 ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
1244 av_frame_free(&s->s.refs[i].f);
1245 ff_thread_release_ext_buffer(avctx, &s->next_refs[i]);
1246 av_frame_free(&s->next_refs[i].f);
1247 }
1248
1249 free_buffers(s);
1250 #if HAVE_THREADS
1251 av_freep(&s->entries);
1252 ff_pthread_free(s, vp9_context_offsets);
1253 #endif
1254 av_freep(&s->td);
1255 return 0;
1256 }
1257
/**
 * Single-threaded tile decode loop for one frame; also used for each of
 * the two passes of two-pass frame-threaded mode (s->pass: 0 = normal,
 * 1 = syntax pass, 2 = reconstruction replay via decode_sb_mem()).
 *
 * For every tile row: initialize one range decoder per tile column from
 * the explicitly-sized tile payloads (the last tile consumes all
 * remaining data), then decode superblock-row by superblock-row across
 * all tile columns, back up the bottom pixel rows for intra prediction
 * of the next sb64 row, loop-filter the finished row and report
 * per-row progress for consumers of this frame.
 *
 * @return 0 on success, a negative AVERROR on invalid bitstream data.
 */
static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* set up one range decoder per tile column of this tile row */
        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                /* last tile has no explicit size; it spans the rest */
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                /* unblock any waiting consumers before erroring out */
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }

        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    /* reset left-edge contexts at each tile boundary
                     * (pass 2 replays stored data; no contexts needed) */
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 1) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        /* truncated input: the range coder ran dry */
                        if (vpX_rac_is_end(td->c)) {
                            return AVERROR_INVALIDDATA;
                        }
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            /* pass 1 only parses syntax; no reconstruction to filter */
            if (s->pass == 1)
                continue;

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}
1390
1391 #if HAVE_THREADS
/**
 * Slice-threading worker: decodes one whole tile column (selected by
 * jobnr) of the current frame. Each worker walks all tile rows of its
 * column, decoding superblock rows and reporting per-sb-row progress so
 * the loop-filter worker (loopfilter_proc) can follow behind.
 * Always returns 0; per-tile errors are recorded elsewhere
 * (td->error_info, checked by the caller).
 */
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    /* this worker's horizontal span and plane/filter-state offsets */
    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
    yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);

    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* range decoders were pre-initialized per tile row by the caller */
        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);

            /* reset left-edge contexts at the tile's left border */
            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                          yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            /* tell the loop-filter worker this sb row of our column is done */
            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}
1468
1469 static av_always_inline
loopfilter_proc(AVCodecContext *avctx)1470 int loopfilter_proc(AVCodecContext *avctx)
1471 {
1472 VP9Context *s = avctx->priv_data;
1473 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1474 VP9Filter *lflvl_ptr;
1475 int bytesperpixel = s->bytesperpixel, col, i;
1476 AVFrame *f;
1477
1478 f = s->s.frames[CUR_FRAME].tf.f;
1479 ls_y = f->linesize[0];
1480 ls_uv =f->linesize[1];
1481
1482 for (i = 0; i < s->sb_rows; i++) {
1483 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1484
1485 if (s->s.h.filter.level) {
1486 yoff = (ls_y * 64)*i;
1487 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1488 lflvl_ptr = s->lflvl+s->sb_cols*i;
1489 for (col = 0; col < s->cols;
1490 col += 8, yoff += 64 * bytesperpixel,
1491 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1492 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1493 yoff, uvoff);
1494 }
1495 }
1496 }
1497 return 0;
1498 }
1499 #endif
1500
/**
 * Attach AV_VIDEO_ENC_PARAMS_VP9 side data to the current frame,
 * exporting the frame-level quantizer/deltas and, when segmentation is
 * enabled, one entry per decoded block with its segment QP delta.
 *
 * Fix: the inner loop previously re-declared 'tile', shadowing the
 * function-scope variable (-Wshadow); the outer index is reused instead.
 *
 * @param s     decoder context
 * @param frame frame whose tf.f receives the side data
 * @return 0 on success, AVERROR(ENOMEM) if side-data allocation fails
 */
static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
{
    AVVideoEncParams *par;
    unsigned int tile, nb_blocks = 0;

    /* per-block entries are only exported when segmentation is active */
    if (s->s.h.segmentation.enabled) {
        for (tile = 0; tile < s->active_tile_cols; tile++)
            nb_blocks += s->td[tile].nb_block_structure;
    }

    par = av_video_enc_params_create_side_data(frame->tf.f,
        AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
    if (!par)
        return AVERROR(ENOMEM);

    /* frame-level base QP and per-plane DC/AC quantizer deltas */
    par->qp             = s->s.h.yac_qi;
    par->delta_qp[0][0] = s->s.h.ydc_qdelta;
    par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
    par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
    par->delta_qp[1][1] = s->s.h.uvac_qdelta;
    par->delta_qp[2][1] = s->s.h.uvac_qdelta;

    if (nb_blocks) {
        unsigned int block = 0;
        unsigned int block_tile;

        for (tile = 0; tile < s->active_tile_cols; tile++) {
            VP9TileData *td = &s->td[tile];

            for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
                AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
                unsigned int row = td->block_structure[block_tile].row;
                unsigned int col = td->block_structure[block_tile].col;
                uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];

                b->src_x = col * 8;
                b->src_y = row * 8;
                b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
                b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);

                if (s->s.h.segmentation.feat[seg_id].q_enabled) {
                    b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
                    /* in absolute mode the stored value is the segment QP
                     * itself; convert it to a delta from the frame QP */
                    if (s->s.h.segmentation.absolute_vals)
                        b->delta_qp -= par->qp;
                }
            }
        }
    }

    return 0;
}
1552
/**
 * Top-level per-packet decode entry point.
 *
 * Parses the frame header; if the header coded no frame data
 * (decode_frame_header() returned 0, i.e. a "show existing frame"
 * packet), the indicated reference frame is re-output directly.
 * Otherwise this sets up the CUR_FRAME / segmentation-map / mv-pair
 * frame slots and the next_refs[] reference plan, then either hands the
 * packet to a hwaccel or runs the software tile decode (slice-threaded
 * or single-threaded, optionally as two passes), adapts probabilities,
 * rotates next_refs[] into refs[] and outputs the frame unless it is
 * marked invisible.
 *
 * @return number of bytes consumed (pkt->size) or a negative AVERROR.
 */
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    /* keep the previous segmentation map when this frame doesn't update it */
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        /* "show existing frame": re-output reference slot 'ref' as-is */
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        frame->pts = pkt->pts;
        frame->pkt_dts = pkt->dts;
        /* refs are unchanged: carry them over to next_refs */
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_ext_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;

    /* rotate the previous CUR_FRAME into the segmentation-map slot unless
     * its map should be retained for this frame */
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    /* previous CUR_FRAME also provides the mv pairs for this inter frame */
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    /* drop a stale segmentation map if the frame size changed */
    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_ext_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            /* slot refreshed by this frame: point it at CUR_FRAME */
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        /* hardware decode: submit the whole packet and skip the sw path */
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    /* reset the above-edge contexts for the whole frame width */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    /* two-pass decode (pass 1 syntax, pass 2 reconstruction) only when
     * frame threading needs it and the frame adapts probabilities */
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        /* parallel mode: context is saved from the header-coded probs,
         * not adapted from counts, so it can be published immediately */
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        /* rewind per-tile scratch pointers for this pass */
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
            s->td[i].error_info = 0;
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            /* pre-initialize every tile's range decoders so the workers
             * can run independently */
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            /* adapt the frame context from symbol counts, then let
             * dependent frame threads proceed */
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

    if (s->td->error_info < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
        s->td->error_info = 0;
        return AVERROR_INVALIDDATA;
    }
    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
        ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

finish:
    // ref frame setup
    /* commit the reference plan: next_refs -> refs */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
1780
/* Flush callback: drop the three working frames and all eight
 * reference slots (e.g. on seek). */
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < 3; n++)
        vp9_frame_unref(avctx, &s->s.frames[n]);
    for (n = 0; n < 8; n++)
        ff_thread_release_ext_buffer(avctx, &s->s.refs[n]);
}
1791
vp9_decode_init(AVCodecContext *avctx)1792 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1793 {
1794 VP9Context *s = avctx->priv_data;
1795 int ret;
1796
1797 s->last_bpp = 0;
1798 s->s.h.filter.sharpness = -1;
1799
1800 #if HAVE_THREADS
1801 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1802 ret = ff_pthread_init(s, vp9_context_offsets);
1803 if (ret < 0)
1804 return ret;
1805 }
1806 #endif
1807
1808 for (int i = 0; i < 3; i++) {
1809 s->s.frames[i].tf.f = av_frame_alloc();
1810 if (!s->s.frames[i].tf.f)
1811 return AVERROR(ENOMEM);
1812 }
1813 for (int i = 0; i < 8; i++) {
1814 s->s.refs[i].f = av_frame_alloc();
1815 s->next_refs[i].f = av_frame_alloc();
1816 if (!s->s.refs[i].f || !s->next_refs[i].f)
1817 return AVERROR(ENOMEM);
1818 }
1819 return 0;
1820 }
1821
1822 #if HAVE_THREADS
/**
 * Frame-threading state transfer: copy from the source thread context
 * (src, just finished setup of its frame) into dst everything the next
 * worker needs to start decoding.
 *
 * @return 0 on success, a negative AVERROR if taking a frame reference
 *         fails (dst is left partially updated in that case).
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // Re-reference the three working frame slots: drop whatever dst
    // held, then take a new reference to src's slot if populated.
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    // dst's reference pool becomes src's *next* refs, i.e. the pool as
    // it will stand after src's frame completes.
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_ext_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    // Scalar header/geometry state the next frame's header parsing
    // depends on (inter prediction, segmentation map reuse, bit depth,
    // chroma subsampling, pixel format).
    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    // Plain-data tables (probability context, loop-filter deltas,
    // segmentation features) are copied wholesale.
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
1867 #endif
1868
/* Codec descriptor registering the native VP9 decoder with lavc. */
const FFCodec ff_vp9_decoder = {
    .p.name                = "vp9",
    .p.long_name           = NULL_IF_CONFIG_SMALL("Google VP9"),
    .p.type                = AVMEDIA_TYPE_VIDEO,
    .p.id                  = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    FF_CODEC_DECODE_CB(vp9_decode_frame),
    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP |
                             FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
                             FF_CODEC_CAP_ALLOCATE_PROGRESS,
    .flush                 = vp9_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .p.profiles            = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
    // Superframes are split into individual frames before reaching the
    // decoder via this bitstream filter.
    .bsfs                  = "vp9_superframe_split",
    // Hardware acceleration backends, compiled in per build config.
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_VP9_DXVA2_HWACCEL
                               HWACCEL_DXVA2(vp9),
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
                               HWACCEL_D3D11VA(vp9),
#endif
#if CONFIG_VP9_D3D11VA2_HWACCEL
                               HWACCEL_D3D11VA2(vp9),
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp9),
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp9),
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
                               HWACCEL_VDPAU(vp9),
#endif
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
                               HWACCEL_VIDEOTOOLBOX(vp9),
#endif
                               NULL
                           },
};
1911