1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * RTP parser for VP9 payload format (draft version 02) - experimental 3cabdff1aSopenharmony_ci * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "avio_internal.h" 25cabdff1aSopenharmony_ci#include "rtpdec_formats.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#define RTP_VP9_DESC_REQUIRED_SIZE 1 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_cistruct PayloadContext { 30cabdff1aSopenharmony_ci AVIOContext *buf; 31cabdff1aSopenharmony_ci uint32_t timestamp; 32cabdff1aSopenharmony_ci}; 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_cistatic av_cold int vp9_init(AVFormatContext *ctx, int st_index, 35cabdff1aSopenharmony_ci PayloadContext *data) 36cabdff1aSopenharmony_ci{ 37cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_WARNING, 38cabdff1aSopenharmony_ci "RTP/VP9 support is still experimental\n"); 39cabdff1aSopenharmony_ci 40cabdff1aSopenharmony_ci return 0; 41cabdff1aSopenharmony_ci} 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_cistatic int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, 44cabdff1aSopenharmony_ci AVStream *st, AVPacket *pkt, uint32_t *timestamp, 45cabdff1aSopenharmony_ci const uint8_t *buf, int len, uint16_t seq, 46cabdff1aSopenharmony_ci int flags) 47cabdff1aSopenharmony_ci{ 48cabdff1aSopenharmony_ci int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data; 49cabdff1aSopenharmony_ci av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame; 50cabdff1aSopenharmony_ci av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; 51cabdff1aSopenharmony_ci int ref_fields = 0, has_ref_field_ext_pic_id = 0; 52cabdff1aSopenharmony_ci int first_fragment, last_fragment; 53cabdff1aSopenharmony_ci int rtp_m; 54cabdff1aSopenharmony_ci int res = 0; 55cabdff1aSopenharmony_ci 56cabdff1aSopenharmony_ci /* drop data of previous packets in case of non-continuous (lossy) packet stream */ 57cabdff1aSopenharmony_ci if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) 58cabdff1aSopenharmony_ci ffio_free_dyn_buf(&rtp_vp9_ctx->buf); 59cabdff1aSopenharmony_ci 60cabdff1aSopenharmony_ci /* sanity check for size of input packet: 1 byte payload at least */ 61cabdff1aSopenharmony_ci if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { 62cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); 63cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 64cabdff1aSopenharmony_ci } 65cabdff1aSopenharmony_ci 66cabdff1aSopenharmony_ci /* 67cabdff1aSopenharmony_ci * decode the required VP9 payload descriptor according to section 4.2 of the spec.: 68cabdff1aSopenharmony_ci * 69cabdff1aSopenharmony_ci * 0 1 2 3 4 5 6 7 70cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 71cabdff1aSopenharmony_ci * |I|P|L|F|B|E|V|-| (REQUIRED) 72cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 73cabdff1aSopenharmony_ci * 74cabdff1aSopenharmony_ci * I: PictureID present 75cabdff1aSopenharmony_ci * P: Inter-picture predicted layer frame 76cabdff1aSopenharmony_ci * L: Layer indices present 77cabdff1aSopenharmony_ci * F: Flexible mode 78cabdff1aSopenharmony_ci * B: Start of VP9 frame 79cabdff1aSopenharmony_ci * E: End of picture 80cabdff1aSopenharmony_ci * V: Scalability Structure (SS) present 81cabdff1aSopenharmony_ci */ 82cabdff1aSopenharmony_ci has_pic_id = !!(buf[0] & 0x80); 83cabdff1aSopenharmony_ci inter_picture_layer_frame = !!(buf[0] & 0x40); 84cabdff1aSopenharmony_ci has_layer_idc = !!(buf[0] & 0x20); 85cabdff1aSopenharmony_ci has_ref_idc = !!(buf[0] & 0x10); 86cabdff1aSopenharmony_ci first_fragment = !!(buf[0] & 0x08); 87cabdff1aSopenharmony_ci last_fragment = !!(buf[0] & 0x04); 88cabdff1aSopenharmony_ci has_ss_data = !!(buf[0] & 0x02); 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci rtp_m = !!(flags & RTP_FLAG_MARKER); 91cabdff1aSopenharmony_ci 92cabdff1aSopenharmony_ci /* sanity check for markers: B should always be equal to the RTP M marker */ 93cabdff1aSopenharmony_ci if (last_fragment != rtp_m) { 94cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m); 95cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 96cabdff1aSopenharmony_ci } 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci /* pass the extensions field */ 99cabdff1aSopenharmony_ci buf += RTP_VP9_DESC_REQUIRED_SIZE; 100cabdff1aSopenharmony_ci len -= RTP_VP9_DESC_REQUIRED_SIZE; 101cabdff1aSopenharmony_ci 102cabdff1aSopenharmony_ci /* 103cabdff1aSopenharmony_ci * decode the 1-byte/2-byte picture ID: 104cabdff1aSopenharmony_ci * 105cabdff1aSopenharmony_ci * 0 1 2 3 4 5 6 7 106cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 107cabdff1aSopenharmony_ci * I: |M|PICTURE ID | (RECOMMENDED) 108cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 109cabdff1aSopenharmony_ci * M: | EXTENDED PID | (RECOMMENDED) 110cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 111cabdff1aSopenharmony_ci * 112cabdff1aSopenharmony_ci * M: The most significant bit of the first octet is an extension flag. 113cabdff1aSopenharmony_ci * PictureID: 8 or 16 bits including the M bit. 114cabdff1aSopenharmony_ci */ 115cabdff1aSopenharmony_ci if (has_pic_id) { 116cabdff1aSopenharmony_ci /* check for 1-byte or 2-byte picture index */ 117cabdff1aSopenharmony_ci if (buf[0] & 0x80) { 118cabdff1aSopenharmony_ci if (len < 2) { 119cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 120cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 121cabdff1aSopenharmony_ci } 122cabdff1aSopenharmony_ci pic_id = AV_RB16(buf) & 0x7fff; 123cabdff1aSopenharmony_ci buf += 2; 124cabdff1aSopenharmony_ci len -= 2; 125cabdff1aSopenharmony_ci } else { 126cabdff1aSopenharmony_ci pic_id = buf[0] & 0x7f; 127cabdff1aSopenharmony_ci buf++; 128cabdff1aSopenharmony_ci len--; 129cabdff1aSopenharmony_ci } 130cabdff1aSopenharmony_ci } 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci /* 133cabdff1aSopenharmony_ci * decode layer indices 134cabdff1aSopenharmony_ci * 135cabdff1aSopenharmony_ci * 0 1 2 3 4 5 6 7 136cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 137cabdff1aSopenharmony_ci * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) 138cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 139cabdff1aSopenharmony_ci * 140cabdff1aSopenharmony_ci * T, S and Q are 2-bit indices for temporal, spatial, and quality layers. 141cabdff1aSopenharmony_ci * If "F" is set in the initial octet, R is 2 bits representing the number 142cabdff1aSopenharmony_ci * of reference fields this frame refers to. 143cabdff1aSopenharmony_ci */ 144cabdff1aSopenharmony_ci if (has_layer_idc) { 145cabdff1aSopenharmony_ci if (len < 1) { 146cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 147cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 148cabdff1aSopenharmony_ci } 149cabdff1aSopenharmony_ci layer_temporal = buf[0] & 0xC0; 150cabdff1aSopenharmony_ci layer_spatial = buf[0] & 0x30; 151cabdff1aSopenharmony_ci layer_quality = buf[0] & 0x0C; 152cabdff1aSopenharmony_ci if (has_ref_idc) { 153cabdff1aSopenharmony_ci ref_fields = buf[0] & 0x03; 154cabdff1aSopenharmony_ci if (ref_fields) 155cabdff1aSopenharmony_ci non_key_frame = 1; 156cabdff1aSopenharmony_ci } 157cabdff1aSopenharmony_ci buf++; 158cabdff1aSopenharmony_ci len--; 159cabdff1aSopenharmony_ci } 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci /* 162cabdff1aSopenharmony_ci * decode the reference fields 163cabdff1aSopenharmony_ci * 164cabdff1aSopenharmony_ci * 0 1 2 3 4 5 6 7 165cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ -\ 166cabdff1aSopenharmony_ci * F: | PID |X| RS| RQ| (OPTIONAL) . 167cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ . - R times 168cabdff1aSopenharmony_ci * X: | EXTENDED PID | (OPTIONAL) . 169cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ -/ 170cabdff1aSopenharmony_ci * 171cabdff1aSopenharmony_ci * PID: The relative Picture ID referred to by this frame. 172cabdff1aSopenharmony_ci * RS and RQ: The spatial and quality layer IDs. 173cabdff1aSopenharmony_ci * X: 1 if this layer index has an extended relative Picture ID. 174cabdff1aSopenharmony_ci */ 175cabdff1aSopenharmony_ci if (has_ref_idc) { 176cabdff1aSopenharmony_ci while (ref_fields) { 177cabdff1aSopenharmony_ci if (len < 1) { 178cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 179cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 180cabdff1aSopenharmony_ci } 181cabdff1aSopenharmony_ci 182cabdff1aSopenharmony_ci has_ref_field_ext_pic_id = buf[0] & 0x10; 183cabdff1aSopenharmony_ci 184cabdff1aSopenharmony_ci /* pass ref. field */ 185cabdff1aSopenharmony_ci if (has_ref_field_ext_pic_id) { 186cabdff1aSopenharmony_ci if (len < 2) { 187cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 188cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 189cabdff1aSopenharmony_ci } 190cabdff1aSopenharmony_ci 191cabdff1aSopenharmony_ci /* ignore ref. data */ 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci buf += 2; 194cabdff1aSopenharmony_ci len -= 2; 195cabdff1aSopenharmony_ci } else { 196cabdff1aSopenharmony_ci 197cabdff1aSopenharmony_ci /* ignore ref. data */ 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ci buf++; 200cabdff1aSopenharmony_ci len--; 201cabdff1aSopenharmony_ci } 202cabdff1aSopenharmony_ci ref_fields--; 203cabdff1aSopenharmony_ci } 204cabdff1aSopenharmony_ci } 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci /* 207cabdff1aSopenharmony_ci * decode the scalability structure (SS) 208cabdff1aSopenharmony_ci * 209cabdff1aSopenharmony_ci * 0 1 2 3 4 5 6 7 210cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ 211cabdff1aSopenharmony_ci * V: | PATTERN LENGTH| 212cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ -\ 213cabdff1aSopenharmony_ci * | T | S | Q | R | (OPTIONAL) . 214cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ -\ . 215cabdff1aSopenharmony_ci * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times 216cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ . - R times . 217cabdff1aSopenharmony_ci * X: | EXTENDED PID | (OPTIONAL) . . 218cabdff1aSopenharmony_ci * +-+-+-+-+-+-+-+-+ -/ -/ 219cabdff1aSopenharmony_ci * 220cabdff1aSopenharmony_ci * PID: The relative Picture ID referred to by this frame. 221cabdff1aSopenharmony_ci * RS and RQ: The spatial and quality layer IDs. 222cabdff1aSopenharmony_ci * X: 1 if this layer index has an extended relative Picture ID. 223cabdff1aSopenharmony_ci */ 224cabdff1aSopenharmony_ci if (has_ss_data) { 225cabdff1aSopenharmony_ci int n_s, y, g, i; 226cabdff1aSopenharmony_ci if (len < 1) { 227cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 228cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 229cabdff1aSopenharmony_ci } 230cabdff1aSopenharmony_ci n_s = buf[0] >> 5; 231cabdff1aSopenharmony_ci y = !!(buf[0] & 0x10); 232cabdff1aSopenharmony_ci g = !!(buf[0] & 0x08); 233cabdff1aSopenharmony_ci buf++; 234cabdff1aSopenharmony_ci len--; 235cabdff1aSopenharmony_ci if (n_s > 0) { 236cabdff1aSopenharmony_ci avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers"); 237cabdff1aSopenharmony_ci return AVERROR_PATCHWELCOME; 238cabdff1aSopenharmony_ci } 239cabdff1aSopenharmony_ci if (y) { 240cabdff1aSopenharmony_ci if (len < 4 * (n_s + 1)) { 241cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 242cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 243cabdff1aSopenharmony_ci } 244cabdff1aSopenharmony_ci for (i = 0; i < n_s + 1; i++) { 245cabdff1aSopenharmony_ci av_unused int w, h; 246cabdff1aSopenharmony_ci w = AV_RB16(buf); 247cabdff1aSopenharmony_ci h = AV_RB16(buf + 2); 248cabdff1aSopenharmony_ci buf += 4; 249cabdff1aSopenharmony_ci len -= 4; 250cabdff1aSopenharmony_ci } 251cabdff1aSopenharmony_ci } 252cabdff1aSopenharmony_ci if (g) { 253cabdff1aSopenharmony_ci int n_g; 254cabdff1aSopenharmony_ci if (len < 1) { 255cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 256cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 257cabdff1aSopenharmony_ci } 258cabdff1aSopenharmony_ci n_g = buf[0]; 259cabdff1aSopenharmony_ci buf++; 260cabdff1aSopenharmony_ci len--; 261cabdff1aSopenharmony_ci for (i = 0; i < n_g; i++) { 262cabdff1aSopenharmony_ci av_unused int t, u, r, j; 263cabdff1aSopenharmony_ci if (len < 1) { 264cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 265cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 266cabdff1aSopenharmony_ci } 267cabdff1aSopenharmony_ci t = buf[0] >> 5; 268cabdff1aSopenharmony_ci u = !!(buf[0] & 0x10); 269cabdff1aSopenharmony_ci r = (buf[0] >> 2) & 0x03; 270cabdff1aSopenharmony_ci buf++; 271cabdff1aSopenharmony_ci len--; 272cabdff1aSopenharmony_ci if (len < r) { 273cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 274cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 275cabdff1aSopenharmony_ci } 276cabdff1aSopenharmony_ci for (j = 0; j < r; j++) { 277cabdff1aSopenharmony_ci av_unused int p_diff = buf[0]; 278cabdff1aSopenharmony_ci buf++; 279cabdff1aSopenharmony_ci len--; 280cabdff1aSopenharmony_ci } 281cabdff1aSopenharmony_ci } 282cabdff1aSopenharmony_ci } 283cabdff1aSopenharmony_ci } 284cabdff1aSopenharmony_ci 285cabdff1aSopenharmony_ci /* 286cabdff1aSopenharmony_ci * decode the VP9 payload header 287cabdff1aSopenharmony_ci * 288cabdff1aSopenharmony_ci * spec. is tbd 289cabdff1aSopenharmony_ci */ 290cabdff1aSopenharmony_ci //XXX: implement when specified 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_ci /* sanity check: 1 byte payload as minimum */ 293cabdff1aSopenharmony_ci if (len < 1) { 294cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 295cabdff1aSopenharmony_ci return AVERROR_INVALIDDATA; 296cabdff1aSopenharmony_ci } 297cabdff1aSopenharmony_ci 298cabdff1aSopenharmony_ci /* start frame buffering with new dynamic buffer */ 299cabdff1aSopenharmony_ci if (!rtp_vp9_ctx->buf) { 300cabdff1aSopenharmony_ci /* sanity check: a new frame should have started */ 301cabdff1aSopenharmony_ci if (first_fragment) { 302cabdff1aSopenharmony_ci res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); 303cabdff1aSopenharmony_ci if (res < 0) 304cabdff1aSopenharmony_ci return res; 305cabdff1aSopenharmony_ci /* update the timestamp in the frame packet with the one from the RTP packet */ 306cabdff1aSopenharmony_ci rtp_vp9_ctx->timestamp = *timestamp; 307cabdff1aSopenharmony_ci } else { 308cabdff1aSopenharmony_ci /* frame not started yet, need more packets */ 309cabdff1aSopenharmony_ci return AVERROR(EAGAIN); 310cabdff1aSopenharmony_ci } 311cabdff1aSopenharmony_ci } 312cabdff1aSopenharmony_ci 313cabdff1aSopenharmony_ci /* write the fragment to the dyn. buffer */ 314cabdff1aSopenharmony_ci avio_write(rtp_vp9_ctx->buf, buf, len); 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci /* do we need more fragments? */ 317cabdff1aSopenharmony_ci if (!last_fragment) 318cabdff1aSopenharmony_ci return AVERROR(EAGAIN); 319cabdff1aSopenharmony_ci 320cabdff1aSopenharmony_ci /* close frame buffering and create resulting A/V packet */ 321cabdff1aSopenharmony_ci res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); 322cabdff1aSopenharmony_ci if (res < 0) 323cabdff1aSopenharmony_ci return res; 324cabdff1aSopenharmony_ci 325cabdff1aSopenharmony_ci return 0; 326cabdff1aSopenharmony_ci} 327cabdff1aSopenharmony_ci 328cabdff1aSopenharmony_cistatic void vp9_close_context(PayloadContext *vp9) 329cabdff1aSopenharmony_ci{ 330cabdff1aSopenharmony_ci ffio_free_dyn_buf(&vp9->buf); 331cabdff1aSopenharmony_ci} 332cabdff1aSopenharmony_ci 333cabdff1aSopenharmony_ciconst RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { 334cabdff1aSopenharmony_ci .enc_name = "VP9", 335cabdff1aSopenharmony_ci .codec_type = AVMEDIA_TYPE_VIDEO, 336cabdff1aSopenharmony_ci .codec_id = AV_CODEC_ID_VP9, 337cabdff1aSopenharmony_ci .priv_data_size = sizeof(PayloadContext), 338cabdff1aSopenharmony_ci .init = vp9_init, 339cabdff1aSopenharmony_ci .close = vp9_close_context, 340cabdff1aSopenharmony_ci .parse_packet = vp9_handle_packet 341cabdff1aSopenharmony_ci}; 342