1/* 2 * RTP parser for VP9 payload format (draft version 02) - experimental 3 * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include "libavutil/intreadwrite.h" 23 24#include "avio_internal.h" 25#include "rtpdec_formats.h" 26 27#define RTP_VP9_DESC_REQUIRED_SIZE 1 28 29struct PayloadContext { 30 AVIOContext *buf; 31 uint32_t timestamp; 32}; 33 34static av_cold int vp9_init(AVFormatContext *ctx, int st_index, 35 PayloadContext *data) 36{ 37 av_log(ctx, AV_LOG_WARNING, 38 "RTP/VP9 support is still experimental\n"); 39 40 return 0; 41} 42 43static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, 44 AVStream *st, AVPacket *pkt, uint32_t *timestamp, 45 const uint8_t *buf, int len, uint16_t seq, 46 int flags) 47{ 48 int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data; 49 av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame; 50 av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; 51 int ref_fields = 0, has_ref_field_ext_pic_id = 0; 52 int first_fragment, last_fragment; 53 int rtp_m; 54 int res = 0; 55 56 /* drop data of previous packets in case of non-continuous (lossy) packet stream */ 57 if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) 58 ffio_free_dyn_buf(&rtp_vp9_ctx->buf); 59 60 /* sanity check for size of input packet: 1 byte payload at least */ 61 if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { 62 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); 63 return AVERROR_INVALIDDATA; 64 } 65 66 /* 67 * decode the required VP9 payload descriptor according to section 4.2 of the spec.: 68 * 69 * 0 1 2 3 4 5 6 7 70 * +-+-+-+-+-+-+-+-+ 71 * |I|P|L|F|B|E|V|-| (REQUIRED) 72 * +-+-+-+-+-+-+-+-+ 73 * 74 * I: PictureID present 75 * P: Inter-picture predicted layer frame 76 * L: Layer indices present 77 * F: Flexible mode 78 * B: Start of VP9 frame 79 * E: End of picture 80 * V: Scalability Structure (SS) present 81 */ 82 has_pic_id = !!(buf[0] & 0x80); 83 inter_picture_layer_frame = !!(buf[0] & 0x40); 84 has_layer_idc = !!(buf[0] & 0x20); 85 has_ref_idc = !!(buf[0] & 0x10); 86 first_fragment = !!(buf[0] & 0x08); 87 last_fragment = !!(buf[0] & 0x04); 88 has_ss_data = !!(buf[0] & 0x02); 89 90 rtp_m = !!(flags & RTP_FLAG_MARKER); 91 92 /* sanity check for markers: B should always be equal to the RTP M marker */ 93 if (last_fragment != rtp_m) { 94 av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m); 95 return AVERROR_INVALIDDATA; 96 } 97 98 /* pass the extensions field */ 99 buf += RTP_VP9_DESC_REQUIRED_SIZE; 100 len -= RTP_VP9_DESC_REQUIRED_SIZE; 101 102 /* 103 * decode the 1-byte/2-byte picture ID: 104 * 105 * 0 1 2 3 4 5 6 7 106 * +-+-+-+-+-+-+-+-+ 107 * I: |M|PICTURE ID | (RECOMMENDED) 108 * +-+-+-+-+-+-+-+-+ 109 * M: | EXTENDED PID | (RECOMMENDED) 110 * +-+-+-+-+-+-+-+-+ 111 * 112 * M: The most significant bit of the first octet is an extension flag. 113 * PictureID: 8 or 16 bits including the M bit. 114 */ 115 if (has_pic_id) { 116 /* check for 1-byte or 2-byte picture index */ 117 if (buf[0] & 0x80) { 118 if (len < 2) { 119 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 120 return AVERROR_INVALIDDATA; 121 } 122 pic_id = AV_RB16(buf) & 0x7fff; 123 buf += 2; 124 len -= 2; 125 } else { 126 pic_id = buf[0] & 0x7f; 127 buf++; 128 len--; 129 } 130 } 131 132 /* 133 * decode layer indices 134 * 135 * 0 1 2 3 4 5 6 7 136 * +-+-+-+-+-+-+-+-+ 137 * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) 138 * +-+-+-+-+-+-+-+-+ 139 * 140 * T, S and Q are 2-bit indices for temporal, spatial, and quality layers. 141 * If "F" is set in the initial octet, R is 2 bits representing the number 142 * of reference fields this frame refers to. 143 */ 144 if (has_layer_idc) { 145 if (len < 1) { 146 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 147 return AVERROR_INVALIDDATA; 148 } 149 layer_temporal = buf[0] & 0xC0; 150 layer_spatial = buf[0] & 0x30; 151 layer_quality = buf[0] & 0x0C; 152 if (has_ref_idc) { 153 ref_fields = buf[0] & 0x03; 154 if (ref_fields) 155 non_key_frame = 1; 156 } 157 buf++; 158 len--; 159 } 160 161 /* 162 * decode the reference fields 163 * 164 * 0 1 2 3 4 5 6 7 165 * +-+-+-+-+-+-+-+-+ -\ 166 * F: | PID |X| RS| RQ| (OPTIONAL) . 167 * +-+-+-+-+-+-+-+-+ . - R times 168 * X: | EXTENDED PID | (OPTIONAL) . 169 * +-+-+-+-+-+-+-+-+ -/ 170 * 171 * PID: The relative Picture ID referred to by this frame. 172 * RS and RQ: The spatial and quality layer IDs. 173 * X: 1 if this layer index has an extended relative Picture ID. 174 */ 175 if (has_ref_idc) { 176 while (ref_fields) { 177 if (len < 1) { 178 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 179 return AVERROR_INVALIDDATA; 180 } 181 182 has_ref_field_ext_pic_id = buf[0] & 0x10; 183 184 /* pass ref. field */ 185 if (has_ref_field_ext_pic_id) { 186 if (len < 2) { 187 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 188 return AVERROR_INVALIDDATA; 189 } 190 191 /* ignore ref. data */ 192 193 buf += 2; 194 len -= 2; 195 } else { 196 197 /* ignore ref. data */ 198 199 buf++; 200 len--; 201 } 202 ref_fields--; 203 } 204 } 205 206 /* 207 * decode the scalability structure (SS) 208 * 209 * 0 1 2 3 4 5 6 7 210 * +-+-+-+-+-+-+-+-+ 211 * V: | PATTERN LENGTH| 212 * +-+-+-+-+-+-+-+-+ -\ 213 * | T | S | Q | R | (OPTIONAL) . 214 * +-+-+-+-+-+-+-+-+ -\ . 215 * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times 216 * +-+-+-+-+-+-+-+-+ . - R times . 217 * X: | EXTENDED PID | (OPTIONAL) . . 218 * +-+-+-+-+-+-+-+-+ -/ -/ 219 * 220 * PID: The relative Picture ID referred to by this frame. 221 * RS and RQ: The spatial and quality layer IDs. 222 * X: 1 if this layer index has an extended relative Picture ID. 223 */ 224 if (has_ss_data) { 225 int n_s, y, g, i; 226 if (len < 1) { 227 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 228 return AVERROR_INVALIDDATA; 229 } 230 n_s = buf[0] >> 5; 231 y = !!(buf[0] & 0x10); 232 g = !!(buf[0] & 0x08); 233 buf++; 234 len--; 235 if (n_s > 0) { 236 avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers"); 237 return AVERROR_PATCHWELCOME; 238 } 239 if (y) { 240 if (len < 4 * (n_s + 1)) { 241 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 242 return AVERROR_INVALIDDATA; 243 } 244 for (i = 0; i < n_s + 1; i++) { 245 av_unused int w, h; 246 w = AV_RB16(buf); 247 h = AV_RB16(buf + 2); 248 buf += 4; 249 len -= 4; 250 } 251 } 252 if (g) { 253 int n_g; 254 if (len < 1) { 255 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 256 return AVERROR_INVALIDDATA; 257 } 258 n_g = buf[0]; 259 buf++; 260 len--; 261 for (i = 0; i < n_g; i++) { 262 av_unused int t, u, r, j; 263 if (len < 1) { 264 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 265 return AVERROR_INVALIDDATA; 266 } 267 t = buf[0] >> 5; 268 u = !!(buf[0] & 0x10); 269 r = (buf[0] >> 2) & 0x03; 270 buf++; 271 len--; 272 if (len < r) { 273 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 274 return AVERROR_INVALIDDATA; 275 } 276 for (j = 0; j < r; j++) { 277 av_unused int p_diff = buf[0]; 278 buf++; 279 len--; 280 } 281 } 282 } 283 } 284 285 /* 286 * decode the VP9 payload header 287 * 288 * spec. is tbd 289 */ 290 //XXX: implement when specified 291 292 /* sanity check: 1 byte payload as minimum */ 293 if (len < 1) { 294 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); 295 return AVERROR_INVALIDDATA; 296 } 297 298 /* start frame buffering with new dynamic buffer */ 299 if (!rtp_vp9_ctx->buf) { 300 /* sanity check: a new frame should have started */ 301 if (first_fragment) { 302 res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); 303 if (res < 0) 304 return res; 305 /* update the timestamp in the frame packet with the one from the RTP packet */ 306 rtp_vp9_ctx->timestamp = *timestamp; 307 } else { 308 /* frame not started yet, need more packets */ 309 return AVERROR(EAGAIN); 310 } 311 } 312 313 /* write the fragment to the dyn. buffer */ 314 avio_write(rtp_vp9_ctx->buf, buf, len); 315 316 /* do we need more fragments? */ 317 if (!last_fragment) 318 return AVERROR(EAGAIN); 319 320 /* close frame buffering and create resulting A/V packet */ 321 res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); 322 if (res < 0) 323 return res; 324 325 return 0; 326} 327 328static void vp9_close_context(PayloadContext *vp9) 329{ 330 ffio_free_dyn_buf(&vp9->buf); 331} 332 333const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { 334 .enc_name = "VP9", 335 .codec_type = AVMEDIA_TYPE_VIDEO, 336 .codec_id = AV_CODEC_ID_VP9, 337 .priv_data_size = sizeof(PayloadContext), 338 .init = vp9_init, 339 .close = vp9_close_context, 340 .parse_packet = vp9_handle_packet 341}; 342