/*
 * RTP parser for VP9 payload format (draft version 02) - experimental
 * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/intreadwrite.h"
23
24#include "avio_internal.h"
25#include "rtpdec_formats.h"
26
/* Size in bytes of the mandatory first octet (I|P|L|F|B|E|V|-) of the
 * VP9 RTP payload descriptor; the parser skips this many bytes after
 * decoding the flag bits. */
#define RTP_VP9_DESC_REQUIRED_SIZE 1
28
29struct PayloadContext {
30    AVIOContext *buf;
31    uint32_t     timestamp;
32};
33
34static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
35                            PayloadContext *data)
36{
37    av_log(ctx, AV_LOG_WARNING,
38           "RTP/VP9 support is still experimental\n");
39
40    return 0;
41}
42
43static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
44                             AVStream *st, AVPacket *pkt, uint32_t *timestamp,
45                             const uint8_t *buf, int len, uint16_t seq,
46                             int flags)
47{
48    int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
49    av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
50    av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
51    int ref_fields = 0, has_ref_field_ext_pic_id = 0;
52    int first_fragment, last_fragment;
53    int rtp_m;
54    int res = 0;
55
56    /* drop data of previous packets in case of non-continuous (lossy) packet stream */
57    if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp)
58        ffio_free_dyn_buf(&rtp_vp9_ctx->buf);
59
60    /* sanity check for size of input packet: 1 byte payload at least */
61    if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
62        av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
63        return AVERROR_INVALIDDATA;
64    }
65
66    /*
67     *     decode the required VP9 payload descriptor according to section 4.2 of the spec.:
68     *
69     *      0 1 2 3 4 5 6 7
70     *     +-+-+-+-+-+-+-+-+
71     *     |I|P|L|F|B|E|V|-| (REQUIRED)
72     *     +-+-+-+-+-+-+-+-+
73     *
74     *     I: PictureID present
75     *     P: Inter-picture predicted layer frame
76     *     L: Layer indices present
77     *     F: Flexible mode
78     *     B: Start of VP9 frame
79     *     E: End of picture
80     *     V: Scalability Structure (SS) present
81     */
82    has_pic_id     = !!(buf[0] & 0x80);
83    inter_picture_layer_frame = !!(buf[0] & 0x40);
84    has_layer_idc  = !!(buf[0] & 0x20);
85    has_ref_idc    = !!(buf[0] & 0x10);
86    first_fragment = !!(buf[0] & 0x08);
87    last_fragment  = !!(buf[0] & 0x04);
88    has_ss_data    = !!(buf[0] & 0x02);
89
90    rtp_m = !!(flags & RTP_FLAG_MARKER);
91
92    /* sanity check for markers: B should always be equal to the RTP M marker */
93    if (last_fragment != rtp_m) {
94        av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m);
95        return AVERROR_INVALIDDATA;
96    }
97
98    /* pass the extensions field */
99    buf += RTP_VP9_DESC_REQUIRED_SIZE;
100    len -= RTP_VP9_DESC_REQUIRED_SIZE;
101
102    /*
103     *         decode the 1-byte/2-byte picture ID:
104     *
105     *          0 1 2 3 4 5 6 7
106     *         +-+-+-+-+-+-+-+-+
107     *   I:    |M|PICTURE ID   | (RECOMMENDED)
108     *         +-+-+-+-+-+-+-+-+
109     *   M:    | EXTENDED PID  | (RECOMMENDED)
110     *         +-+-+-+-+-+-+-+-+
111     *
112     *   M: The most significant bit of the first octet is an extension flag.
113     *   PictureID:  8 or 16 bits including the M bit.
114     */
115    if (has_pic_id) {
116        /* check for 1-byte or 2-byte picture index */
117        if (buf[0] & 0x80) {
118            if (len < 2) {
119                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
120                return AVERROR_INVALIDDATA;
121            }
122            pic_id = AV_RB16(buf) & 0x7fff;
123            buf += 2;
124            len -= 2;
125        } else {
126            pic_id = buf[0] & 0x7f;
127            buf++;
128            len--;
129        }
130    }
131
132    /*
133     *         decode layer indices
134     *
135     *          0 1 2 3 4 5 6 7
136     *         +-+-+-+-+-+-+-+-+
137     *   L:    | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
138     *         +-+-+-+-+-+-+-+-+
139     *
140     *   T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
141     *   If "F" is set in the initial octet, R is 2 bits representing the number
142     *   of reference fields this frame refers to.
143     */
144    if (has_layer_idc) {
145        if (len < 1) {
146            av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
147            return AVERROR_INVALIDDATA;
148        }
149        layer_temporal = buf[0] & 0xC0;
150        layer_spatial  = buf[0] & 0x30;
151        layer_quality  = buf[0] & 0x0C;
152        if (has_ref_idc) {
153            ref_fields = buf[0] & 0x03;
154            if (ref_fields)
155                non_key_frame = 1;
156        }
157        buf++;
158        len--;
159    }
160
161    /*
162     *         decode the reference fields
163     *
164     *          0 1 2 3 4 5 6 7
165     *         +-+-+-+-+-+-+-+-+              -\
166     *   F:    | PID |X| RS| RQ| (OPTIONAL)    .
167     *         +-+-+-+-+-+-+-+-+               . - R times
168     *   X:    | EXTENDED PID  | (OPTIONAL)    .
169     *         +-+-+-+-+-+-+-+-+              -/
170     *
171     *   PID:  The relative Picture ID referred to by this frame.
172     *   RS and RQ:  The spatial and quality layer IDs.
173     *   X: 1 if this layer index has an extended relative Picture ID.
174     */
175    if (has_ref_idc) {
176        while (ref_fields) {
177            if (len < 1) {
178                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
179                return AVERROR_INVALIDDATA;
180            }
181
182            has_ref_field_ext_pic_id = buf[0] & 0x10;
183
184            /* pass ref. field */
185            if (has_ref_field_ext_pic_id) {
186                if (len < 2) {
187                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
188                    return AVERROR_INVALIDDATA;
189                }
190
191                /* ignore ref. data */
192
193                buf += 2;
194                len -= 2;
195            } else {
196
197                /* ignore ref. data */
198
199                buf++;
200                len--;
201            }
202            ref_fields--;
203        }
204    }
205
206    /*
207     *         decode the scalability structure (SS)
208     *
209     *          0 1 2 3 4 5 6 7
210     *         +-+-+-+-+-+-+-+-+
211     *   V:    | PATTERN LENGTH|
212     *         +-+-+-+-+-+-+-+-+                           -\
213     *         | T | S | Q | R | (OPTIONAL)                 .
214     *         +-+-+-+-+-+-+-+-+              -\            .
215     *         | PID |X| RS| RQ| (OPTIONAL)    .            . - PAT. LEN. times
216     *         +-+-+-+-+-+-+-+-+               . - R times  .
217     *   X:    | EXTENDED PID  | (OPTIONAL)    .            .
218     *         +-+-+-+-+-+-+-+-+              -/           -/
219     *
220     *   PID:  The relative Picture ID referred to by this frame.
221     *   RS and RQ:  The spatial and quality layer IDs.
222     *   X: 1 if this layer index has an extended relative Picture ID.
223     */
224    if (has_ss_data) {
225        int n_s, y, g, i;
226        if (len < 1) {
227            av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
228            return AVERROR_INVALIDDATA;
229        }
230        n_s = buf[0] >> 5;
231        y = !!(buf[0] & 0x10);
232        g = !!(buf[0] & 0x08);
233        buf++;
234        len--;
235        if (n_s > 0) {
236            avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers");
237            return AVERROR_PATCHWELCOME;
238        }
239        if (y) {
240            if (len < 4 * (n_s + 1)) {
241                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
242                return AVERROR_INVALIDDATA;
243            }
244            for (i = 0; i < n_s + 1; i++) {
245                av_unused int w, h;
246                w = AV_RB16(buf);
247                h = AV_RB16(buf + 2);
248                buf += 4;
249                len -= 4;
250            }
251        }
252        if (g) {
253            int n_g;
254            if (len < 1) {
255                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
256                return AVERROR_INVALIDDATA;
257            }
258            n_g = buf[0];
259            buf++;
260            len--;
261            for (i = 0; i < n_g; i++) {
262                av_unused int t, u, r, j;
263                if (len < 1) {
264                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
265                    return AVERROR_INVALIDDATA;
266                }
267                t = buf[0] >> 5;
268                u = !!(buf[0] & 0x10);
269                r = (buf[0] >> 2) & 0x03;
270                buf++;
271                len--;
272                if (len < r) {
273                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
274                    return AVERROR_INVALIDDATA;
275                }
276                for (j = 0; j < r; j++) {
277                    av_unused int p_diff = buf[0];
278                    buf++;
279                    len--;
280                }
281            }
282        }
283    }
284
285    /*
286     * decode the VP9 payload header
287     *
288     *  spec. is tbd
289     */
290    //XXX: implement when specified
291
292    /* sanity check: 1 byte payload as minimum */
293    if (len < 1) {
294        av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
295        return AVERROR_INVALIDDATA;
296    }
297
298    /* start frame buffering with new dynamic buffer */
299    if (!rtp_vp9_ctx->buf) {
300        /* sanity check: a new frame should have started */
301        if (first_fragment) {
302            res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
303            if (res < 0)
304                return res;
305            /* update the timestamp in the frame packet with the one from the RTP packet */
306            rtp_vp9_ctx->timestamp = *timestamp;
307        } else {
308            /* frame not started yet, need more packets */
309            return AVERROR(EAGAIN);
310        }
311    }
312
313    /* write the fragment to the dyn. buffer */
314    avio_write(rtp_vp9_ctx->buf, buf, len);
315
316    /* do we need more fragments? */
317    if (!last_fragment)
318        return AVERROR(EAGAIN);
319
320    /* close frame buffering and create resulting A/V packet */
321    res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
322    if (res < 0)
323        return res;
324
325    return 0;
326}
327
328static void vp9_close_context(PayloadContext *vp9)
329{
330    ffio_free_dyn_buf(&vp9->buf);
331}
332
333const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
334    .enc_name         = "VP9",
335    .codec_type       = AVMEDIA_TYPE_VIDEO,
336    .codec_id         = AV_CODEC_ID_VP9,
337    .priv_data_size   = sizeof(PayloadContext),
338    .init             = vp9_init,
339    .close            = vp9_close_context,
340    .parse_packet     = vp9_handle_packet
341};
342