xref: /third_party/ffmpeg/libavcodec/webvttdec.c (revision cabdff1a)
1/*
2 * Copyright (c) 2012 Clément Bœsch
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21/**
22 * @file
23 * WebVTT subtitle decoder
24 * @see http://dev.w3.org/html5/webvtt/
25 * @todo need to support extended markups and cue settings
26 */
27
28#include "avcodec.h"
29#include "ass.h"
30#include "codec_internal.h"
31#include "libavutil/bprint.h"
32
/**
 * Literal sequences recognised in WebVTT cue text and the ASS text written
 * in their place. webvtt_event_to_ass() walks this table from the first
 * entry to the last at every input position and uses the first entry whose
 * "from" string matches.
 */
static const struct {
    const char *from;   /* sequence looked for in the cue text */
    const char *to;     /* text emitted instead of it */
} webvtt_tag_replace[] = {
    /* italic, bold and underline map onto ASS override codes */
    { "<i>",    "{\\i1}" },
    { "</i>",   "{\\i0}" },
    { "<b>",    "{\\b1}" },
    { "</b>",   "{\\b0}" },
    { "<u>",    "{\\u1}" },
    { "</u>",   "{\\u0}" },

    /* escape to avoid ASS markup conflicts */
    { "{",      "\\{" },
    { "}",      "\\}" },

    /* character references */
    { "&gt;",   ">" },
    { "&lt;",   "<" },
    { "&lrm;",  "" },   /* FIXME: properly honor bidi marks */
    { "&rlm;",  "" },   /* FIXME: properly honor bidi marks */
    { "&amp;",  "&" },
    { "&nbsp;", "\\h" },
};
45
46static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
47{
48    int i, again = 0, skip = 0;
49
50    while (*p) {
51
52        for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
53            const char *from = webvtt_tag_replace[i].from;
54            const size_t len = strlen(from);
55            if (!strncmp(p, from, len)) {
56                av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
57                p += len;
58                again = 1;
59                break;
60            }
61        }
62        if (!*p)
63            break;
64
65        if (again) {
66            again = 0;
67            skip = 0;
68            continue;
69        }
70        if (*p == '<')
71            skip = 1;
72        else if (*p == '>')
73            skip = 0;
74        else if (p[0] == '\n' && p[1])
75            av_bprintf(buf, "\\N");
76        else if (!skip && *p != '\r')
77            av_bprint_chars(buf, *p, 1);
78        p++;
79    }
80    return 0;
81}
82
83static int webvtt_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
84                               int *got_sub_ptr, const AVPacket *avpkt)
85{
86    int ret = 0;
87    const char *ptr = avpkt->data;
88    FFASSDecoderContext *s = avctx->priv_data;
89    AVBPrint buf;
90
91    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
92    if (ptr && avpkt->size > 0 && !webvtt_event_to_ass(&buf, ptr))
93        ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0, NULL, NULL);
94    av_bprint_finalize(&buf, NULL);
95    if (ret < 0)
96        return ret;
97    *got_sub_ptr = sub->num_rects > 0;
98    return avpkt->size;
99}
100
101const FFCodec ff_webvtt_decoder = {
102    .p.name         = "webvtt",
103    .p.long_name    = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
104    .p.type         = AVMEDIA_TYPE_SUBTITLE,
105    .p.id           = AV_CODEC_ID_WEBVTT,
106    FF_CODEC_DECODE_SUB_CB(webvtt_decode_frame),
107    .init           = ff_ass_subtitle_header_default,
108    .flush          = ff_ass_decoder_flush,
109    .priv_data_size = sizeof(FFASSDecoderContext),
110    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
111};
112