xref: /third_party/ffmpeg/libavcodec/samidec.c (revision cabdff1a)
1/*
2 * Copyright (c) 2012 Clément Bœsch
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21/**
22 * @file
23 * SAMI subtitle decoder
24 * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25 */
26
27#include "ass.h"
28#include "libavutil/avstring.h"
29#include "libavutil/bprint.h"
30#include "codec_internal.h"
31#include "htmlsubtitles.h"
32
33typedef struct {
34    AVBPrint source;
35    AVBPrint content;
36    AVBPrint encoded_source;
37    AVBPrint encoded_content;
38    AVBPrint full;
39    int readorder;
40} SAMIContext;
41
42static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
43{
44    SAMIContext *sami = avctx->priv_data;
45    int ret = 0;
46    char *tag = NULL;
47    char *dupsrc = av_strdup(src);
48    char *p = dupsrc;
49    AVBPrint *dst_content = &sami->encoded_content;
50    AVBPrint *dst_source = &sami->encoded_source;
51
52    if (!dupsrc)
53        return AVERROR(ENOMEM);
54
55    av_bprint_clear(&sami->encoded_content);
56    av_bprint_clear(&sami->content);
57    av_bprint_clear(&sami->encoded_source);
58    for (;;) {
59        char *saveptr = NULL;
60        int prev_chr_is_space = 0;
61        AVBPrint *dst = &sami->content;
62
63        /* parse & extract paragraph tag */
64        p = av_stristr(p, "<P");
65        if (!p)
66            break;
67        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
68            p++;
69            continue;
70        }
71        if (dst->len) // add a separator with the previous paragraph if there was one
72            av_bprintf(dst, "\\N");
73        tag = av_strtok(p, ">", &saveptr);
74        if (!tag || !saveptr)
75            break;
76        p = saveptr;
77
78        /* check if the current paragraph is the "source" (speaker name) */
79        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
80            dst = &sami->source;
81            av_bprint_clear(dst);
82        }
83
84        /* if empty event -> skip subtitle */
85        while (av_isspace(*p))
86            p++;
87        if (!strncmp(p, "&nbsp;", 6)) {
88            ret = -1;
89            goto end;
90        }
91
92        /* extract the text, stripping most of the tags */
93        while (*p) {
94            if (*p == '<') {
95                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
96                    break;
97            }
98            if (!av_strncasecmp(p, "<BR", 3)) {
99                av_bprintf(dst, "\\N");
100                p++;
101                while (*p && *p != '>')
102                    p++;
103                if (!*p)
104                    break;
105                if (*p == '>')
106                    p++;
107                continue;
108            }
109            if (!av_isspace(*p))
110                av_bprint_chars(dst, *p, 1);
111            else if (!prev_chr_is_space)
112                av_bprint_chars(dst, ' ', 1);
113            prev_chr_is_space = av_isspace(*p);
114            p++;
115        }
116    }
117
118    av_bprint_clear(&sami->full);
119    if (sami->source.len) {
120        ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
121        if (ret < 0)
122            goto end;
123        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
124    }
125    ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
126    if (ret < 0)
127        goto end;
128    av_bprintf(&sami->full, "%s", sami->encoded_content.str);
129
130end:
131    av_free(dupsrc);
132    return ret;
133}
134
135static int sami_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
136                             int *got_sub_ptr, const AVPacket *avpkt)
137{
138    const char *ptr = avpkt->data;
139    SAMIContext *sami = avctx->priv_data;
140
141    if (ptr && avpkt->size > 0) {
142        int ret = sami_paragraph_to_ass(avctx, ptr);
143        if (ret < 0)
144            return ret;
145        // TODO: pass escaped sami->encoded_source.str as source
146        ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
147        if (ret < 0)
148            return ret;
149    }
150    *got_sub_ptr = sub->num_rects > 0;
151    return avpkt->size;
152}
153
154static av_cold int sami_init(AVCodecContext *avctx)
155{
156    SAMIContext *sami = avctx->priv_data;
157    av_bprint_init(&sami->source,  0, 2048);
158    av_bprint_init(&sami->content, 0, 2048);
159    av_bprint_init(&sami->encoded_source,  0, 2048);
160    av_bprint_init(&sami->encoded_content, 0, 2048);
161    av_bprint_init(&sami->full,    0, 2048);
162    return ff_ass_subtitle_header_default(avctx);
163}
164
165static av_cold int sami_close(AVCodecContext *avctx)
166{
167    SAMIContext *sami = avctx->priv_data;
168    av_bprint_finalize(&sami->source,  NULL);
169    av_bprint_finalize(&sami->content, NULL);
170    av_bprint_finalize(&sami->encoded_source,  NULL);
171    av_bprint_finalize(&sami->encoded_content, NULL);
172    av_bprint_finalize(&sami->full,    NULL);
173    return 0;
174}
175
176static void sami_flush(AVCodecContext *avctx)
177{
178    SAMIContext *sami = avctx->priv_data;
179    if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
180        sami->readorder = 0;
181}
182
183const FFCodec ff_sami_decoder = {
184    .p.name         = "sami",
185    .p.long_name    = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186    .p.type         = AVMEDIA_TYPE_SUBTITLE,
187    .p.id           = AV_CODEC_ID_SAMI,
188    .priv_data_size = sizeof(SAMIContext),
189    .init           = sami_init,
190    .close          = sami_close,
191    FF_CODEC_DECODE_SUB_CB(sami_decode_frame),
192    .flush          = sami_flush,
193    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
194};
195