xref: /third_party/ffmpeg/libavcodec/samidec.c (revision cabdff1a)
1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2012 Clément Bœsch
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci/**
22cabdff1aSopenharmony_ci * @file
23cabdff1aSopenharmony_ci * SAMI subtitle decoder
24cabdff1aSopenharmony_ci * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25cabdff1aSopenharmony_ci */
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "ass.h"
28cabdff1aSopenharmony_ci#include "libavutil/avstring.h"
29cabdff1aSopenharmony_ci#include "libavutil/bprint.h"
30cabdff1aSopenharmony_ci#include "codec_internal.h"
31cabdff1aSopenharmony_ci#include "htmlsubtitles.h"
32cabdff1aSopenharmony_ci
33cabdff1aSopenharmony_citypedef struct {
34cabdff1aSopenharmony_ci    AVBPrint source;
35cabdff1aSopenharmony_ci    AVBPrint content;
36cabdff1aSopenharmony_ci    AVBPrint encoded_source;
37cabdff1aSopenharmony_ci    AVBPrint encoded_content;
38cabdff1aSopenharmony_ci    AVBPrint full;
39cabdff1aSopenharmony_ci    int readorder;
40cabdff1aSopenharmony_ci} SAMIContext;
41cabdff1aSopenharmony_ci
42cabdff1aSopenharmony_cistatic int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
43cabdff1aSopenharmony_ci{
44cabdff1aSopenharmony_ci    SAMIContext *sami = avctx->priv_data;
45cabdff1aSopenharmony_ci    int ret = 0;
46cabdff1aSopenharmony_ci    char *tag = NULL;
47cabdff1aSopenharmony_ci    char *dupsrc = av_strdup(src);
48cabdff1aSopenharmony_ci    char *p = dupsrc;
49cabdff1aSopenharmony_ci    AVBPrint *dst_content = &sami->encoded_content;
50cabdff1aSopenharmony_ci    AVBPrint *dst_source = &sami->encoded_source;
51cabdff1aSopenharmony_ci
52cabdff1aSopenharmony_ci    if (!dupsrc)
53cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_ci    av_bprint_clear(&sami->encoded_content);
56cabdff1aSopenharmony_ci    av_bprint_clear(&sami->content);
57cabdff1aSopenharmony_ci    av_bprint_clear(&sami->encoded_source);
58cabdff1aSopenharmony_ci    for (;;) {
59cabdff1aSopenharmony_ci        char *saveptr = NULL;
60cabdff1aSopenharmony_ci        int prev_chr_is_space = 0;
61cabdff1aSopenharmony_ci        AVBPrint *dst = &sami->content;
62cabdff1aSopenharmony_ci
63cabdff1aSopenharmony_ci        /* parse & extract paragraph tag */
64cabdff1aSopenharmony_ci        p = av_stristr(p, "<P");
65cabdff1aSopenharmony_ci        if (!p)
66cabdff1aSopenharmony_ci            break;
67cabdff1aSopenharmony_ci        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
68cabdff1aSopenharmony_ci            p++;
69cabdff1aSopenharmony_ci            continue;
70cabdff1aSopenharmony_ci        }
71cabdff1aSopenharmony_ci        if (dst->len) // add a separator with the previous paragraph if there was one
72cabdff1aSopenharmony_ci            av_bprintf(dst, "\\N");
73cabdff1aSopenharmony_ci        tag = av_strtok(p, ">", &saveptr);
74cabdff1aSopenharmony_ci        if (!tag || !saveptr)
75cabdff1aSopenharmony_ci            break;
76cabdff1aSopenharmony_ci        p = saveptr;
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_ci        /* check if the current paragraph is the "source" (speaker name) */
79cabdff1aSopenharmony_ci        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
80cabdff1aSopenharmony_ci            dst = &sami->source;
81cabdff1aSopenharmony_ci            av_bprint_clear(dst);
82cabdff1aSopenharmony_ci        }
83cabdff1aSopenharmony_ci
84cabdff1aSopenharmony_ci        /* if empty event -> skip subtitle */
85cabdff1aSopenharmony_ci        while (av_isspace(*p))
86cabdff1aSopenharmony_ci            p++;
87cabdff1aSopenharmony_ci        if (!strncmp(p, "&nbsp;", 6)) {
88cabdff1aSopenharmony_ci            ret = -1;
89cabdff1aSopenharmony_ci            goto end;
90cabdff1aSopenharmony_ci        }
91cabdff1aSopenharmony_ci
92cabdff1aSopenharmony_ci        /* extract the text, stripping most of the tags */
93cabdff1aSopenharmony_ci        while (*p) {
94cabdff1aSopenharmony_ci            if (*p == '<') {
95cabdff1aSopenharmony_ci                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
96cabdff1aSopenharmony_ci                    break;
97cabdff1aSopenharmony_ci            }
98cabdff1aSopenharmony_ci            if (!av_strncasecmp(p, "<BR", 3)) {
99cabdff1aSopenharmony_ci                av_bprintf(dst, "\\N");
100cabdff1aSopenharmony_ci                p++;
101cabdff1aSopenharmony_ci                while (*p && *p != '>')
102cabdff1aSopenharmony_ci                    p++;
103cabdff1aSopenharmony_ci                if (!*p)
104cabdff1aSopenharmony_ci                    break;
105cabdff1aSopenharmony_ci                if (*p == '>')
106cabdff1aSopenharmony_ci                    p++;
107cabdff1aSopenharmony_ci                continue;
108cabdff1aSopenharmony_ci            }
109cabdff1aSopenharmony_ci            if (!av_isspace(*p))
110cabdff1aSopenharmony_ci                av_bprint_chars(dst, *p, 1);
111cabdff1aSopenharmony_ci            else if (!prev_chr_is_space)
112cabdff1aSopenharmony_ci                av_bprint_chars(dst, ' ', 1);
113cabdff1aSopenharmony_ci            prev_chr_is_space = av_isspace(*p);
114cabdff1aSopenharmony_ci            p++;
115cabdff1aSopenharmony_ci        }
116cabdff1aSopenharmony_ci    }
117cabdff1aSopenharmony_ci
118cabdff1aSopenharmony_ci    av_bprint_clear(&sami->full);
119cabdff1aSopenharmony_ci    if (sami->source.len) {
120cabdff1aSopenharmony_ci        ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
121cabdff1aSopenharmony_ci        if (ret < 0)
122cabdff1aSopenharmony_ci            goto end;
123cabdff1aSopenharmony_ci        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
124cabdff1aSopenharmony_ci    }
125cabdff1aSopenharmony_ci    ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
126cabdff1aSopenharmony_ci    if (ret < 0)
127cabdff1aSopenharmony_ci        goto end;
128cabdff1aSopenharmony_ci    av_bprintf(&sami->full, "%s", sami->encoded_content.str);
129cabdff1aSopenharmony_ci
130cabdff1aSopenharmony_ciend:
131cabdff1aSopenharmony_ci    av_free(dupsrc);
132cabdff1aSopenharmony_ci    return ret;
133cabdff1aSopenharmony_ci}
134cabdff1aSopenharmony_ci
135cabdff1aSopenharmony_cistatic int sami_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
136cabdff1aSopenharmony_ci                             int *got_sub_ptr, const AVPacket *avpkt)
137cabdff1aSopenharmony_ci{
138cabdff1aSopenharmony_ci    const char *ptr = avpkt->data;
139cabdff1aSopenharmony_ci    SAMIContext *sami = avctx->priv_data;
140cabdff1aSopenharmony_ci
141cabdff1aSopenharmony_ci    if (ptr && avpkt->size > 0) {
142cabdff1aSopenharmony_ci        int ret = sami_paragraph_to_ass(avctx, ptr);
143cabdff1aSopenharmony_ci        if (ret < 0)
144cabdff1aSopenharmony_ci            return ret;
145cabdff1aSopenharmony_ci        // TODO: pass escaped sami->encoded_source.str as source
146cabdff1aSopenharmony_ci        ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
147cabdff1aSopenharmony_ci        if (ret < 0)
148cabdff1aSopenharmony_ci            return ret;
149cabdff1aSopenharmony_ci    }
150cabdff1aSopenharmony_ci    *got_sub_ptr = sub->num_rects > 0;
151cabdff1aSopenharmony_ci    return avpkt->size;
152cabdff1aSopenharmony_ci}
153cabdff1aSopenharmony_ci
154cabdff1aSopenharmony_cistatic av_cold int sami_init(AVCodecContext *avctx)
155cabdff1aSopenharmony_ci{
156cabdff1aSopenharmony_ci    SAMIContext *sami = avctx->priv_data;
157cabdff1aSopenharmony_ci    av_bprint_init(&sami->source,  0, 2048);
158cabdff1aSopenharmony_ci    av_bprint_init(&sami->content, 0, 2048);
159cabdff1aSopenharmony_ci    av_bprint_init(&sami->encoded_source,  0, 2048);
160cabdff1aSopenharmony_ci    av_bprint_init(&sami->encoded_content, 0, 2048);
161cabdff1aSopenharmony_ci    av_bprint_init(&sami->full,    0, 2048);
162cabdff1aSopenharmony_ci    return ff_ass_subtitle_header_default(avctx);
163cabdff1aSopenharmony_ci}
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_cistatic av_cold int sami_close(AVCodecContext *avctx)
166cabdff1aSopenharmony_ci{
167cabdff1aSopenharmony_ci    SAMIContext *sami = avctx->priv_data;
168cabdff1aSopenharmony_ci    av_bprint_finalize(&sami->source,  NULL);
169cabdff1aSopenharmony_ci    av_bprint_finalize(&sami->content, NULL);
170cabdff1aSopenharmony_ci    av_bprint_finalize(&sami->encoded_source,  NULL);
171cabdff1aSopenharmony_ci    av_bprint_finalize(&sami->encoded_content, NULL);
172cabdff1aSopenharmony_ci    av_bprint_finalize(&sami->full,    NULL);
173cabdff1aSopenharmony_ci    return 0;
174cabdff1aSopenharmony_ci}
175cabdff1aSopenharmony_ci
176cabdff1aSopenharmony_cistatic void sami_flush(AVCodecContext *avctx)
177cabdff1aSopenharmony_ci{
178cabdff1aSopenharmony_ci    SAMIContext *sami = avctx->priv_data;
179cabdff1aSopenharmony_ci    if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
180cabdff1aSopenharmony_ci        sami->readorder = 0;
181cabdff1aSopenharmony_ci}
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ciconst FFCodec ff_sami_decoder = {
184cabdff1aSopenharmony_ci    .p.name         = "sami",
185cabdff1aSopenharmony_ci    .p.long_name    = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186cabdff1aSopenharmony_ci    .p.type         = AVMEDIA_TYPE_SUBTITLE,
187cabdff1aSopenharmony_ci    .p.id           = AV_CODEC_ID_SAMI,
188cabdff1aSopenharmony_ci    .priv_data_size = sizeof(SAMIContext),
189cabdff1aSopenharmony_ci    .init           = sami_init,
190cabdff1aSopenharmony_ci    .close          = sami_close,
191cabdff1aSopenharmony_ci    FF_CODEC_DECODE_SUB_CB(sami_decode_frame),
192cabdff1aSopenharmony_ci    .flush          = sami_flush,
193cabdff1aSopenharmony_ci    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
194cabdff1aSopenharmony_ci};
195