1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * SAMI subtitle decoder
24  * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25  */
26 
27 #include "ass.h"
28 #include "libavutil/avstring.h"
29 #include "libavutil/bprint.h"
30 #include "codec_internal.h"
31 #include "htmlsubtitles.h"
32 
33 typedef struct {
34     AVBPrint source;
35     AVBPrint content;
36     AVBPrint encoded_source;
37     AVBPrint encoded_content;
38     AVBPrint full;
39     int readorder;
40 } SAMIContext;
41 
/**
 * Tell whether s begins with an opening paragraph tag: "<P" (in any case)
 * directly followed by '>' or a whitespace character. The trailing check
 * keeps tags such as <PRE> from being mistaken for a paragraph.
 */
static int sami_is_paragraph_tag(const char *s)
{
    if (av_strncasecmp(s, "<P", 2))
        return 0;
    return s[2] == '>' || av_isspace(s[2]);
}

/**
 * Convert the paragraphs of one SAMI event into a single ASS text line.
 *
 * The text of every <P> paragraph is collected into sami->content, except
 * for paragraphs whose tag carries ID=Source, which replace sami->source.
 * Both are then run through ff_htmlmarkup_to_ass() and assembled in
 * sami->full (the source, when present, in italics on its own line).
 *
 * sami->source is only reset when a source paragraph is encountered, so it
 * keeps its value from one call to the next otherwise.
 *
 * @param avctx codec context whose priv_data is a SAMIContext
 * @param src   NUL-terminated event text; it is duplicated, not modified
 * @return the value of the last ff_htmlmarkup_to_ass() call on success;
 *         a negative value otherwise: AVERROR(ENOMEM) if src cannot be
 *         duplicated, -1 if a paragraph starts with "&nbsp;" (in which case
 *         sami->full is left untouched), or the negative value reported by
 *         ff_htmlmarkup_to_ass()
 */
static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
{
    SAMIContext *sami     = avctx->priv_data;
    AVBPrint *enc_content = &sami->encoded_content;
    AVBPrint *enc_source  = &sami->encoded_source;
    char *buf = av_strdup(src); /* private copy: av_strtok() writes into it */
    char *cur = buf;
    int ret = 0;

    if (!buf)
        return AVERROR(ENOMEM);

    av_bprint_clear(enc_content);
    av_bprint_clear(&sami->content);
    av_bprint_clear(enc_source);

    /* walk over every "<P" occurrence left in the buffer */
    while ((cur = av_stristr(cur, "<P"))) {
        AVBPrint *out = &sami->content;
        char *rest = NULL;
        char *tag;
        int last_was_space = 0;

        if (!sami_is_paragraph_tag(cur)) {
            /* some other tag starting with "<P": resume the search after it */
            cur++;
            continue;
        }

        /* separate this paragraph from what was already collected */
        if (out->len)
            av_bprintf(out, "\\N");

        /* isolate the tag itself; the paragraph text follows the '>' */
        tag = av_strtok(cur, ">", &rest);
        if (!tag || !rest)
            break;
        cur = rest;

        /* the "source" paragraph holds the speaker name and has its own buffer */
        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
            out = &sami->source;
            av_bprint_clear(out);
        }

        /* an event starting with a non-breaking space is empty: skip it */
        while (av_isspace(*cur))
            cur++;
        if (!strncmp(cur, "&nbsp;", 6)) {
            ret = -1;
            goto end;
        }

        /* copy the text until the next paragraph, collapsing whitespace runs */
        while (*cur && !sami_is_paragraph_tag(cur)) {
            int is_space;

            if (!av_strncasecmp(cur, "<BR", 3)) {
                av_bprintf(out, "\\N");
                /* drop the rest of the tag, closing '>' included */
                do {
                    cur++;
                } while (*cur && *cur != '>');
                if (!*cur)
                    break;
                cur++;
                continue;
            }

            is_space = av_isspace(*cur);
            if (!is_space)
                av_bprint_chars(out, *cur, 1);
            else if (!last_was_space)
                av_bprint_chars(out, ' ', 1);
            last_was_space = is_space;
            cur++;
        }
    }

    /* assemble the final line: optional italic source, then the content */
    av_bprint_clear(&sami->full);
    if (sami->source.len) {
        ret = ff_htmlmarkup_to_ass(avctx, enc_source, sami->source.str);
        if (ret < 0)
            goto end;
        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", enc_source->str);
    }
    ret = ff_htmlmarkup_to_ass(avctx, enc_content, sami->content.str);
    if (ret >= 0)
        av_bprintf(&sami->full, "%s", enc_content->str);

end:
    av_free(buf);
    return ret;
}
134 
/**
 * Decode one SAMI packet into an ASS rectangle of sub.
 *
 * A packet without data or with a non-positive size adds nothing.
 *
 * @param avctx       codec context whose priv_data is a SAMIContext
 * @param sub         subtitle receiving the decoded rectangle
 * @param got_sub_ptr set to non-zero when sub holds at least one rectangle
 * @param avpkt       input packet
 * @return avpkt->size on success, otherwise the negative value returned by
 *         sami_paragraph_to_ass() or ff_ass_add_rect()
 */
static int sami_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr, const AVPacket *avpkt)
{
    SAMIContext *sami = avctx->priv_data;
    const char *text  = (const char *)avpkt->data;

    if (text && avpkt->size > 0) {
        int ret = sami_paragraph_to_ass(avctx, text);

        // TODO: pass escaped sami->encoded_source.str as source
        if (ret >= 0)
            ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++,
                                  0, NULL, NULL);
        if (ret < 0)
            return ret;
    }

    *got_sub_ptr = sub->num_rects > 0;
    return avpkt->size;
}
153 
sami_init(AVCodecContext *avctx)154 static av_cold int sami_init(AVCodecContext *avctx)
155 {
156     SAMIContext *sami = avctx->priv_data;
157     av_bprint_init(&sami->source,  0, 2048);
158     av_bprint_init(&sami->content, 0, 2048);
159     av_bprint_init(&sami->encoded_source,  0, 2048);
160     av_bprint_init(&sami->encoded_content, 0, 2048);
161     av_bprint_init(&sami->full,    0, 2048);
162     return ff_ass_subtitle_header_default(avctx);
163 }
164 
sami_close(AVCodecContext *avctx)165 static av_cold int sami_close(AVCodecContext *avctx)
166 {
167     SAMIContext *sami = avctx->priv_data;
168     av_bprint_finalize(&sami->source,  NULL);
169     av_bprint_finalize(&sami->content, NULL);
170     av_bprint_finalize(&sami->encoded_source,  NULL);
171     av_bprint_finalize(&sami->encoded_content, NULL);
172     av_bprint_finalize(&sami->full,    NULL);
173     return 0;
174 }
175 
/**
 * Flush callback: restart the read-order counter at 0, unless the
 * AV_CODEC_FLAG2_RO_FLUSH_NOOP flag is set in avctx->flags2.
 *
 * @param avctx codec context whose priv_data is a SAMIContext
 */
static void sami_flush(AVCodecContext *avctx)
{
    SAMIContext *sami = avctx->priv_data;

    if (avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)
        return;

    sami->readorder = 0;
}
182 
183 const FFCodec ff_sami_decoder = {
184     .p.name         = "sami",
185     .p.long_name    = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186     .p.type         = AVMEDIA_TYPE_SUBTITLE,
187     .p.id           = AV_CODEC_ID_SAMI,
188     .priv_data_size = sizeof(SAMIContext),
189     .init           = sami_init,
190     .close          = sami_close,
191     FF_CODEC_DECODE_SUB_CB(sami_decode_frame),
192     .flush          = sami_flush,
193     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
194 };
195