xref: /third_party/ffmpeg/libavcodec/ttmlenc.c (revision cabdff1a)
1/*
2 * TTML subtitle encoder
3 * Copyright (c) 2020 24i
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * TTML subtitle encoder
25 * @see https://www.w3.org/TR/ttml1/
26 * @see https://www.w3.org/TR/ttml2/
27 * @see https://www.w3.org/TR/ttml-imsc/rec
28 */
29
30#include "avcodec.h"
31#include "codec_internal.h"
32#include "libavutil/avstring.h"
33#include "libavutil/bprint.h"
34#include "libavutil/internal.h"
35#include "ass_split.h"
36#include "ass.h"
37#include "ttmlenc.h"
38
39typedef struct {
40    AVCodecContext *avctx;
41    ASSSplitContext *ass_ctx;
42    AVBPrint buffer;
43} TTMLContext;
44
45static void ttml_text_cb(void *priv, const char *text, int len)
46{
47    TTMLContext *s = priv;
48    AVBPrint cur_line = { 0 };
49    AVBPrint *buffer = &s->buffer;
50
51    av_bprint_init(&cur_line, len, AV_BPRINT_SIZE_UNLIMITED);
52
53    av_bprint_append_data(&cur_line, text, len);
54    if (!av_bprint_is_complete(&cur_line)) {
55        av_log(s->avctx, AV_LOG_ERROR,
56               "Failed to move the current subtitle dialog to AVBPrint!\n");
57        av_bprint_finalize(&cur_line, NULL);
58        return;
59    }
60
61
62    av_bprint_escape(buffer, cur_line.str, NULL, AV_ESCAPE_MODE_XML,
63                     0);
64
65    av_bprint_finalize(&cur_line, NULL);
66}
67
68static void ttml_new_line_cb(void *priv, int forced)
69{
70    TTMLContext *s = priv;
71
72    av_bprintf(&s->buffer, "<br/>");
73}
74
75static const ASSCodesCallbacks ttml_callbacks = {
76    .text             = ttml_text_cb,
77    .new_line         = ttml_new_line_cb,
78};
79
80static int ttml_encode_frame(AVCodecContext *avctx, uint8_t *buf,
81                             int bufsize, const AVSubtitle *sub)
82{
83    TTMLContext *s = avctx->priv_data;
84    ASSDialog *dialog;
85    int i;
86
87    av_bprint_clear(&s->buffer);
88
89    for (i=0; i<sub->num_rects; i++) {
90        const char *ass = sub->rects[i]->ass;
91        int ret;
92
93        if (sub->rects[i]->type != SUBTITLE_ASS) {
94            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
95            return AVERROR(EINVAL);
96        }
97
98        dialog = ff_ass_split_dialog(s->ass_ctx, ass);
99        if (!dialog)
100            return AVERROR(ENOMEM);
101
102        if (dialog->style) {
103            av_bprintf(&s->buffer, "<span region=\"");
104            av_bprint_escape(&s->buffer, dialog->style, NULL,
105                             AV_ESCAPE_MODE_XML,
106                             AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
107            av_bprintf(&s->buffer, "\">");
108        }
109
110        ret = ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text);
111        if (ret < 0) {
112            int log_level = (ret != AVERROR_INVALIDDATA ||
113                             avctx->err_recognition & AV_EF_EXPLODE) ?
114                            AV_LOG_ERROR : AV_LOG_WARNING;
115            av_log(avctx, log_level,
116                   "Splitting received ASS dialog text %s failed: %s\n",
117                   dialog->text,
118                   av_err2str(ret));
119
120            if (log_level == AV_LOG_ERROR) {
121                ff_ass_free_dialog(&dialog);
122                return ret;
123            }
124        }
125
126        if (dialog->style)
127            av_bprintf(&s->buffer, "</span>");
128
129        ff_ass_free_dialog(&dialog);
130    }
131
132    if (!av_bprint_is_complete(&s->buffer))
133        return AVERROR(ENOMEM);
134    if (!s->buffer.len)
135        return 0;
136
137    // force null-termination, so in case our destination buffer is
138    // too small, the return value is larger than bufsize minus null.
139    if (av_strlcpy(buf, s->buffer.str, bufsize) > bufsize - 1) {
140        av_log(avctx, AV_LOG_ERROR, "Buffer too small for TTML event.\n");
141        return AVERROR_BUFFER_TOO_SMALL;
142    }
143
144    return s->buffer.len;
145}
146
147static av_cold int ttml_encode_close(AVCodecContext *avctx)
148{
149    TTMLContext *s = avctx->priv_data;
150
151    ff_ass_split_free(s->ass_ctx);
152
153    av_bprint_finalize(&s->buffer, NULL);
154
155    return 0;
156}
157
/**
 * Map an ASS alignment value to the TTML displayAlign keyword.
 *
 * Values 1-3 give "after", 4-6 "center" and 7-9 "before".
 *
 * @param alignment  ASS alignment value
 * @return the keyword, or NULL if alignment is outside 1..9
 */
static const char *ttml_get_display_alignment(int alignment)
{
    static const char *const keyword_by_row[] = {
        "after", "center", "before",
    };

    if (alignment < 1 || alignment > 9)
        return NULL;

    // three consecutive values share one keyword
    return keyword_by_row[(alignment - 1) / 3];
}
177
/**
 * Map an ASS alignment value to the TTML textAlign keyword.
 *
 * Values 1, 4, 7 give "left", 2, 5, 8 "center" and 3, 6, 9 "right".
 *
 * @param alignment  ASS alignment value
 * @return the keyword, or NULL if alignment is outside 1..9
 */
static const char *ttml_get_text_alignment(int alignment)
{
    static const char *const keyword_by_column[] = {
        "left", "center", "right",
    };

    if (alignment < 1 || alignment > 9)
        return NULL;

    // the keyword repeats with a period of three
    return keyword_by_column[(alignment - 1) % 3];
}
197
198static void ttml_get_origin(ASSScriptInfo script_info, ASSStyle style,
199                           int *origin_left, int *origin_top)
200{
201    *origin_left = av_rescale(style.margin_l, 100, script_info.play_res_x);
202    *origin_top  =
203        av_rescale((style.alignment >= 7) ? style.margin_v : 0,
204                   100, script_info.play_res_y);
205}
206
207static void ttml_get_extent(ASSScriptInfo script_info, ASSStyle style,
208                           int *width, int *height)
209{
210    *width  = av_rescale(script_info.play_res_x - style.margin_r,
211                         100, script_info.play_res_x);
212    *height = av_rescale((style.alignment <= 3) ?
213                         script_info.play_res_y - style.margin_v :
214                         script_info.play_res_y,
215                         100, script_info.play_res_y);
216}
217
218static int ttml_write_region(AVCodecContext *avctx, AVBPrint *buf,
219                             ASSScriptInfo script_info, ASSStyle style)
220{
221    const char *display_alignment = NULL;
222    const char *text_alignment = NULL;
223    int origin_left = 0;
224    int origin_top  = 0;
225    int width = 0;
226    int height = 0;
227
228    if (!style.name) {
229        av_log(avctx, AV_LOG_ERROR, "Subtitle style name not set!\n");
230        return AVERROR_INVALIDDATA;
231    }
232
233    if (style.font_size < 0) {
234        av_log(avctx, AV_LOG_ERROR, "Invalid font size for TTML: %d!\n",
235               style.font_size);
236        return AVERROR_INVALIDDATA;
237    }
238
239    if (style.margin_l < 0 || style.margin_r < 0 || style.margin_v < 0) {
240        av_log(avctx, AV_LOG_ERROR,
241               "One or more negative margin values in subtitle style: "
242               "left: %d, right: %d, vertical: %d!\n",
243               style.margin_l, style.margin_r, style.margin_v);
244        return AVERROR_INVALIDDATA;
245    }
246
247    display_alignment = ttml_get_display_alignment(style.alignment);
248    text_alignment = ttml_get_text_alignment(style.alignment);
249    if (!display_alignment || !text_alignment) {
250        av_log(avctx, AV_LOG_ERROR,
251               "Failed to convert ASS style alignment %d of style %s to "
252               "TTML display and text alignment!\n",
253               style.alignment,
254               style.name);
255        return AVERROR_INVALIDDATA;
256    }
257
258    ttml_get_origin(script_info, style, &origin_left, &origin_top);
259    ttml_get_extent(script_info, style, &width, &height);
260
261    av_bprintf(buf, "      <region xml:id=\"");
262    av_bprint_escape(buf, style.name, NULL, AV_ESCAPE_MODE_XML,
263                     AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
264    av_bprintf(buf, "\"\n");
265
266    av_bprintf(buf, "        tts:origin=\"%d%% %d%%\"\n",
267               origin_left, origin_top);
268    av_bprintf(buf, "        tts:extent=\"%d%% %d%%\"\n",
269               width, height);
270
271    av_bprintf(buf, "        tts:displayAlign=\"");
272    av_bprint_escape(buf, display_alignment, NULL, AV_ESCAPE_MODE_XML,
273                     AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
274    av_bprintf(buf, "\"\n");
275
276    av_bprintf(buf, "        tts:textAlign=\"");
277    av_bprint_escape(buf, text_alignment, NULL, AV_ESCAPE_MODE_XML,
278                     AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
279    av_bprintf(buf, "\"\n");
280
281    // if we set cell resolution to our script reference resolution,
282    // then a single line is a single "point" on our canvas. Thus, by setting
283    // our font size to font size in cells, we should gain a similar enough
284    // scale without resorting to explicit pixel based font sizing, which is
285    // frowned upon in the TTML community.
286    av_bprintf(buf, "        tts:fontSize=\"%dc\"\n",
287               style.font_size);
288
289    if (style.font_name) {
290        av_bprintf(buf, "        tts:fontFamily=\"");
291        av_bprint_escape(buf, style.font_name, NULL, AV_ESCAPE_MODE_XML,
292                         AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
293        av_bprintf(buf, "\"\n");
294    }
295
296    av_bprintf(buf, "        tts:overflow=\"visible\" />\n");
297
298    return 0;
299}
300
301static int ttml_write_header_content(AVCodecContext *avctx)
302{
303    TTMLContext *s = avctx->priv_data;
304    ASS *ass = (ASS *)s->ass_ctx;
305    ASSScriptInfo script_info = ass->script_info;
306    const size_t base_extradata_size = TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1 +
307                                       AV_INPUT_BUFFER_PADDING_SIZE;
308    size_t additional_extradata_size = 0;
309
310    if (script_info.play_res_x <= 0 || script_info.play_res_y <= 0) {
311        av_log(avctx, AV_LOG_ERROR,
312               "Invalid subtitle reference resolution %dx%d!\n",
313               script_info.play_res_x, script_info.play_res_y);
314        return AVERROR_INVALIDDATA;
315    }
316
317    // write the first string in extradata, attributes in the base "tt" element.
318    av_bprintf(&s->buffer, ttml_default_namespacing);
319    // the cell resolution is in character cells, so not exactly 1:1 against
320    // a pixel based resolution, but as the tts:extent in the root
321    // "tt" element is frowned upon (and disallowed in the EBU-TT profile),
322    // we mimic the reference resolution by setting it as the cell resolution.
323    av_bprintf(&s->buffer, "  ttp:cellResolution=\"%d %d\"\n",
324               script_info.play_res_x, script_info.play_res_y);
325    av_bprint_chars(&s->buffer, '\0', 1);
326
327    // write the second string in extradata, head element containing the styles
328    av_bprintf(&s->buffer, "  <head>\n");
329    av_bprintf(&s->buffer, "    <layout>\n");
330
331    for (int i = 0; i < ass->styles_count; i++) {
332        int ret = ttml_write_region(avctx, &s->buffer, script_info,
333                                    ass->styles[i]);
334        if (ret < 0)
335            return ret;
336    }
337
338    av_bprintf(&s->buffer, "    </layout>\n");
339    av_bprintf(&s->buffer, "  </head>\n");
340    av_bprint_chars(&s->buffer, '\0', 1);
341
342    if (!av_bprint_is_complete(&s->buffer)) {
343        return AVERROR(ENOMEM);
344    }
345
346    additional_extradata_size = s->buffer.len;
347
348    if (!(avctx->extradata =
349            av_mallocz(base_extradata_size + additional_extradata_size))) {
350        return AVERROR(ENOMEM);
351    }
352
353    avctx->extradata_size =
354        TTMLENC_EXTRADATA_SIGNATURE_SIZE + additional_extradata_size;
355    memcpy(avctx->extradata, TTMLENC_EXTRADATA_SIGNATURE,
356           TTMLENC_EXTRADATA_SIGNATURE_SIZE);
357
358    if (additional_extradata_size)
359        memcpy(avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE,
360               s->buffer.str, additional_extradata_size);
361
362    av_bprint_clear(&s->buffer);
363
364    return 0;
365}
366
367static av_cold int ttml_encode_init(AVCodecContext *avctx)
368{
369    TTMLContext *s = avctx->priv_data;
370    int ret = AVERROR_BUG;
371    s->avctx   = avctx;
372
373    av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
374
375    if (!(s->ass_ctx = ff_ass_split(avctx->subtitle_header))) {
376        return AVERROR_INVALIDDATA;
377    }
378
379    if ((ret = ttml_write_header_content(avctx)) < 0) {
380        return ret;
381    }
382
383    return 0;
384}
385
386const FFCodec ff_ttml_encoder = {
387    .p.name         = "ttml",
388    .p.long_name    = NULL_IF_CONFIG_SMALL("TTML subtitle"),
389    .p.type         = AVMEDIA_TYPE_SUBTITLE,
390    .p.id           = AV_CODEC_ID_TTML,
391    .priv_data_size = sizeof(TTMLContext),
392    .init           = ttml_encode_init,
393    FF_CODEC_ENCODE_SUB_CB(ttml_encode_frame),
394    .close          = ttml_encode_close,
395    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
396};
397