1/* 2 * TTML subtitle encoder 3 * Copyright (c) 2020 24i 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * TTML subtitle encoder 25 * @see https://www.w3.org/TR/ttml1/ 26 * @see https://www.w3.org/TR/ttml2/ 27 * @see https://www.w3.org/TR/ttml-imsc/rec 28 */ 29 30#include "avcodec.h" 31#include "codec_internal.h" 32#include "libavutil/avstring.h" 33#include "libavutil/bprint.h" 34#include "libavutil/internal.h" 35#include "ass_split.h" 36#include "ass.h" 37#include "ttmlenc.h" 38 39typedef struct { 40 AVCodecContext *avctx; 41 ASSSplitContext *ass_ctx; 42 AVBPrint buffer; 43} TTMLContext; 44 45static void ttml_text_cb(void *priv, const char *text, int len) 46{ 47 TTMLContext *s = priv; 48 AVBPrint cur_line = { 0 }; 49 AVBPrint *buffer = &s->buffer; 50 51 av_bprint_init(&cur_line, len, AV_BPRINT_SIZE_UNLIMITED); 52 53 av_bprint_append_data(&cur_line, text, len); 54 if (!av_bprint_is_complete(&cur_line)) { 55 av_log(s->avctx, AV_LOG_ERROR, 56 "Failed to move the current subtitle dialog to AVBPrint!\n"); 57 av_bprint_finalize(&cur_line, NULL); 58 return; 59 } 60 61 62 av_bprint_escape(buffer, cur_line.str, NULL, AV_ESCAPE_MODE_XML, 63 0); 64 65 av_bprint_finalize(&cur_line, NULL); 66} 67 68static void ttml_new_line_cb(void *priv, int forced) 69{ 70 TTMLContext *s = priv; 71 72 av_bprintf(&s->buffer, "<br/>"); 73} 74 75static const ASSCodesCallbacks ttml_callbacks = { 76 .text = ttml_text_cb, 77 .new_line = ttml_new_line_cb, 78}; 79 80static int ttml_encode_frame(AVCodecContext *avctx, uint8_t *buf, 81 int bufsize, const AVSubtitle *sub) 82{ 83 TTMLContext *s = avctx->priv_data; 84 ASSDialog *dialog; 85 int i; 86 87 av_bprint_clear(&s->buffer); 88 89 for (i=0; i<sub->num_rects; i++) { 90 const char *ass = sub->rects[i]->ass; 91 int ret; 92 93 if (sub->rects[i]->type != SUBTITLE_ASS) { 94 av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n"); 95 return AVERROR(EINVAL); 96 } 97 98 dialog = ff_ass_split_dialog(s->ass_ctx, ass); 99 if (!dialog) 100 return AVERROR(ENOMEM); 101 102 if (dialog->style) { 103 av_bprintf(&s->buffer, "<span region=\""); 104 av_bprint_escape(&s->buffer, dialog->style, NULL, 105 AV_ESCAPE_MODE_XML, 106 AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES); 107 av_bprintf(&s->buffer, "\">"); 108 } 109 110 ret = ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text); 111 if (ret < 0) { 112 int log_level = (ret != AVERROR_INVALIDDATA || 113 avctx->err_recognition & AV_EF_EXPLODE) ? 114 AV_LOG_ERROR : AV_LOG_WARNING; 115 av_log(avctx, log_level, 116 "Splitting received ASS dialog text %s failed: %s\n", 117 dialog->text, 118 av_err2str(ret)); 119 120 if (log_level == AV_LOG_ERROR) { 121 ff_ass_free_dialog(&dialog); 122 return ret; 123 } 124 } 125 126 if (dialog->style) 127 av_bprintf(&s->buffer, "</span>"); 128 129 ff_ass_free_dialog(&dialog); 130 } 131 132 if (!av_bprint_is_complete(&s->buffer)) 133 return AVERROR(ENOMEM); 134 if (!s->buffer.len) 135 return 0; 136 137 // force null-termination, so in case our destination buffer is 138 // too small, the return value is larger than bufsize minus null. 139 if (av_strlcpy(buf, s->buffer.str, bufsize) > bufsize - 1) { 140 av_log(avctx, AV_LOG_ERROR, "Buffer too small for TTML event.\n"); 141 return AVERROR_BUFFER_TOO_SMALL; 142 } 143 144 return s->buffer.len; 145} 146 147static av_cold int ttml_encode_close(AVCodecContext *avctx) 148{ 149 TTMLContext *s = avctx->priv_data; 150 151 ff_ass_split_free(s->ass_ctx); 152 153 av_bprint_finalize(&s->buffer, NULL); 154 155 return 0; 156} 157 158static const char *ttml_get_display_alignment(int alignment) 159{ 160 switch (alignment) { 161 case 1: 162 case 2: 163 case 3: 164 return "after"; 165 case 4: 166 case 5: 167 case 6: 168 return "center"; 169 case 7: 170 case 8: 171 case 9: 172 return "before"; 173 default: 174 return NULL; 175 } 176} 177 178static const char *ttml_get_text_alignment(int alignment) 179{ 180 switch (alignment) { 181 case 1: 182 case 4: 183 case 7: 184 return "left"; 185 case 2: 186 case 5: 187 case 8: 188 return "center"; 189 case 3: 190 case 6: 191 case 9: 192 return "right"; 193 default: 194 return NULL; 195 } 196} 197 198static void ttml_get_origin(ASSScriptInfo script_info, ASSStyle style, 199 int *origin_left, int *origin_top) 200{ 201 *origin_left = av_rescale(style.margin_l, 100, script_info.play_res_x); 202 *origin_top = 203 av_rescale((style.alignment >= 7) ? style.margin_v : 0, 204 100, script_info.play_res_y); 205} 206 207static void ttml_get_extent(ASSScriptInfo script_info, ASSStyle style, 208 int *width, int *height) 209{ 210 *width = av_rescale(script_info.play_res_x - style.margin_r, 211 100, script_info.play_res_x); 212 *height = av_rescale((style.alignment <= 3) ? 213 script_info.play_res_y - style.margin_v : 214 script_info.play_res_y, 215 100, script_info.play_res_y); 216} 217 218static int ttml_write_region(AVCodecContext *avctx, AVBPrint *buf, 219 ASSScriptInfo script_info, ASSStyle style) 220{ 221 const char *display_alignment = NULL; 222 const char *text_alignment = NULL; 223 int origin_left = 0; 224 int origin_top = 0; 225 int width = 0; 226 int height = 0; 227 228 if (!style.name) { 229 av_log(avctx, AV_LOG_ERROR, "Subtitle style name not set!\n"); 230 return AVERROR_INVALIDDATA; 231 } 232 233 if (style.font_size < 0) { 234 av_log(avctx, AV_LOG_ERROR, "Invalid font size for TTML: %d!\n", 235 style.font_size); 236 return AVERROR_INVALIDDATA; 237 } 238 239 if (style.margin_l < 0 || style.margin_r < 0 || style.margin_v < 0) { 240 av_log(avctx, AV_LOG_ERROR, 241 "One or more negative margin values in subtitle style: " 242 "left: %d, right: %d, vertical: %d!\n", 243 style.margin_l, style.margin_r, style.margin_v); 244 return AVERROR_INVALIDDATA; 245 } 246 247 display_alignment = ttml_get_display_alignment(style.alignment); 248 text_alignment = ttml_get_text_alignment(style.alignment); 249 if (!display_alignment || !text_alignment) { 250 av_log(avctx, AV_LOG_ERROR, 251 "Failed to convert ASS style alignment %d of style %s to " 252 "TTML display and text alignment!\n", 253 style.alignment, 254 style.name); 255 return AVERROR_INVALIDDATA; 256 } 257 258 ttml_get_origin(script_info, style, &origin_left, &origin_top); 259 ttml_get_extent(script_info, style, &width, &height); 260 261 av_bprintf(buf, " <region xml:id=\""); 262 av_bprint_escape(buf, style.name, NULL, AV_ESCAPE_MODE_XML, 263 AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES); 264 av_bprintf(buf, "\"\n"); 265 266 av_bprintf(buf, " tts:origin=\"%d%% %d%%\"\n", 267 origin_left, origin_top); 268 av_bprintf(buf, " tts:extent=\"%d%% %d%%\"\n", 269 width, height); 270 271 av_bprintf(buf, " tts:displayAlign=\""); 272 av_bprint_escape(buf, display_alignment, NULL, AV_ESCAPE_MODE_XML, 273 AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES); 274 av_bprintf(buf, "\"\n"); 275 276 av_bprintf(buf, " tts:textAlign=\""); 277 av_bprint_escape(buf, text_alignment, NULL, AV_ESCAPE_MODE_XML, 278 AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES); 279 av_bprintf(buf, "\"\n"); 280 281 // if we set cell resolution to our script reference resolution, 282 // then a single line is a single "point" on our canvas. Thus, by setting 283 // our font size to font size in cells, we should gain a similar enough 284 // scale without resorting to explicit pixel based font sizing, which is 285 // frowned upon in the TTML community. 286 av_bprintf(buf, " tts:fontSize=\"%dc\"\n", 287 style.font_size); 288 289 if (style.font_name) { 290 av_bprintf(buf, " tts:fontFamily=\""); 291 av_bprint_escape(buf, style.font_name, NULL, AV_ESCAPE_MODE_XML, 292 AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES); 293 av_bprintf(buf, "\"\n"); 294 } 295 296 av_bprintf(buf, " tts:overflow=\"visible\" />\n"); 297 298 return 0; 299} 300 301static int ttml_write_header_content(AVCodecContext *avctx) 302{ 303 TTMLContext *s = avctx->priv_data; 304 ASS *ass = (ASS *)s->ass_ctx; 305 ASSScriptInfo script_info = ass->script_info; 306 const size_t base_extradata_size = TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1 + 307 AV_INPUT_BUFFER_PADDING_SIZE; 308 size_t additional_extradata_size = 0; 309 310 if (script_info.play_res_x <= 0 || script_info.play_res_y <= 0) { 311 av_log(avctx, AV_LOG_ERROR, 312 "Invalid subtitle reference resolution %dx%d!\n", 313 script_info.play_res_x, script_info.play_res_y); 314 return AVERROR_INVALIDDATA; 315 } 316 317 // write the first string in extradata, attributes in the base "tt" element. 318 av_bprintf(&s->buffer, ttml_default_namespacing); 319 // the cell resolution is in character cells, so not exactly 1:1 against 320 // a pixel based resolution, but as the tts:extent in the root 321 // "tt" element is frowned upon (and disallowed in the EBU-TT profile), 322 // we mimic the reference resolution by setting it as the cell resolution. 323 av_bprintf(&s->buffer, " ttp:cellResolution=\"%d %d\"\n", 324 script_info.play_res_x, script_info.play_res_y); 325 av_bprint_chars(&s->buffer, '\0', 1); 326 327 // write the second string in extradata, head element containing the styles 328 av_bprintf(&s->buffer, " <head>\n"); 329 av_bprintf(&s->buffer, " <layout>\n"); 330 331 for (int i = 0; i < ass->styles_count; i++) { 332 int ret = ttml_write_region(avctx, &s->buffer, script_info, 333 ass->styles[i]); 334 if (ret < 0) 335 return ret; 336 } 337 338 av_bprintf(&s->buffer, " </layout>\n"); 339 av_bprintf(&s->buffer, " </head>\n"); 340 av_bprint_chars(&s->buffer, '\0', 1); 341 342 if (!av_bprint_is_complete(&s->buffer)) { 343 return AVERROR(ENOMEM); 344 } 345 346 additional_extradata_size = s->buffer.len; 347 348 if (!(avctx->extradata = 349 av_mallocz(base_extradata_size + additional_extradata_size))) { 350 return AVERROR(ENOMEM); 351 } 352 353 avctx->extradata_size = 354 TTMLENC_EXTRADATA_SIGNATURE_SIZE + additional_extradata_size; 355 memcpy(avctx->extradata, TTMLENC_EXTRADATA_SIGNATURE, 356 TTMLENC_EXTRADATA_SIGNATURE_SIZE); 357 358 if (additional_extradata_size) 359 memcpy(avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE, 360 s->buffer.str, additional_extradata_size); 361 362 av_bprint_clear(&s->buffer); 363 364 return 0; 365} 366 367static av_cold int ttml_encode_init(AVCodecContext *avctx) 368{ 369 TTMLContext *s = avctx->priv_data; 370 int ret = AVERROR_BUG; 371 s->avctx = avctx; 372 373 av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED); 374 375 if (!(s->ass_ctx = ff_ass_split(avctx->subtitle_header))) { 376 return AVERROR_INVALIDDATA; 377 } 378 379 if ((ret = ttml_write_header_content(avctx)) < 0) { 380 return ret; 381 } 382 383 return 0; 384} 385 386const FFCodec ff_ttml_encoder = { 387 .p.name = "ttml", 388 .p.long_name = NULL_IF_CONFIG_SMALL("TTML subtitle"), 389 .p.type = AVMEDIA_TYPE_SUBTITLE, 390 .p.id = AV_CODEC_ID_TTML, 391 .priv_data_size = sizeof(TTMLContext), 392 .init = ttml_encode_init, 393 FF_CODEC_ENCODE_SUB_CB(ttml_encode_frame), 394 .close = ttml_encode_close, 395 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 396}; 397