1/* 2 * Copyright (C) 2005 Michael Ahlberg, Måns Rullgård 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 */ 24 25#include <stdlib.h> 26 27#include "libavutil/avstring.h" 28#include "libavutil/base64.h" 29#include "libavutil/dict.h" 30 31#include "libavcodec/bytestream.h" 32#include "libavcodec/vorbis_parser.h" 33 34#include "avformat.h" 35#include "demux.h" 36#include "flac_picture.h" 37#include "internal.h" 38#include "oggdec.h" 39#include "vorbiscomment.h" 40#include "replaygain.h" 41 42static int ogm_chapter(AVFormatContext *as, const uint8_t *key, const uint8_t *val) 43{ 44 int i, cnum, h, m, s, ms, keylen = strlen(key); 45 AVChapter *chapter = NULL; 46 47 if (keylen < 9 || av_strncasecmp(key, "CHAPTER", 7) || sscanf(key+7, "%03d", &cnum) != 1) 48 return 0; 49 50 if (keylen <= 10) { 51 if (sscanf(val, "%02d:%02d:%02d.%03d", &h, &m, &s, &ms) < 4) 52 return 0; 53 54 avpriv_new_chapter(as, cnum, (AVRational) { 1, 1000 }, 55 ms + 1000 * (s + 60 * (m + 60 * h)), 56 AV_NOPTS_VALUE, NULL); 57 } else if (!av_strcasecmp(key + keylen - 4, "NAME")) { 58 for (i = 0; i < as->nb_chapters; i++) 59 if (as->chapters[i]->id == cnum) { 60 chapter = as->chapters[i]; 61 break; 62 } 63 if (!chapter) 64 return 0; 65 66 av_dict_set(&chapter->metadata, "title", val, 0); 67 } else 68 return 0; 69 70 return 1; 71} 72 73int ff_vorbis_stream_comment(AVFormatContext *as, AVStream *st, 74 const uint8_t *buf, int size) 75{ 76 int updates = ff_vorbis_comment(as, &st->metadata, buf, size, 1); 77 78 if (updates > 0) { 79 st->event_flags |= AVSTREAM_EVENT_FLAG_METADATA_UPDATED; 80 } 81 82 return updates; 83} 84 85/** 86 * This function temporarily modifies the (const qualified) input buffer 87 * and reverts its changes before return. The input buffer needs to have 88 * at least one byte of padding. 89 */ 90static int vorbis_parse_single_comment(AVFormatContext *as, AVDictionary **m, 91 const uint8_t *buf, uint32_t size, 92 int *updates, int parse_picture) 93{ 94 char *t = (char*)buf, *v = memchr(t, '=', size); 95 int tl, vl; 96 char backup; 97 98 if (!v) 99 return 0; 100 101 tl = v - t; 102 vl = size - tl - 1; 103 v++; 104 105 if (!tl || !vl) 106 return 0; 107 108 t[tl] = 0; 109 110 backup = v[vl]; 111 v[vl] = 0; 112 113 /* The format in which the pictures are stored is the FLAC format. 114 * Xiph says: "The binary FLAC picture structure is base64 encoded 115 * and placed within a VorbisComment with the tag name 116 * 'METADATA_BLOCK_PICTURE'. This is the preferred and 117 * recommended way of embedding cover art within VorbisComments." 118 */ 119 if (!av_strcasecmp(t, "METADATA_BLOCK_PICTURE") && parse_picture) { 120 int ret, len = AV_BASE64_DECODE_SIZE(vl); 121 uint8_t *pict = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE); 122 123 if (!pict) { 124 av_log(as, AV_LOG_WARNING, "out-of-memory error. Skipping cover art block.\n"); 125 goto end; 126 } 127 ret = av_base64_decode(pict, v, len); 128 if (ret > 0) 129 ret = ff_flac_parse_picture(as, &pict, ret, 0); 130 av_freep(&pict); 131 if (ret < 0) { 132 av_log(as, AV_LOG_WARNING, "Failed to parse cover art block.\n"); 133 goto end; 134 } 135 } else if (!ogm_chapter(as, t, v)) { 136 (*updates)++; 137 if (av_dict_get(*m, t, NULL, 0)) 138 av_dict_set(m, t, ";", AV_DICT_APPEND); 139 av_dict_set(m, t, v, AV_DICT_APPEND); 140 } 141end: 142 t[tl] = '='; 143 v[vl] = backup; 144 145 return 0; 146} 147 148int ff_vorbis_comment(AVFormatContext *as, AVDictionary **m, 149 const uint8_t *buf, int size, 150 int parse_picture) 151{ 152 const uint8_t *p = buf; 153 const uint8_t *end = buf + size; 154 int updates = 0; 155 unsigned n; 156 int s, ret; 157 158 /* must have vendor_length and user_comment_list_length */ 159 if (size < 8) 160 return AVERROR_INVALIDDATA; 161 162 s = bytestream_get_le32(&p); 163 164 if (end - p - 4 < s || s < 0) 165 return AVERROR_INVALIDDATA; 166 167 p += s; 168 169 n = bytestream_get_le32(&p); 170 171 while (end - p >= 4 && n > 0) { 172 s = bytestream_get_le32(&p); 173 174 if (end - p < s || s < 0) 175 break; 176 177 ret = vorbis_parse_single_comment(as, m, p, s, &updates, parse_picture); 178 if (ret < 0) 179 return ret; 180 p += s; 181 n--; 182 } 183 184 if (p != end) 185 av_log(as, AV_LOG_INFO, 186 "%"PTRDIFF_SPECIFIER" bytes of comment header remain\n", end - p); 187 if (n > 0) 188 av_log(as, AV_LOG_INFO, 189 "truncated comment header, %i comments not found\n", n); 190 191 ff_metadata_conv(m, NULL, ff_vorbiscomment_metadata_conv); 192 193 return updates; 194} 195 196/* 197 * Parse the vorbis header 198 * 199 * Vorbis Identification header from Vorbis_I_spec.html#vorbis-spec-codec 200 * [vorbis_version] = read 32 bits as unsigned integer | Not used 201 * [audio_channels] = read 8 bit integer as unsigned | Used 202 * [audio_sample_rate] = read 32 bits as unsigned integer | Used 203 * [bitrate_maximum] = read 32 bits as signed integer | Not used yet 204 * [bitrate_nominal] = read 32 bits as signed integer | Not used yet 205 * [bitrate_minimum] = read 32 bits as signed integer | Used as bitrate 206 * [blocksize_0] = read 4 bits as unsigned integer | Not Used 207 * [blocksize_1] = read 4 bits as unsigned integer | Not Used 208 * [framing_flag] = read one bit | Not Used 209 */ 210 211struct oggvorbis_private { 212 unsigned int len[3]; 213 unsigned char *packet[3]; 214 AVVorbisParseContext *vp; 215 int64_t final_pts; 216 int final_duration; 217}; 218 219static int fixup_vorbis_headers(AVFormatContext *as, 220 struct oggvorbis_private *priv, 221 uint8_t **buf) 222{ 223 int i, offset, len, err; 224 int buf_len; 225 unsigned char *ptr; 226 227 len = priv->len[0] + priv->len[1] + priv->len[2]; 228 buf_len = len + len / 255 + 64; 229 230 if (*buf) 231 return AVERROR_INVALIDDATA; 232 233 ptr = *buf = av_realloc(NULL, buf_len); 234 if (!ptr) 235 return AVERROR(ENOMEM); 236 memset(*buf, '\0', buf_len); 237 238 ptr[0] = 2; 239 offset = 1; 240 offset += av_xiphlacing(&ptr[offset], priv->len[0]); 241 offset += av_xiphlacing(&ptr[offset], priv->len[1]); 242 for (i = 0; i < 3; i++) { 243 memcpy(&ptr[offset], priv->packet[i], priv->len[i]); 244 offset += priv->len[i]; 245 av_freep(&priv->packet[i]); 246 } 247 if ((err = av_reallocp(buf, offset + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) 248 return err; 249 return offset; 250} 251 252static void vorbis_cleanup(AVFormatContext *s, int idx) 253{ 254 struct ogg *ogg = s->priv_data; 255 struct ogg_stream *os = ogg->streams + idx; 256 struct oggvorbis_private *priv = os->private; 257 int i; 258 if (os->private) { 259 av_vorbis_parse_free(&priv->vp); 260 for (i = 0; i < 3; i++) 261 av_freep(&priv->packet[i]); 262 } 263} 264 265static int vorbis_update_metadata(AVFormatContext *s, int idx) 266{ 267 struct ogg *ogg = s->priv_data; 268 struct ogg_stream *os = ogg->streams + idx; 269 AVStream *st = s->streams[idx]; 270 int ret; 271 272 if (os->psize <= 8) 273 return 0; 274 275 /* New metadata packet; release old data. */ 276 av_dict_free(&st->metadata); 277 ret = ff_vorbis_stream_comment(s, st, os->buf + os->pstart + 7, 278 os->psize - 8); 279 if (ret < 0) 280 return ret; 281 282 /* Update the metadata if possible. */ 283 av_freep(&os->new_metadata); 284 if (st->metadata) { 285 os->new_metadata = av_packet_pack_dictionary(st->metadata, &os->new_metadata_size); 286 /* Send an empty dictionary to indicate that metadata has been cleared. */ 287 } else { 288 os->new_metadata = av_mallocz(1); 289 os->new_metadata_size = 0; 290 } 291 292 return ret; 293} 294 295static int vorbis_header(AVFormatContext *s, int idx) 296{ 297 struct ogg *ogg = s->priv_data; 298 AVStream *st = s->streams[idx]; 299 struct ogg_stream *os = ogg->streams + idx; 300 struct oggvorbis_private *priv; 301 int pkt_type = os->buf[os->pstart]; 302 303 if (!os->private) { 304 os->private = av_mallocz(sizeof(struct oggvorbis_private)); 305 if (!os->private) 306 return AVERROR(ENOMEM); 307 } 308 309 priv = os->private; 310 311 if (!(pkt_type & 1)) 312 return priv->vp ? 0 : AVERROR_INVALIDDATA; 313 314 if (os->psize < 1 || pkt_type > 5) 315 return AVERROR_INVALIDDATA; 316 317 if (priv->packet[pkt_type >> 1]) 318 return AVERROR_INVALIDDATA; 319 if (pkt_type > 1 && !priv->packet[0] || pkt_type > 3 && !priv->packet[1]) 320 return priv->vp ? 0 : AVERROR_INVALIDDATA; 321 322 priv->len[pkt_type >> 1] = os->psize; 323 priv->packet[pkt_type >> 1] = av_memdup(os->buf + os->pstart, os->psize); 324 if (!priv->packet[pkt_type >> 1]) 325 return AVERROR(ENOMEM); 326 if (os->buf[os->pstart] == 1) { 327 const uint8_t *p = os->buf + os->pstart + 7; /* skip "\001vorbis" tag */ 328 unsigned blocksize, bs0, bs1; 329 int srate; 330 int channels; 331 332 if (os->psize != 30) 333 return AVERROR_INVALIDDATA; 334 335 if (bytestream_get_le32(&p) != 0) /* vorbis_version */ 336 return AVERROR_INVALIDDATA; 337 338 channels = bytestream_get_byte(&p); 339 if (st->codecpar->ch_layout.nb_channels && 340 channels != st->codecpar->ch_layout.nb_channels) { 341 av_log(s, AV_LOG_ERROR, "Channel change is not supported\n"); 342 return AVERROR_PATCHWELCOME; 343 } 344 st->codecpar->ch_layout.nb_channels = channels; 345 srate = bytestream_get_le32(&p); 346 p += 4; // skip maximum bitrate 347 st->codecpar->bit_rate = bytestream_get_le32(&p); // nominal bitrate 348 p += 4; // skip minimum bitrate 349 350 blocksize = bytestream_get_byte(&p); 351 bs0 = blocksize & 15; 352 bs1 = blocksize >> 4; 353 354 if (bs0 > bs1) 355 return AVERROR_INVALIDDATA; 356 if (bs0 < 6 || bs1 > 13) 357 return AVERROR_INVALIDDATA; 358 359 if (bytestream_get_byte(&p) != 1) /* framing_flag */ 360 return AVERROR_INVALIDDATA; 361 362 st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; 363 st->codecpar->codec_id = AV_CODEC_ID_VORBIS; 364 365 if (srate > 0) { 366 st->codecpar->sample_rate = srate; 367 avpriv_set_pts_info(st, 64, 1, srate); 368 } 369 } else if (os->buf[os->pstart] == 3) { 370 if (vorbis_update_metadata(s, idx) >= 0 && priv->len[1] > 10) { 371 unsigned new_len; 372 373 int ret = ff_replaygain_export(st, st->metadata); 374 if (ret < 0) 375 return ret; 376 377 // drop all metadata we parsed and which is not required by libvorbis 378 new_len = 7 + 4 + AV_RL32(priv->packet[1] + 7) + 4 + 1; 379 if (new_len >= 16 && new_len < os->psize) { 380 AV_WL32(priv->packet[1] + new_len - 5, 0); 381 priv->packet[1][new_len - 1] = 1; 382 priv->len[1] = new_len; 383 } 384 } 385 } else { 386 int ret; 387 388 if (priv->vp) 389 return AVERROR_INVALIDDATA; 390 391 ret = fixup_vorbis_headers(s, priv, &st->codecpar->extradata); 392 if (ret < 0) { 393 st->codecpar->extradata_size = 0; 394 return ret; 395 } 396 st->codecpar->extradata_size = ret; 397 398 priv->vp = av_vorbis_parse_init(st->codecpar->extradata, st->codecpar->extradata_size); 399 if (!priv->vp) { 400 av_freep(&st->codecpar->extradata); 401 st->codecpar->extradata_size = 0; 402 return AVERROR_UNKNOWN; 403 } 404 } 405 406 return 1; 407} 408 409static int vorbis_packet(AVFormatContext *s, int idx) 410{ 411 struct ogg *ogg = s->priv_data; 412 struct ogg_stream *os = ogg->streams + idx; 413 struct oggvorbis_private *priv = os->private; 414 int duration, flags = 0; 415 416 if (!priv->vp) 417 return AVERROR_INVALIDDATA; 418 419 /* first packet handling 420 * here we parse the duration of each packet in the first page and compare 421 * the total duration to the page granule to find the encoder delay and 422 * set the first timestamp */ 423 if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS) && (int64_t)os->granule>=0) { 424 int seg, d; 425 uint8_t *last_pkt = os->buf + os->pstart; 426 uint8_t *next_pkt = last_pkt; 427 428 av_vorbis_parse_reset(priv->vp); 429 duration = 0; 430 seg = os->segp; 431 d = av_vorbis_parse_frame_flags(priv->vp, last_pkt, 1, &flags); 432 if (d < 0) { 433 os->pflags |= AV_PKT_FLAG_CORRUPT; 434 return 0; 435 } else if (flags & VORBIS_FLAG_COMMENT) { 436 vorbis_update_metadata(s, idx); 437 flags = 0; 438 } 439 duration += d; 440 last_pkt = next_pkt = next_pkt + os->psize; 441 for (; seg < os->nsegs; seg++) { 442 if (os->segments[seg] < 255) { 443 int d = av_vorbis_parse_frame_flags(priv->vp, last_pkt, 1, &flags); 444 if (d < 0) { 445 duration = os->granule; 446 break; 447 } else if (flags & VORBIS_FLAG_COMMENT) { 448 vorbis_update_metadata(s, idx); 449 flags = 0; 450 } 451 duration += d; 452 last_pkt = next_pkt + os->segments[seg]; 453 } 454 next_pkt += os->segments[seg]; 455 } 456 os->lastpts = 457 os->lastdts = os->granule - duration; 458 459 if (!os->granule && duration) //hack to deal with broken files (Ticket3710) 460 os->lastpts = os->lastdts = AV_NOPTS_VALUE; 461 462 if (s->streams[idx]->start_time == AV_NOPTS_VALUE) { 463 s->streams[idx]->start_time = FFMAX(os->lastpts, 0); 464 if (s->streams[idx]->duration != AV_NOPTS_VALUE) 465 s->streams[idx]->duration -= s->streams[idx]->start_time; 466 } 467 priv->final_pts = AV_NOPTS_VALUE; 468 av_vorbis_parse_reset(priv->vp); 469 } 470 471 /* parse packet duration */ 472 if (os->psize > 0) { 473 duration = av_vorbis_parse_frame_flags(priv->vp, os->buf + os->pstart, 1, &flags); 474 if (duration < 0) { 475 os->pflags |= AV_PKT_FLAG_CORRUPT; 476 return 0; 477 } else if (flags & VORBIS_FLAG_COMMENT) { 478 vorbis_update_metadata(s, idx); 479 flags = 0; 480 } 481 os->pduration = duration; 482 } 483 484 /* final packet handling 485 * here we save the pts of the first packet in the final page, sum up all 486 * packet durations in the final page except for the last one, and compare 487 * to the page granule to find the duration of the final packet */ 488 if (os->flags & OGG_FLAG_EOS) { 489 if (os->lastpts != AV_NOPTS_VALUE) { 490 priv->final_pts = os->lastpts; 491 priv->final_duration = 0; 492 } 493 if (os->segp == os->nsegs) { 494 int64_t skip = priv->final_pts + priv->final_duration + os->pduration - os->granule; 495 if (skip > 0) 496 os->end_trimming = skip; 497 os->pduration = os->granule - priv->final_pts - priv->final_duration; 498 } 499 priv->final_duration += os->pduration; 500 } 501 502 return 0; 503} 504 505const struct ogg_codec ff_vorbis_codec = { 506 .magic = "\001vorbis", 507 .magicsize = 7, 508 .header = vorbis_header, 509 .packet = vorbis_packet, 510 .cleanup = vorbis_cleanup, 511 .nb_header = 3, 512}; 513