1/* 2 * Microsoft XMV demuxer 3 * Copyright (c) 2011 Sven Hesse <drmccoy@drmccoy.de> 4 * Copyright (c) 2011 Matthew Hoops <clone2727@gmail.com> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * Microsoft XMV demuxer 26 */ 27 28#include <inttypes.h> 29 30#include "libavutil/intreadwrite.h" 31 32#include "avformat.h" 33#include "internal.h" 34#include "riff.h" 35#include "libavutil/avassert.h" 36 37/** The min size of an XMV header. */ 38#define XMV_MIN_HEADER_SIZE 36 39 40/** Audio flag: ADPCM'd 5.1 stream, front left / right channels */ 41#define XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT 1 42/** Audio flag: ADPCM'd 5.1 stream, front center / low frequency channels */ 43#define XMV_AUDIO_ADPCM51_FRONTCENTERLOW 2 44/** Audio flag: ADPCM'd 5.1 stream, rear left / right channels */ 45#define XMV_AUDIO_ADPCM51_REARLEFTRIGHT 4 46 47/** Audio flag: Any of the ADPCM'd 5.1 stream flags. */ 48#define XMV_AUDIO_ADPCM51 (XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT | \ 49 XMV_AUDIO_ADPCM51_FRONTCENTERLOW | \ 50 XMV_AUDIO_ADPCM51_REARLEFTRIGHT) 51 52#define XMV_BLOCK_ALIGN_SIZE 36 53 54/** A video packet with an XMV file. */ 55typedef struct XMVVideoPacket { 56 int created; 57 int stream_index; ///< The decoder stream index for this video packet. 58 59 uint32_t data_size; ///< The size of the remaining video data. 60 uint64_t data_offset; ///< The offset of the video data within the file. 61 62 uint32_t current_frame; ///< The current frame within this video packet. 63 uint32_t frame_count; ///< The amount of frames within this video packet. 64 65 int has_extradata; ///< Does the video packet contain extra data? 66 uint8_t extradata[4]; ///< The extra data 67 68 int64_t last_pts; ///< PTS of the last video frame. 69 int64_t pts; ///< PTS of the most current video frame. 70} XMVVideoPacket; 71 72/** An audio packet with an XMV file. */ 73typedef struct XMVAudioPacket { 74 int created; 75 int stream_index; ///< The decoder stream index for this audio packet. 76 77 /* Stream format properties. */ 78 uint16_t compression; ///< The type of compression. 79 uint16_t channels; ///< Number of channels. 80 int32_t sample_rate; ///< Sampling rate. 81 uint16_t bits_per_sample; ///< Bits per compressed sample. 82 uint64_t bit_rate; ///< Bits of compressed data per second. 83 uint16_t flags; ///< Flags 84 unsigned block_align; ///< Bytes per compressed block. 85 uint16_t block_samples; ///< Decompressed samples per compressed block. 86 87 enum AVCodecID codec_id; ///< The codec ID of the compression scheme. 88 89 uint32_t data_size; ///< The size of the remaining audio data. 90 uint64_t data_offset; ///< The offset of the audio data within the file. 91 92 uint32_t frame_size; ///< Number of bytes to put into an audio frame. 93 94 uint64_t block_count; ///< Running counter of decompressed audio block. 95} XMVAudioPacket; 96 97/** Context for demuxing an XMV file. */ 98typedef struct XMVDemuxContext { 99 uint16_t audio_track_count; ///< Number of audio track in this file. 100 101 uint32_t this_packet_size; ///< Size of the current packet. 102 uint32_t next_packet_size; ///< Size of the next packet. 103 104 uint64_t this_packet_offset; ///< Offset of the current packet. 105 uint64_t next_packet_offset; ///< Offset of the next packet. 106 107 uint16_t current_stream; ///< The index of the stream currently handling. 108 uint16_t stream_count; ///< The number of streams in this file. 109 110 uint32_t video_duration; 111 uint32_t video_width; 112 uint32_t video_height; 113 114 XMVVideoPacket video; ///< The video packet contained in each packet. 115 XMVAudioPacket *audio; ///< The audio packets contained in each packet. 116} XMVDemuxContext; 117 118static int xmv_probe(const AVProbeData *p) 119{ 120 uint32_t file_version; 121 122 if (p->buf_size < XMV_MIN_HEADER_SIZE) 123 return 0; 124 125 file_version = AV_RL32(p->buf + 16); 126 if ((file_version == 0) || (file_version > 4)) 127 return 0; 128 129 if (!memcmp(p->buf + 12, "xobX", 4)) 130 return AVPROBE_SCORE_MAX; 131 132 return 0; 133} 134 135static int xmv_read_close(AVFormatContext *s) 136{ 137 XMVDemuxContext *xmv = s->priv_data; 138 139 av_freep(&xmv->audio); 140 141 return 0; 142} 143 144static int xmv_read_header(AVFormatContext *s) 145{ 146 XMVDemuxContext *xmv = s->priv_data; 147 AVIOContext *pb = s->pb; 148 149 uint32_t file_version; 150 uint32_t this_packet_size; 151 uint16_t audio_track; 152 153 s->ctx_flags |= AVFMTCTX_NOHEADER; 154 155 avio_skip(pb, 4); /* Next packet size */ 156 157 this_packet_size = avio_rl32(pb); 158 159 avio_skip(pb, 4); /* Max packet size */ 160 avio_skip(pb, 4); /* "xobX" */ 161 162 file_version = avio_rl32(pb); 163 if ((file_version != 4) && (file_version != 2)) 164 avpriv_request_sample(s, "Uncommon version %"PRIu32"", file_version); 165 166 /* Video tracks */ 167 168 xmv->video_width = avio_rl32(pb); 169 xmv->video_height = avio_rl32(pb); 170 xmv->video_duration = avio_rl32(pb); 171 172 /* Audio tracks */ 173 174 xmv->audio_track_count = avio_rl16(pb); 175 176 avio_skip(pb, 2); /* Unknown (padding?) */ 177 178 xmv->audio = av_calloc(xmv->audio_track_count, sizeof(*xmv->audio)); 179 if (!xmv->audio) 180 return AVERROR(ENOMEM); 181 182 for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { 183 XMVAudioPacket *packet = &xmv->audio[audio_track]; 184 185 packet->compression = avio_rl16(pb); 186 packet->channels = avio_rl16(pb); 187 packet->sample_rate = avio_rl32(pb); 188 packet->bits_per_sample = avio_rl16(pb); 189 packet->flags = avio_rl16(pb); 190 191 packet->bit_rate = (uint64_t)packet->bits_per_sample * 192 packet->sample_rate * 193 packet->channels; 194 packet->block_align = XMV_BLOCK_ALIGN_SIZE * packet->channels; 195 packet->block_samples = 64; 196 packet->codec_id = ff_wav_codec_get_id(packet->compression, 197 packet->bits_per_sample); 198 199 packet->stream_index = -1; 200 201 packet->frame_size = 0; 202 packet->block_count = 0; 203 204 /* TODO: ADPCM'd 5.1 sound is encoded in three separate streams. 205 * Those need to be interleaved to a proper 5.1 stream. */ 206 if (packet->flags & XMV_AUDIO_ADPCM51) 207 av_log(s, AV_LOG_WARNING, "Unsupported 5.1 ADPCM audio stream " 208 "(0x%04X)\n", packet->flags); 209 210 if (!packet->channels || packet->sample_rate <= 0 || 211 packet->channels >= UINT16_MAX / XMV_BLOCK_ALIGN_SIZE) { 212 av_log(s, AV_LOG_ERROR, "Invalid parameters for audio track %"PRIu16".\n", 213 audio_track); 214 return AVERROR_INVALIDDATA; 215 } 216 } 217 218 219 /* Initialize the packet context */ 220 221 xmv->next_packet_offset = avio_tell(pb); 222 if (this_packet_size < xmv->next_packet_offset) 223 return AVERROR_INVALIDDATA; 224 xmv->next_packet_size = this_packet_size - xmv->next_packet_offset; 225 xmv->stream_count = xmv->audio_track_count + 1; 226 227 return 0; 228} 229 230static void xmv_read_extradata(uint8_t *extradata, AVIOContext *pb) 231{ 232 /* Read the XMV extradata */ 233 234 uint32_t data = avio_rl32(pb); 235 236 int mspel_bit = !!(data & 0x01); 237 int loop_filter = !!(data & 0x02); 238 int abt_flag = !!(data & 0x04); 239 int j_type_bit = !!(data & 0x08); 240 int top_left_mv_flag = !!(data & 0x10); 241 int per_mb_rl_bit = !!(data & 0x20); 242 int slice_count = (data >> 6) & 7; 243 244 /* Write it back as standard WMV2 extradata */ 245 246 data = 0; 247 248 data |= mspel_bit << 15; 249 data |= loop_filter << 14; 250 data |= abt_flag << 13; 251 data |= j_type_bit << 12; 252 data |= top_left_mv_flag << 11; 253 data |= per_mb_rl_bit << 10; 254 data |= slice_count << 7; 255 256 AV_WB32(extradata, data); 257} 258 259static int xmv_process_packet_header(AVFormatContext *s) 260{ 261 XMVDemuxContext *xmv = s->priv_data; 262 AVIOContext *pb = s->pb; 263 int ret; 264 265 uint8_t data[8]; 266 uint16_t audio_track; 267 uint64_t data_offset; 268 269 /* Next packet size */ 270 xmv->next_packet_size = avio_rl32(pb); 271 272 /* Packet video header */ 273 274 if (avio_read(pb, data, 8) != 8) 275 return AVERROR(EIO); 276 277 xmv->video.data_size = AV_RL32(data) & 0x007FFFFF; 278 279 xmv->video.current_frame = 0; 280 xmv->video.frame_count = (AV_RL32(data) >> 23) & 0xFF; 281 282 xmv->video.has_extradata = (data[3] & 0x80) != 0; 283 284 if (!xmv->video.created) { 285 AVStream *vst = avformat_new_stream(s, NULL); 286 if (!vst) 287 return AVERROR(ENOMEM); 288 289 avpriv_set_pts_info(vst, 32, 1, 1000); 290 291 vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; 292 vst->codecpar->codec_id = AV_CODEC_ID_WMV2; 293 vst->codecpar->codec_tag = MKBETAG('W', 'M', 'V', '2'); 294 vst->codecpar->width = xmv->video_width; 295 vst->codecpar->height = xmv->video_height; 296 297 vst->duration = xmv->video_duration; 298 299 xmv->video.stream_index = vst->index; 300 301 xmv->video.created = 1; 302 } 303 304 /* Adding the audio data sizes and the video data size keeps you 4 bytes 305 * short for every audio track. But as playing around with XMV files with 306 * ADPCM audio showed, taking the extra 4 bytes from the audio data gives 307 * you either completely distorted audio or click (when skipping the 308 * remaining 68 bytes of the ADPCM block). Subtracting 4 bytes for every 309 * audio track from the video data works at least for the audio. Probably 310 * some alignment thing? 311 * The video data has (always?) lots of padding, so it should work out... 312 */ 313 xmv->video.data_size -= xmv->audio_track_count * 4; 314 315 xmv->current_stream = 0; 316 if (!xmv->video.frame_count) { 317 xmv->video.frame_count = 1; 318 xmv->current_stream = xmv->stream_count > 1; 319 } 320 321 /* Packet audio header */ 322 323 for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { 324 XMVAudioPacket *packet = &xmv->audio[audio_track]; 325 326 if (avio_read(pb, data, 4) != 4) 327 return AVERROR(EIO); 328 329 if (!packet->created) { 330 AVStream *ast = avformat_new_stream(s, NULL); 331 if (!ast) 332 return AVERROR(ENOMEM); 333 334 ast->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; 335 ast->codecpar->codec_id = packet->codec_id; 336 ast->codecpar->codec_tag = packet->compression; 337 ast->codecpar->ch_layout.nb_channels = packet->channels; 338 ast->codecpar->sample_rate = packet->sample_rate; 339 ast->codecpar->bits_per_coded_sample = packet->bits_per_sample; 340 ast->codecpar->bit_rate = packet->bit_rate; 341 ast->codecpar->block_align = 36 * packet->channels; 342 343 avpriv_set_pts_info(ast, 32, packet->block_samples, packet->sample_rate); 344 345 packet->stream_index = ast->index; 346 347 ast->duration = xmv->video_duration; 348 349 packet->created = 1; 350 } 351 352 packet->data_size = AV_RL32(data) & 0x007FFFFF; 353 if ((packet->data_size == 0) && (audio_track != 0)) 354 /* This happens when I create an XMV with several identical audio 355 * streams. From the size calculations, duplicating the previous 356 * stream's size works out, but the track data itself is silent. 357 * Maybe this should also redirect the offset to the previous track? 358 */ 359 packet->data_size = xmv->audio[audio_track - 1].data_size; 360 361 /* Carve up the audio data in frame_count slices */ 362 packet->frame_size = packet->data_size / xmv->video.frame_count; 363 packet->frame_size -= packet->frame_size % packet->block_align; 364 } 365 366 /* Packet data offsets */ 367 368 data_offset = avio_tell(pb); 369 370 xmv->video.data_offset = data_offset; 371 data_offset += xmv->video.data_size; 372 373 for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { 374 xmv->audio[audio_track].data_offset = data_offset; 375 data_offset += xmv->audio[audio_track].data_size; 376 } 377 378 /* Video frames header */ 379 380 /* Read new video extra data */ 381 if (xmv->video.data_size > 0) { 382 if (xmv->video.has_extradata) { 383 xmv_read_extradata(xmv->video.extradata, pb); 384 385 xmv->video.data_size -= 4; 386 xmv->video.data_offset += 4; 387 388 if (xmv->video.stream_index >= 0) { 389 AVStream *vst = s->streams[xmv->video.stream_index]; 390 391 av_assert0(xmv->video.stream_index < s->nb_streams); 392 393 if (vst->codecpar->extradata_size < 4) { 394 if ((ret = ff_alloc_extradata(vst->codecpar, 4)) < 0) 395 return ret; 396 } 397 398 memcpy(vst->codecpar->extradata, xmv->video.extradata, 4); 399 } 400 } 401 } 402 403 return 0; 404} 405 406static int xmv_fetch_new_packet(AVFormatContext *s) 407{ 408 XMVDemuxContext *xmv = s->priv_data; 409 AVIOContext *pb = s->pb; 410 int result; 411 412 if (xmv->this_packet_offset == xmv->next_packet_offset) 413 return AVERROR_EOF; 414 415 /* Seek to it */ 416 xmv->this_packet_offset = xmv->next_packet_offset; 417 if (avio_seek(pb, xmv->this_packet_offset, SEEK_SET) != xmv->this_packet_offset) 418 return AVERROR(EIO); 419 420 /* Update the size */ 421 xmv->this_packet_size = xmv->next_packet_size; 422 if (xmv->this_packet_size < (12 + xmv->audio_track_count * 4)) 423 return AVERROR(EIO); 424 425 /* Process the header */ 426 result = xmv_process_packet_header(s); 427 if (result) 428 return result; 429 430 /* Update the offset */ 431 xmv->next_packet_offset = xmv->this_packet_offset + xmv->this_packet_size; 432 433 return 0; 434} 435 436static int xmv_fetch_audio_packet(AVFormatContext *s, 437 AVPacket *pkt, uint32_t stream) 438{ 439 XMVDemuxContext *xmv = s->priv_data; 440 AVIOContext *pb = s->pb; 441 XMVAudioPacket *audio = &xmv->audio[stream]; 442 443 uint32_t data_size; 444 uint32_t block_count; 445 int result; 446 447 /* Seek to it */ 448 if (avio_seek(pb, audio->data_offset, SEEK_SET) != audio->data_offset) 449 return AVERROR(EIO); 450 451 if ((xmv->video.current_frame + 1) < xmv->video.frame_count) 452 /* Not the last frame, get at most frame_size bytes. */ 453 data_size = FFMIN(audio->frame_size, audio->data_size); 454 else 455 /* Last frame, get the rest. */ 456 data_size = audio->data_size; 457 458 /* Read the packet */ 459 result = av_get_packet(pb, pkt, data_size); 460 if (result <= 0) 461 return result; 462 463 pkt->stream_index = audio->stream_index; 464 465 /* Calculate the PTS */ 466 467 block_count = data_size / audio->block_align; 468 469 pkt->duration = block_count; 470 pkt->pts = audio->block_count; 471 pkt->dts = AV_NOPTS_VALUE; 472 473 audio->block_count += block_count; 474 475 /* Advance offset */ 476 audio->data_size -= data_size; 477 audio->data_offset += data_size; 478 479 return 0; 480} 481 482static int xmv_fetch_video_packet(AVFormatContext *s, 483 AVPacket *pkt) 484{ 485 XMVDemuxContext *xmv = s->priv_data; 486 AVIOContext *pb = s->pb; 487 XMVVideoPacket *video = &xmv->video; 488 489 int result; 490 uint32_t frame_header; 491 uint32_t frame_size, frame_timestamp; 492 uint8_t *data, *end; 493 494 /* Seek to it */ 495 if (avio_seek(pb, video->data_offset, SEEK_SET) != video->data_offset) 496 return AVERROR(EIO); 497 498 /* Read the frame header */ 499 frame_header = avio_rl32(pb); 500 501 frame_size = (frame_header & 0x1FFFF) * 4 + 4; 502 frame_timestamp = (frame_header >> 17); 503 504 if ((frame_size + 4) > video->data_size) 505 return AVERROR(EIO); 506 507 /* Get the packet data */ 508 result = av_get_packet(pb, pkt, frame_size); 509 if (result != frame_size) 510 return result; 511 512 /* Contrary to normal WMV2 video, the bit stream in XMV's 513 * WMV2 is little-endian. 514 * TODO: This manual swap is of course suboptimal. 515 */ 516 for (data = pkt->data, end = pkt->data + frame_size; data < end; data += 4) 517 AV_WB32(data, AV_RL32(data)); 518 519 pkt->stream_index = video->stream_index; 520 521 /* Calculate the PTS */ 522 523 video->last_pts = frame_timestamp + video->pts; 524 525 pkt->duration = 0; 526 pkt->pts = video->last_pts; 527 pkt->dts = AV_NOPTS_VALUE; 528 529 video->pts += frame_timestamp; 530 531 /* Keyframe? */ 532 pkt->flags = (pkt->data[0] & 0x80) ? 0 : AV_PKT_FLAG_KEY; 533 534 /* Advance offset */ 535 video->data_size -= frame_size + 4; 536 video->data_offset += frame_size + 4; 537 538 return 0; 539} 540 541static int xmv_read_packet(AVFormatContext *s, 542 AVPacket *pkt) 543{ 544 XMVDemuxContext *xmv = s->priv_data; 545 int result; 546 547 if (xmv->video.current_frame == xmv->video.frame_count) { 548 /* No frames left in this packet, so we fetch a new one */ 549 550 result = xmv_fetch_new_packet(s); 551 if (result) 552 return result; 553 } 554 555 if (xmv->current_stream == 0) { 556 /* Fetch a video frame */ 557 558 result = xmv_fetch_video_packet(s, pkt); 559 } else { 560 /* Fetch an audio frame */ 561 562 result = xmv_fetch_audio_packet(s, pkt, xmv->current_stream - 1); 563 } 564 if (result) { 565 xmv->current_stream = 0; 566 xmv->video.current_frame = xmv->video.frame_count; 567 return result; 568 } 569 570 571 /* Increase our counters */ 572 if (++xmv->current_stream >= xmv->stream_count) { 573 xmv->current_stream = 0; 574 xmv->video.current_frame += 1; 575 } 576 577 return 0; 578} 579 580const AVInputFormat ff_xmv_demuxer = { 581 .name = "xmv", 582 .long_name = NULL_IF_CONFIG_SMALL("Microsoft XMV"), 583 .extensions = "xmv", 584 .priv_data_size = sizeof(XMVDemuxContext), 585 .flags_internal = FF_FMT_INIT_CLEANUP, 586 .read_probe = xmv_probe, 587 .read_header = xmv_read_header, 588 .read_packet = xmv_read_packet, 589 .read_close = xmv_read_close, 590}; 591