xref: /third_party/ffmpeg/libavformat/xwma.c (revision cabdff1a)
/*
 * xWMA demuxer
 * Copyright (c) 2011 Max Horn
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include <inttypes.h>
23cabdff1aSopenharmony_ci#include <stdint.h>
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci#include "avformat.h"
26cabdff1aSopenharmony_ci#include "internal.h"
27cabdff1aSopenharmony_ci#include "riff.h"
28cabdff1aSopenharmony_ci
/*
 * Demuxer for xWMA, a Microsoft audio container used by XAudio 2.
 */
32cabdff1aSopenharmony_ci
/**
 * Private demuxer state (allocated via priv_data_size, reached through
 * AVFormatContext.priv_data).
 */
typedef struct XWMAContext {
    /* Absolute byte offset at which the data chunk ends. xwma_read_header()
     * stores INT64_MAX here when the data chunk declares a size of 0. */
    int64_t data_end;
} XWMAContext;
36cabdff1aSopenharmony_ci
37cabdff1aSopenharmony_cistatic int xwma_probe(const AVProbeData *p)
38cabdff1aSopenharmony_ci{
39cabdff1aSopenharmony_ci    if (!memcmp(p->buf, "RIFF", 4) && !memcmp(p->buf + 8, "XWMA", 4))
40cabdff1aSopenharmony_ci        return AVPROBE_SCORE_MAX;
41cabdff1aSopenharmony_ci    return 0;
42cabdff1aSopenharmony_ci}
43cabdff1aSopenharmony_ci
44cabdff1aSopenharmony_cistatic int xwma_read_header(AVFormatContext *s)
45cabdff1aSopenharmony_ci{
46cabdff1aSopenharmony_ci    int64_t size;
47cabdff1aSopenharmony_ci    int ret = 0;
48cabdff1aSopenharmony_ci    uint32_t dpds_table_size = 0;
49cabdff1aSopenharmony_ci    uint32_t *dpds_table = NULL;
50cabdff1aSopenharmony_ci    unsigned int tag;
51cabdff1aSopenharmony_ci    AVIOContext *pb = s->pb;
52cabdff1aSopenharmony_ci    AVStream *st;
53cabdff1aSopenharmony_ci    XWMAContext *xwma = s->priv_data;
54cabdff1aSopenharmony_ci    int i;
55cabdff1aSopenharmony_ci
56cabdff1aSopenharmony_ci    /* The following code is mostly copied from wav.c, with some
57cabdff1aSopenharmony_ci     * minor alterations.
58cabdff1aSopenharmony_ci     */
59cabdff1aSopenharmony_ci
60cabdff1aSopenharmony_ci    /* check RIFF header */
61cabdff1aSopenharmony_ci    tag = avio_rl32(pb);
62cabdff1aSopenharmony_ci    if (tag != MKTAG('R', 'I', 'F', 'F'))
63cabdff1aSopenharmony_ci        return AVERROR_INVALIDDATA;
64cabdff1aSopenharmony_ci    avio_rl32(pb); /* file size */
65cabdff1aSopenharmony_ci    tag = avio_rl32(pb);
66cabdff1aSopenharmony_ci    if (tag != MKTAG('X', 'W', 'M', 'A'))
67cabdff1aSopenharmony_ci        return AVERROR_INVALIDDATA;
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_ci    /* parse fmt header */
70cabdff1aSopenharmony_ci    tag = avio_rl32(pb);
71cabdff1aSopenharmony_ci    if (tag != MKTAG('f', 'm', 't', ' '))
72cabdff1aSopenharmony_ci        return AVERROR_INVALIDDATA;
73cabdff1aSopenharmony_ci    size = avio_rl32(pb);
74cabdff1aSopenharmony_ci    st = avformat_new_stream(s, NULL);
75cabdff1aSopenharmony_ci    if (!st)
76cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_ci    ret = ff_get_wav_header(s, pb, st->codecpar, size, 0);
79cabdff1aSopenharmony_ci    if (ret < 0)
80cabdff1aSopenharmony_ci        return ret;
81cabdff1aSopenharmony_ci    ffstream(st)->need_parsing = AVSTREAM_PARSE_NONE;
82cabdff1aSopenharmony_ci
83cabdff1aSopenharmony_ci    /* XWMA encoder only allows a few channel/sample rate/bitrate combinations,
84cabdff1aSopenharmony_ci     * but some create identical files with fake bitrate (1ch 22050hz at
85cabdff1aSopenharmony_ci     * 20/48/192kbps are all 20kbps, with the exact same codec data).
86cabdff1aSopenharmony_ci     * Decoder needs correct bitrate to work, so it's normalized here. */
87cabdff1aSopenharmony_ci    if (st->codecpar->codec_id == AV_CODEC_ID_WMAV2) {
88cabdff1aSopenharmony_ci        int ch = st->codecpar->ch_layout.nb_channels;
89cabdff1aSopenharmony_ci        int sr = st->codecpar->sample_rate;
90cabdff1aSopenharmony_ci        int br = st->codecpar->bit_rate;
91cabdff1aSopenharmony_ci
92cabdff1aSopenharmony_ci        if (ch == 1) {
93cabdff1aSopenharmony_ci            if (sr == 22050 && (br==48000 || br==192000))
94cabdff1aSopenharmony_ci                br = 20000;
95cabdff1aSopenharmony_ci            else if (sr == 32000 && (br==48000 || br==192000))
96cabdff1aSopenharmony_ci                br = 20000;
97cabdff1aSopenharmony_ci            else if (sr == 44100 && (br==96000 || br==192000))
98cabdff1aSopenharmony_ci                br = 48000;
99cabdff1aSopenharmony_ci        }
100cabdff1aSopenharmony_ci        else if (ch == 2) {
101cabdff1aSopenharmony_ci            if (sr == 22050 && (br==48000 || br==192000))
102cabdff1aSopenharmony_ci                br = 32000;
103cabdff1aSopenharmony_ci            else if (sr == 32000 && (br==192000))
104cabdff1aSopenharmony_ci                br = 48000;
105cabdff1aSopenharmony_ci        }
106cabdff1aSopenharmony_ci
107cabdff1aSopenharmony_ci        st->codecpar->bit_rate = br;
108cabdff1aSopenharmony_ci    }
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_ci    /* Normally xWMA can only contain WMAv2 with 1/2 channels,
111cabdff1aSopenharmony_ci     * and WMAPRO with 6 channels. */
112cabdff1aSopenharmony_ci    if (st->codecpar->codec_id != AV_CODEC_ID_WMAV2 &&
113cabdff1aSopenharmony_ci        st->codecpar->codec_id != AV_CODEC_ID_WMAPRO) {
114cabdff1aSopenharmony_ci        avpriv_request_sample(s, "Unexpected codec (tag %s; id %d)",
115cabdff1aSopenharmony_ci                              av_fourcc2str(st->codecpar->codec_tag),
116cabdff1aSopenharmony_ci                              st->codecpar->codec_id);
117cabdff1aSopenharmony_ci    } else {
118cabdff1aSopenharmony_ci        /* xWMA shouldn't have extradata. But the WMA codecs require it,
119cabdff1aSopenharmony_ci         * so we provide our own fake extradata.
120cabdff1aSopenharmony_ci         *
121cabdff1aSopenharmony_ci         * First, check that there really was no extradata in the header. If
122cabdff1aSopenharmony_ci         * there was, then try to use it, after asking the user to provide a
123cabdff1aSopenharmony_ci         * sample of this unusual file.
124cabdff1aSopenharmony_ci         */
125cabdff1aSopenharmony_ci        if (st->codecpar->extradata_size != 0) {
126cabdff1aSopenharmony_ci            /* Surprise, surprise: We *did* get some extradata. No idea
127cabdff1aSopenharmony_ci             * if it will work, but just go on and try it, after asking
128cabdff1aSopenharmony_ci             * the user for a sample.
129cabdff1aSopenharmony_ci             */
130cabdff1aSopenharmony_ci            avpriv_request_sample(s, "Unexpected extradata (%d bytes)",
131cabdff1aSopenharmony_ci                                  st->codecpar->extradata_size);
132cabdff1aSopenharmony_ci        } else if (st->codecpar->codec_id == AV_CODEC_ID_WMAPRO) {
133cabdff1aSopenharmony_ci            if ((ret = ff_alloc_extradata(st->codecpar, 18)) < 0)
134cabdff1aSopenharmony_ci                return ret;
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_ci            memset(st->codecpar->extradata, 0, st->codecpar->extradata_size);
137cabdff1aSopenharmony_ci            st->codecpar->extradata[ 0] = st->codecpar->bits_per_coded_sample;
138cabdff1aSopenharmony_ci            st->codecpar->extradata[14] = 224;
139cabdff1aSopenharmony_ci        } else {
140cabdff1aSopenharmony_ci            if ((ret = ff_alloc_extradata(st->codecpar, 6)) < 0)
141cabdff1aSopenharmony_ci                return ret;
142cabdff1aSopenharmony_ci
143cabdff1aSopenharmony_ci            memset(st->codecpar->extradata, 0, st->codecpar->extradata_size);
144cabdff1aSopenharmony_ci            /* setup extradata with our experimentally obtained value */
145cabdff1aSopenharmony_ci            st->codecpar->extradata[4] = 31;
146cabdff1aSopenharmony_ci        }
147cabdff1aSopenharmony_ci    }
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_ci    if (!av_channel_layout_check(&st->codecpar->ch_layout)) {
150cabdff1aSopenharmony_ci        av_log(s, AV_LOG_WARNING, "Invalid channel count: %d\n",
151cabdff1aSopenharmony_ci               st->codecpar->ch_layout.nb_channels);
152cabdff1aSopenharmony_ci        return AVERROR_INVALIDDATA;
153cabdff1aSopenharmony_ci    }
154cabdff1aSopenharmony_ci    if (!st->codecpar->bits_per_coded_sample || st->codecpar->bits_per_coded_sample > 64) {
155cabdff1aSopenharmony_ci        av_log(s, AV_LOG_WARNING, "Invalid bits_per_coded_sample: %d\n",
156cabdff1aSopenharmony_ci               st->codecpar->bits_per_coded_sample);
157cabdff1aSopenharmony_ci        return AVERROR_INVALIDDATA;
158cabdff1aSopenharmony_ci    }
159cabdff1aSopenharmony_ci
160cabdff1aSopenharmony_ci    /* set the sample rate */
161cabdff1aSopenharmony_ci    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
162cabdff1aSopenharmony_ci
163cabdff1aSopenharmony_ci    /* parse the remaining RIFF chunks */
164cabdff1aSopenharmony_ci    for (;;) {
165cabdff1aSopenharmony_ci        if (pb->eof_reached) {
166cabdff1aSopenharmony_ci            ret = AVERROR_EOF;
167cabdff1aSopenharmony_ci            goto fail;
168cabdff1aSopenharmony_ci        }
169cabdff1aSopenharmony_ci        /* read next chunk tag */
170cabdff1aSopenharmony_ci        tag = avio_rl32(pb);
171cabdff1aSopenharmony_ci        size = avio_rl32(pb);
172cabdff1aSopenharmony_ci        if (tag == MKTAG('d', 'a', 't', 'a')) {
173cabdff1aSopenharmony_ci            /* We assume that the data chunk comes last. */
174cabdff1aSopenharmony_ci            break;
175cabdff1aSopenharmony_ci        } else if (tag == MKTAG('d','p','d','s')) {
176cabdff1aSopenharmony_ci            /* Quoting the MSDN xWMA docs on the dpds chunk: "Contains the
177cabdff1aSopenharmony_ci             * decoded packet cumulative data size array, each element is the
178cabdff1aSopenharmony_ci             * number of bytes accumulated after the corresponding xWMA packet
179cabdff1aSopenharmony_ci             * is decoded in order."
180cabdff1aSopenharmony_ci             *
181cabdff1aSopenharmony_ci             * Each packet has size equal to st->codecpar->block_align, which in
182cabdff1aSopenharmony_ci             * all cases I saw so far was always 2230. Thus, we can use the
183cabdff1aSopenharmony_ci             * dpds data to compute a seeking index.
184cabdff1aSopenharmony_ci             */
185cabdff1aSopenharmony_ci
186cabdff1aSopenharmony_ci            /* Error out if there is more than one dpds chunk. */
187cabdff1aSopenharmony_ci            if (dpds_table) {
188cabdff1aSopenharmony_ci                av_log(s, AV_LOG_ERROR, "two dpds chunks present\n");
189cabdff1aSopenharmony_ci                ret = AVERROR_INVALIDDATA;
190cabdff1aSopenharmony_ci                goto fail;
191cabdff1aSopenharmony_ci            }
192cabdff1aSopenharmony_ci
193cabdff1aSopenharmony_ci            /* Compute the number of entries in the dpds chunk. */
194cabdff1aSopenharmony_ci            if (size & 3) {  /* Size should be divisible by four */
195cabdff1aSopenharmony_ci                av_log(s, AV_LOG_WARNING,
196cabdff1aSopenharmony_ci                       "dpds chunk size %"PRId64" not divisible by 4\n", size);
197cabdff1aSopenharmony_ci            }
198cabdff1aSopenharmony_ci            dpds_table_size = size / 4;
199cabdff1aSopenharmony_ci            if (dpds_table_size == 0 || dpds_table_size >= INT_MAX / 4) {
200cabdff1aSopenharmony_ci                av_log(s, AV_LOG_ERROR,
201cabdff1aSopenharmony_ci                       "dpds chunk size %"PRId64" invalid\n", size);
202cabdff1aSopenharmony_ci                return AVERROR_INVALIDDATA;
203cabdff1aSopenharmony_ci            }
204cabdff1aSopenharmony_ci
205cabdff1aSopenharmony_ci            /* Allocate some temporary storage to keep the dpds data around.
206cabdff1aSopenharmony_ci             * for processing later on.
207cabdff1aSopenharmony_ci             */
208cabdff1aSopenharmony_ci            dpds_table = av_malloc_array(dpds_table_size, sizeof(uint32_t));
209cabdff1aSopenharmony_ci            if (!dpds_table) {
210cabdff1aSopenharmony_ci                return AVERROR(ENOMEM);
211cabdff1aSopenharmony_ci            }
212cabdff1aSopenharmony_ci
213cabdff1aSopenharmony_ci            for (i = 0; i < dpds_table_size; ++i) {
214cabdff1aSopenharmony_ci                if (avio_feof(pb)) {
215cabdff1aSopenharmony_ci                    ret = AVERROR_INVALIDDATA;
216cabdff1aSopenharmony_ci                    goto fail;
217cabdff1aSopenharmony_ci                }
218cabdff1aSopenharmony_ci                dpds_table[i] = avio_rl32(pb);
219cabdff1aSopenharmony_ci                size -= 4;
220cabdff1aSopenharmony_ci            }
221cabdff1aSopenharmony_ci        }
222cabdff1aSopenharmony_ci        avio_skip(pb, size);
223cabdff1aSopenharmony_ci    }
224cabdff1aSopenharmony_ci
225cabdff1aSopenharmony_ci    /* Determine overall data length */
226cabdff1aSopenharmony_ci    if (size < 0) {
227cabdff1aSopenharmony_ci        ret = AVERROR_INVALIDDATA;
228cabdff1aSopenharmony_ci        goto fail;
229cabdff1aSopenharmony_ci    }
230cabdff1aSopenharmony_ci    if (!size) {
231cabdff1aSopenharmony_ci        xwma->data_end = INT64_MAX;
232cabdff1aSopenharmony_ci    } else
233cabdff1aSopenharmony_ci        xwma->data_end = avio_tell(pb) + size;
234cabdff1aSopenharmony_ci
235cabdff1aSopenharmony_ci
236cabdff1aSopenharmony_ci    if (dpds_table && dpds_table_size) {
237cabdff1aSopenharmony_ci        int64_t cur_pos;
238cabdff1aSopenharmony_ci        const uint32_t bytes_per_sample
239cabdff1aSopenharmony_ci                = (st->codecpar->ch_layout.nb_channels * st->codecpar->bits_per_coded_sample) >> 3;
240cabdff1aSopenharmony_ci
241cabdff1aSopenharmony_ci        /* Estimate the duration from the total number of output bytes. */
242cabdff1aSopenharmony_ci        const uint64_t total_decoded_bytes = dpds_table[dpds_table_size - 1];
243cabdff1aSopenharmony_ci
244cabdff1aSopenharmony_ci        if (!bytes_per_sample) {
245cabdff1aSopenharmony_ci            av_log(s, AV_LOG_ERROR,
246cabdff1aSopenharmony_ci                   "Invalid bits_per_coded_sample %d for %d channels\n",
247cabdff1aSopenharmony_ci                   st->codecpar->bits_per_coded_sample, st->codecpar->ch_layout.nb_channels);
248cabdff1aSopenharmony_ci            ret = AVERROR_INVALIDDATA;
249cabdff1aSopenharmony_ci            goto fail;
250cabdff1aSopenharmony_ci        }
251cabdff1aSopenharmony_ci
252cabdff1aSopenharmony_ci        st->duration = total_decoded_bytes / bytes_per_sample;
253cabdff1aSopenharmony_ci
254cabdff1aSopenharmony_ci        /* Use the dpds data to build a seek table.  We can only do this after
255cabdff1aSopenharmony_ci         * we know the offset to the data chunk, as we need that to determine
256cabdff1aSopenharmony_ci         * the actual offset to each input block.
257cabdff1aSopenharmony_ci         * Note: If we allowed ourselves to assume that the data chunk always
258cabdff1aSopenharmony_ci         * follows immediately after the dpds block, we could of course guess
259cabdff1aSopenharmony_ci         * the data block's start offset already while reading the dpds chunk.
260cabdff1aSopenharmony_ci         * I decided against that, just in case other chunks ever are
261cabdff1aSopenharmony_ci         * discovered.
262cabdff1aSopenharmony_ci         */
263cabdff1aSopenharmony_ci        cur_pos = avio_tell(pb);
264cabdff1aSopenharmony_ci        for (i = 0; i < dpds_table_size; ++i) {
265cabdff1aSopenharmony_ci            /* From the number of output bytes that would accumulate in the
266cabdff1aSopenharmony_ci             * output buffer after decoding the first (i+1) packets, we compute
267cabdff1aSopenharmony_ci             * an offset / timestamp pair.
268cabdff1aSopenharmony_ci             */
269cabdff1aSopenharmony_ci            av_add_index_entry(st,
270cabdff1aSopenharmony_ci                               cur_pos + (i+1) * st->codecpar->block_align, /* pos */
271cabdff1aSopenharmony_ci                               dpds_table[i] / bytes_per_sample,            /* timestamp */
272cabdff1aSopenharmony_ci                               st->codecpar->block_align,                   /* size */
273cabdff1aSopenharmony_ci                               0,                                           /* duration */
274cabdff1aSopenharmony_ci                               AVINDEX_KEYFRAME);
275cabdff1aSopenharmony_ci        }
276cabdff1aSopenharmony_ci    } else if (st->codecpar->bit_rate) {
277cabdff1aSopenharmony_ci        /* No dpds chunk was present (or only an empty one), so estimate
278cabdff1aSopenharmony_ci         * the total duration using the average bits per sample and the
279cabdff1aSopenharmony_ci         * total data length.
280cabdff1aSopenharmony_ci         */
281cabdff1aSopenharmony_ci        st->duration = av_rescale((size<<3), st->codecpar->sample_rate, st->codecpar->bit_rate);
282cabdff1aSopenharmony_ci    }
283cabdff1aSopenharmony_ci
284cabdff1aSopenharmony_cifail:
285cabdff1aSopenharmony_ci    av_free(dpds_table);
286cabdff1aSopenharmony_ci
287cabdff1aSopenharmony_ci    return ret;
288cabdff1aSopenharmony_ci}
289cabdff1aSopenharmony_ci
290cabdff1aSopenharmony_cistatic int xwma_read_packet(AVFormatContext *s, AVPacket *pkt)
291cabdff1aSopenharmony_ci{
292cabdff1aSopenharmony_ci    int ret, size;
293cabdff1aSopenharmony_ci    int64_t left;
294cabdff1aSopenharmony_ci    AVStream *st;
295cabdff1aSopenharmony_ci    XWMAContext *xwma = s->priv_data;
296cabdff1aSopenharmony_ci
297cabdff1aSopenharmony_ci    st = s->streams[0];
298cabdff1aSopenharmony_ci
299cabdff1aSopenharmony_ci    left = xwma->data_end - avio_tell(s->pb);
300cabdff1aSopenharmony_ci    if (left <= 0) {
301cabdff1aSopenharmony_ci        return AVERROR_EOF;
302cabdff1aSopenharmony_ci    }
303cabdff1aSopenharmony_ci
304cabdff1aSopenharmony_ci    /* read a single block; the default block size is 2230. */
305cabdff1aSopenharmony_ci    size = (st->codecpar->block_align > 1) ? st->codecpar->block_align : 2230;
306cabdff1aSopenharmony_ci    size = FFMIN(size, left);
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_ci    ret  = av_get_packet(s->pb, pkt, size);
309cabdff1aSopenharmony_ci    if (ret < 0)
310cabdff1aSopenharmony_ci        return ret;
311cabdff1aSopenharmony_ci
312cabdff1aSopenharmony_ci    pkt->stream_index = 0;
313cabdff1aSopenharmony_ci    return ret;
314cabdff1aSopenharmony_ci}
315cabdff1aSopenharmony_ci
316cabdff1aSopenharmony_ciconst AVInputFormat ff_xwma_demuxer = {
317cabdff1aSopenharmony_ci    .name           = "xwma",
318cabdff1aSopenharmony_ci    .long_name      = NULL_IF_CONFIG_SMALL("Microsoft xWMA"),
319cabdff1aSopenharmony_ci    .priv_data_size = sizeof(XWMAContext),
320cabdff1aSopenharmony_ci    .read_probe     = xwma_probe,
321cabdff1aSopenharmony_ci    .read_header    = xwma_read_header,
322cabdff1aSopenharmony_ci    .read_packet    = xwma_read_packet,
323cabdff1aSopenharmony_ci};
324