xref: /third_party/ffmpeg/libavcodec/dca_exss.c (revision cabdff1a)
1/*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "dcadec.h"
22
23static void parse_xll_parameters(DCAExssParser *s, DCAExssAsset *asset)
24{
25    // Size of XLL data in extension substream
26    asset->xll_size = get_bits(&s->gb, s->exss_size_nbits) + 1;
27
28    // XLL sync word present flag
29    if (asset->xll_sync_present = get_bits1(&s->gb)) {
30        int xll_delay_nbits;
31
32        // Peak bit rate smoothing buffer size
33        skip_bits(&s->gb, 4);
34
35        // Number of bits for XLL decoding delay
36        xll_delay_nbits = get_bits(&s->gb, 5) + 1;
37
38        // Initial XLL decoding delay in frames
39        asset->xll_delay_nframes = get_bits_long(&s->gb, xll_delay_nbits);
40
41        // Number of bytes offset to XLL sync
42        asset->xll_sync_offset = get_bits(&s->gb, s->exss_size_nbits);
43    } else {
44        asset->xll_delay_nframes = 0;
45        asset->xll_sync_offset = 0;
46    }
47}
48
49static void parse_lbr_parameters(DCAExssParser *s, DCAExssAsset *asset)
50{
51    // Size of LBR component in extension substream
52    asset->lbr_size = get_bits(&s->gb, 14) + 1;
53
54    // LBR sync word present flag
55    if (get_bits1(&s->gb))
56        // LBR sync distance
57        skip_bits(&s->gb, 2);
58}
59
60static int parse_descriptor(DCAExssParser *s, DCAExssAsset *asset)
61{
62    int i, j, drc_present, descr_size, descr_pos = get_bits_count(&s->gb);
63
64    // Size of audio asset descriptor in bytes
65    descr_size = get_bits(&s->gb, 9) + 1;
66
67    // Audio asset identifier
68    asset->asset_index = get_bits(&s->gb, 3);
69
70    //
71    // Per stream static metadata
72    //
73
74    if (s->static_fields_present) {
75        // Asset type descriptor presence
76        if (get_bits1(&s->gb))
77            // Asset type descriptor
78            skip_bits(&s->gb, 4);
79
80        // Language descriptor presence
81        if (get_bits1(&s->gb))
82            // Language descriptor
83            skip_bits(&s->gb, 24);
84
85        // Additional textual information presence
86        if (get_bits1(&s->gb)) {
87            // Byte size of additional text info
88            int text_size = get_bits(&s->gb, 10) + 1;
89
90            // Sanity check available size
91            if (get_bits_left(&s->gb) < text_size * 8)
92                return AVERROR_INVALIDDATA;
93
94            // Additional textual information string
95            skip_bits_long(&s->gb, text_size * 8);
96        }
97
98        // PCM bit resolution
99        asset->pcm_bit_res = get_bits(&s->gb, 5) + 1;
100
101        // Maximum sample rate
102        asset->max_sample_rate = ff_dca_sampling_freqs[get_bits(&s->gb, 4)];
103
104        // Total number of channels
105        asset->nchannels_total = get_bits(&s->gb, 8) + 1;
106
107        // One to one map channel to speakers
108        if (asset->one_to_one_map_ch_to_spkr = get_bits1(&s->gb)) {
109            int spkr_mask_nbits = 0;
110            int spkr_remap_nsets;
111            int nspeakers[8];
112
113            // Embedded stereo flag
114            asset->embedded_stereo = asset->nchannels_total > 2 && get_bits1(&s->gb);
115
116            // Embedded 6 channels flag
117            asset->embedded_6ch = asset->nchannels_total > 6 && get_bits1(&s->gb);
118
119            // Speaker mask enabled flag
120            if (asset->spkr_mask_enabled = get_bits1(&s->gb)) {
121                // Number of bits for speaker activity mask
122                spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2;
123
124                // Loudspeaker activity mask
125                asset->spkr_mask = get_bits(&s->gb, spkr_mask_nbits);
126            }
127
128            // Number of speaker remapping sets
129            if ((spkr_remap_nsets = get_bits(&s->gb, 3)) && !spkr_mask_nbits) {
130                if (s->avctx)
131                    av_log(s->avctx, AV_LOG_ERROR, "Speaker mask disabled yet there are remapping sets\n");
132                return AVERROR_INVALIDDATA;
133            }
134
135            // Standard loudspeaker layout mask
136            for (i = 0; i < spkr_remap_nsets; i++)
137                nspeakers[i] = ff_dca_count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits));
138
139            for (i = 0; i < spkr_remap_nsets; i++) {
140                // Number of channels to be decoded for speaker remapping
141                int nch_for_remaps = get_bits(&s->gb, 5) + 1;
142
143                for (j = 0; j < nspeakers[i]; j++) {
144                    // Decoded channels to output speaker mapping mask
145                    int remap_ch_mask = get_bits_long(&s->gb, nch_for_remaps);
146
147                    // Loudspeaker remapping codes
148                    skip_bits_long(&s->gb, av_popcount(remap_ch_mask) * 5);
149                }
150            }
151        } else {
152            asset->embedded_stereo = 0;
153            asset->embedded_6ch = 0;
154            asset->spkr_mask_enabled = 0;
155            asset->spkr_mask = 0;
156
157            // Representation type
158            asset->representation_type = get_bits(&s->gb, 3);
159        }
160    }
161
162    //
163    // DRC, DNC and mixing metadata
164    //
165
166    // Dynamic range coefficient presence flag
167    drc_present = get_bits1(&s->gb);
168
169    // Code for dynamic range coefficient
170    if (drc_present)
171        skip_bits(&s->gb, 8);
172
173    // Dialog normalization presence flag
174    if (get_bits1(&s->gb))
175        // Dialog normalization code
176        skip_bits(&s->gb, 5);
177
178    // DRC for stereo downmix
179    if (drc_present && asset->embedded_stereo)
180        skip_bits(&s->gb, 8);
181
182    // Mixing metadata presence flag
183    if (s->mix_metadata_enabled && get_bits1(&s->gb)) {
184        int nchannels_dmix;
185
186        // External mixing flag
187        skip_bits1(&s->gb);
188
189        // Post mixing / replacement gain adjustment
190        skip_bits(&s->gb, 6);
191
192        // DRC prior to mixing
193        if (get_bits(&s->gb, 2) == 3)
194            // Custom code for mixing DRC
195            skip_bits(&s->gb, 8);
196        else
197            // Limit for mixing DRC
198            skip_bits(&s->gb, 3);
199
200        // Scaling type for channels of main audio
201        // Scaling parameters of main audio
202        if (get_bits1(&s->gb))
203            for (i = 0; i < s->nmixoutconfigs; i++)
204                skip_bits_long(&s->gb, 6 * s->nmixoutchs[i]);
205        else
206            skip_bits_long(&s->gb, 6 * s->nmixoutconfigs);
207
208        nchannels_dmix = asset->nchannels_total;
209        if (asset->embedded_6ch)
210            nchannels_dmix += 6;
211        if (asset->embedded_stereo)
212            nchannels_dmix += 2;
213
214        for (i = 0; i < s->nmixoutconfigs; i++) {
215            if (!s->nmixoutchs[i]) {
216                if (s->avctx)
217                    av_log(s->avctx, AV_LOG_ERROR, "Invalid speaker layout mask for mixing configuration\n");
218                return AVERROR_INVALIDDATA;
219            }
220            for (j = 0; j < nchannels_dmix; j++) {
221                // Mix output mask
222                int mix_map_mask = get_bits(&s->gb, s->nmixoutchs[i]);
223
224                // Mixing coefficients
225                skip_bits_long(&s->gb, av_popcount(mix_map_mask) * 6);
226            }
227        }
228    }
229
230    //
231    // Decoder navigation data
232    //
233
234    // Coding mode for the asset
235    asset->coding_mode = get_bits(&s->gb, 2);
236
237    // Coding components used in asset
238    switch (asset->coding_mode) {
239    case 0: // Coding mode that may contain multiple coding components
240        asset->extension_mask = get_bits(&s->gb, 12);
241
242        if (asset->extension_mask & DCA_EXSS_CORE) {
243            // Size of core component in extension substream
244            asset->core_size = get_bits(&s->gb, 14) + 1;
245            // Core sync word present flag
246            if (get_bits1(&s->gb))
247                // Core sync distance
248                skip_bits(&s->gb, 2);
249        }
250
251        if (asset->extension_mask & DCA_EXSS_XBR)
252            // Size of XBR extension in extension substream
253            asset->xbr_size = get_bits(&s->gb, 14) + 1;
254
255        if (asset->extension_mask & DCA_EXSS_XXCH)
256            // Size of XXCH extension in extension substream
257            asset->xxch_size = get_bits(&s->gb, 14) + 1;
258
259        if (asset->extension_mask & DCA_EXSS_X96)
260            // Size of X96 extension in extension substream
261            asset->x96_size = get_bits(&s->gb, 12) + 1;
262
263        if (asset->extension_mask & DCA_EXSS_LBR)
264            parse_lbr_parameters(s, asset);
265
266        if (asset->extension_mask & DCA_EXSS_XLL)
267            parse_xll_parameters(s, asset);
268
269        if (asset->extension_mask & DCA_EXSS_RSV1)
270            skip_bits(&s->gb, 16);
271
272        if (asset->extension_mask & DCA_EXSS_RSV2)
273            skip_bits(&s->gb, 16);
274        break;
275
276    case 1: // Loss-less coding mode without CBR component
277        asset->extension_mask = DCA_EXSS_XLL;
278        parse_xll_parameters(s, asset);
279        break;
280
281    case 2: // Low bit rate mode
282        asset->extension_mask = DCA_EXSS_LBR;
283        parse_lbr_parameters(s, asset);
284        break;
285
286    case 3: // Auxiliary coding mode
287        asset->extension_mask = 0;
288
289        // Size of auxiliary coded data
290        skip_bits(&s->gb, 14);
291
292        // Auxiliary codec identification
293        skip_bits(&s->gb, 8);
294
295        // Aux sync word present flag
296        if (get_bits1(&s->gb))
297            // Aux sync distance
298            skip_bits(&s->gb, 3);
299        break;
300    }
301
302    if (asset->extension_mask & DCA_EXSS_XLL)
303        // DTS-HD stream ID
304        asset->hd_stream_id = get_bits(&s->gb, 3);
305
306    // One to one mixing flag
307    // Per channel main audio scaling flag
308    // Main audio scaling codes
309    // Decode asset in secondary decoder flag
310    // Revision 2 DRC metadata
311    // Reserved
312    // Zero pad
313    if (ff_dca_seek_bits(&s->gb, descr_pos + descr_size * 8)) {
314        if (s->avctx)
315            av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS asset descriptor\n");
316        return AVERROR_INVALIDDATA;
317    }
318
319    return 0;
320}
321
322static int set_exss_offsets(DCAExssAsset *asset)
323{
324    int offs = asset->asset_offset;
325    int size = asset->asset_size;
326
327    if (asset->extension_mask & DCA_EXSS_CORE) {
328        asset->core_offset = offs;
329        if (asset->core_size > size)
330            return AVERROR_INVALIDDATA;
331        offs += asset->core_size;
332        size -= asset->core_size;
333    }
334
335    if (asset->extension_mask & DCA_EXSS_XBR) {
336        asset->xbr_offset = offs;
337        if (asset->xbr_size > size)
338            return AVERROR_INVALIDDATA;
339        offs += asset->xbr_size;
340        size -= asset->xbr_size;
341    }
342
343    if (asset->extension_mask & DCA_EXSS_XXCH) {
344        asset->xxch_offset = offs;
345        if (asset->xxch_size > size)
346            return AVERROR_INVALIDDATA;
347        offs += asset->xxch_size;
348        size -= asset->xxch_size;
349    }
350
351    if (asset->extension_mask & DCA_EXSS_X96) {
352        asset->x96_offset = offs;
353        if (asset->x96_size > size)
354            return AVERROR_INVALIDDATA;
355        offs += asset->x96_size;
356        size -= asset->x96_size;
357    }
358
359    if (asset->extension_mask & DCA_EXSS_LBR) {
360        asset->lbr_offset = offs;
361        if (asset->lbr_size > size)
362            return AVERROR_INVALIDDATA;
363        offs += asset->lbr_size;
364        size -= asset->lbr_size;
365    }
366
367    if (asset->extension_mask & DCA_EXSS_XLL) {
368        asset->xll_offset = offs;
369        if (asset->xll_size > size)
370            return AVERROR_INVALIDDATA;
371        offs += asset->xll_size;
372        size -= asset->xll_size;
373    }
374
375    return 0;
376}
377
378int ff_dca_exss_parse(DCAExssParser *s, const uint8_t *data, int size)
379{
380    int i, ret, offset, wide_hdr, header_size;
381
382    if ((ret = init_get_bits8(&s->gb, data, size)) < 0)
383        return ret;
384
385    // Extension substream sync word
386    skip_bits_long(&s->gb, 32);
387
388    // User defined bits
389    skip_bits(&s->gb, 8);
390
391    // Extension substream index
392    s->exss_index = get_bits(&s->gb, 2);
393
394    // Flag indicating short or long header size
395    wide_hdr = get_bits1(&s->gb);
396
397    // Extension substream header length
398    header_size = get_bits(&s->gb, 8 + 4 * wide_hdr) + 1;
399
400    // Check CRC
401    if (s->avctx && ff_dca_check_crc(s->avctx, &s->gb, 32 + 8, header_size * 8)) {
402        av_log(s->avctx, AV_LOG_ERROR, "Invalid EXSS header checksum\n");
403        return AVERROR_INVALIDDATA;
404    }
405
406    s->exss_size_nbits = 16 + 4 * wide_hdr;
407
408    // Number of bytes of extension substream
409    s->exss_size = get_bits(&s->gb, s->exss_size_nbits) + 1;
410    if (s->exss_size > size) {
411        if (s->avctx)
412            av_log(s->avctx, AV_LOG_ERROR, "Packet too short for EXSS frame\n");
413        return AVERROR_INVALIDDATA;
414    }
415
416    // Per stream static fields presence flag
417    if (s->static_fields_present = get_bits1(&s->gb)) {
418        int active_exss_mask[8];
419
420        // Reference clock code
421        skip_bits(&s->gb, 2);
422
423        // Extension substream frame duration
424        skip_bits(&s->gb, 3);
425
426        // Timecode presence flag
427        if (get_bits1(&s->gb))
428            // Timecode data
429            skip_bits_long(&s->gb, 36);
430
431        // Number of defined audio presentations
432        s->npresents = get_bits(&s->gb, 3) + 1;
433        if (s->npresents > 1) {
434            if (s->avctx)
435                avpriv_request_sample(s->avctx, "%d audio presentations", s->npresents);
436            return AVERROR_PATCHWELCOME;
437        }
438
439        // Number of audio assets in extension substream
440        s->nassets = get_bits(&s->gb, 3) + 1;
441        if (s->nassets > 1) {
442            if (s->avctx)
443                avpriv_request_sample(s->avctx, "%d audio assets", s->nassets);
444            return AVERROR_PATCHWELCOME;
445        }
446
447        // Active extension substream mask for audio presentation
448        for (i = 0; i < s->npresents; i++)
449            active_exss_mask[i] = get_bits(&s->gb, s->exss_index + 1);
450
451        // Active audio asset mask
452        for (i = 0; i < s->npresents; i++)
453            skip_bits_long(&s->gb, av_popcount(active_exss_mask[i]) * 8);
454
455        // Mixing metadata enable flag
456        if (s->mix_metadata_enabled = get_bits1(&s->gb)) {
457            int spkr_mask_nbits;
458
459            // Mixing metadata adjustment level
460            skip_bits(&s->gb, 2);
461
462            // Number of bits for mixer output speaker activity mask
463            spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2;
464
465            // Number of mixing configurations
466            s->nmixoutconfigs = get_bits(&s->gb, 2) + 1;
467
468            // Speaker layout mask for mixer output channels
469            for (i = 0; i < s->nmixoutconfigs; i++)
470                s->nmixoutchs[i] = ff_dca_count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits));
471        }
472    } else {
473        s->npresents = 1;
474        s->nassets = 1;
475    }
476
477    // Size of encoded asset data in bytes
478    offset = header_size;
479    for (i = 0; i < s->nassets; i++) {
480        s->assets[i].asset_offset = offset;
481        s->assets[i].asset_size = get_bits(&s->gb, s->exss_size_nbits) + 1;
482        offset += s->assets[i].asset_size;
483        if (offset > s->exss_size) {
484            if (s->avctx)
485                av_log(s->avctx, AV_LOG_ERROR, "EXSS asset out of bounds\n");
486            return AVERROR_INVALIDDATA;
487        }
488    }
489
490    // Audio asset descriptor
491    for (i = 0; i < s->nassets; i++) {
492        if ((ret = parse_descriptor(s, &s->assets[i])) < 0)
493            return ret;
494        if ((ret = set_exss_offsets(&s->assets[i])) < 0) {
495            if (s->avctx)
496                av_log(s->avctx, AV_LOG_ERROR, "Invalid extension size in EXSS asset descriptor\n");
497            return ret;
498        }
499    }
500
501    // Backward compatible core present
502    // Backward compatible core substream index
503    // Backward compatible core asset index
504    // Reserved
505    // Byte align
506    // CRC16 of extension substream header
507    if (ff_dca_seek_bits(&s->gb, header_size * 8)) {
508        if (s->avctx)
509            av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS header\n");
510        return AVERROR_INVALIDDATA;
511    }
512
513    return 0;
514}
515