1/*
2 * Windows Media Audio Lossless decoder
3 * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
4 * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
5 * Copyright (c) 2011 Andreas Öman
6 * Copyright (c) 2011 - 2012 Mashiat Sarker Shakkhar
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <inttypes.h>
26
27#include "libavutil/attributes.h"
28#include "libavutil/avassert.h"
29#include "libavutil/mem_internal.h"
30
31#include "avcodec.h"
32#include "codec_internal.h"
33#include "internal.h"
34#include "get_bits.h"
35#include "put_bits.h"
36#include "lossless_audiodsp.h"
37#include "wma.h"
38#include "wma_common.h"
39
/** current decoder limitations */
#define WMALL_MAX_CHANNELS      8                       ///< max number of handled channels
#define MAX_SUBFRAMES          32                       ///< max number of subframes per channel
#define MAX_BANDS              29                       ///< max number of scale factor bands
#define MAX_FRAMESIZE       32768                       ///< maximum compressed frame size (multiplied by the channel count in decode_init())
#define MAX_ORDER             256                       ///< largest CDLMS filter order accepted by decode_cdlms(); also sizes the per-filter arrays

#define WMALL_BLOCK_MIN_BITS    6                       ///< log2 of min block size
#define WMALL_BLOCK_MAX_BITS   14                       ///< log2 of max block size
#define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS)    ///< maximum block size
#define WMALL_BLOCK_SIZES    (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes

#define WMALL_COEFF_PAD_SIZE   16                       ///< pad coef buffers with 0 for use with SIMD
53
/**
 * @brief frame-specific decoder context for a single channel
 *
 * The subframe layout (num_subframes, subframe_len, subframe_offsets) is
 * rebuilt for every frame by decode_tilehdr().
 */
typedef struct WmallChannelCtx {
    int16_t     prev_block_len;                         ///< length of the previous block (initialised to samples_per_frame)
    uint8_t     transmit_coefs;                         ///< NOTE(review): not referenced in the visible part of this file -- confirm whether it is still needed
    uint8_t     num_subframes;                          ///< number of subframes of this channel in the current frame
    uint16_t    subframe_len[MAX_SUBFRAMES];            ///< subframe length in samples
    uint16_t    subframe_offsets[MAX_SUBFRAMES];        ///< subframe positions in the current frame
    uint8_t     cur_subframe;                           ///< current subframe number
    uint16_t    decoded_samples;                        ///< number of already processed samples
    int         quant_step;                             ///< quantization step for the current subframe
    int         transient_counter;                      ///< number of transient samples from the beginning of the transient zone
} WmallChannelCtx;
68
/**
 * @brief main decoder context
 *
 * Stream-level parameters are filled in once by decode_init(); the filter
 * parameters (AC filter, MCLMS, CDLMS, LPC) are read from the bitstream by
 * the corresponding decode_*() helpers.
 */
typedef struct WmallDecodeCtx {
    /* generic decoder variables */
    AVCodecContext  *avctx;                         ///< owning codec context (set in decode_init())
    AVFrame         *frame;                         ///< frame allocated with av_frame_alloc() in decode_init()
    LLAudDSPContext dsp;                           ///< accelerated DSP functions
    uint8_t         *frame_data;                    ///< compressed frame data
    int             max_frame_size;                 ///< max bitstream size (MAX_FRAMESIZE * number of channels)
    PutBitContext   pb;                             ///< context for filling the frame_data buffer

    /* frame size dependent frame information (set during initialization) */
    uint32_t        decode_flags;                   ///< used compression features
    int             len_prefix;                     ///< frame is prefixed with its length
    int             dynamic_range_compression;      ///< frame contains DRC data
    uint8_t         bits_per_sample;                ///< sample size in bits as read from the extradata (16 or 24)
    uint16_t        samples_per_frame;              ///< number of samples to output
    uint16_t        log2_frame_size;                ///< av_log2(block_align) + 4
    int8_t          num_channels;                   ///< number of channels in the stream (same as AVCodecContext.ch_layout.nb_channels)
    int8_t          lfe_channel;                    ///< lfe channel index
    uint8_t         max_num_subframes;              ///< 1 << (bits 3..5 of decode_flags)
    uint8_t         subframe_len_bits;              ///< number of bits used for the subframe length
    uint8_t         max_subframe_len_bit;           ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
    uint16_t        min_samples_per_subframe;       ///< samples_per_frame / max_num_subframes

    /* packet decode state */
    GetBitContext   pgb;                            ///< bitstream reader context for the packet
    int             next_packet_start;              ///< start offset of the next WMA packet in the demuxer packet
    uint8_t         packet_offset;                  ///< offset to the frame in the packet
    uint8_t         packet_sequence_number;         ///< current packet number
    int             num_saved_bits;                 ///< saved number of bits
    int             frame_offset;                   ///< frame offset in the bit reservoir
    int             subframe_offset;                ///< subframe offset in the bit reservoir
    uint8_t         packet_loss;                    ///< set in case of bitstream error
    uint8_t         packet_done;                    ///< set when a packet is fully decoded

    /* frame decode state */
    uint32_t        frame_num;                      ///< current frame number (not used for decoding)
    GetBitContext   gb;                             ///< bitstream reader context
    int             buf_bit_size;                   ///< buffer size in bits
    int16_t         *samples_16[WMALL_MAX_CHANNELS]; ///< current sample buffer pointer (16-bit)
    int32_t         *samples_32[WMALL_MAX_CHANNELS]; ///< current sample buffer pointer (24-bit)
    uint8_t         drc_gain;                       ///< gain for the DRC tool
    int8_t          skip_frame;                     ///< skip output step
    int8_t          parsed_all_subframes;           ///< all subframes decoded?

    /* subframe/block decode state */
    int16_t         subframe_len;                   ///< current subframe length
    int8_t          channels_for_cur_subframe;      ///< number of channels that contain the subframe
    int8_t          channel_indexes_for_cur_subframe[WMALL_MAX_CHANNELS];

    WmallChannelCtx channel[WMALL_MAX_CHANNELS];    ///< per channel data

    // WMA Lossless-specific

    /* NOTE(review): the do_* flags are assigned outside the visible part of
     * this file; presumably they enable the individual coding tools. */
    uint8_t do_arith_coding;
    uint8_t do_ac_filter;
    uint8_t do_inter_ch_decorr;                     ///< when set, the first residue of a seekable tile is coded with one extra bit
    uint8_t do_mclms;
    uint8_t do_lpc;

    int8_t  acfilter_order;                         ///< AC filter order, 1..16 (decode_ac_filter())
    int8_t  acfilter_scaling;                       ///< 4-bit scaling; also the bit width of each transmitted coefficient
    int16_t acfilter_coeffs[16];                    ///< AC filter coefficients (transmitted value + 1)
    int     acfilter_prevvalues[WMALL_MAX_CHANNELS][16];

    int8_t  mclms_order;                            ///< multichannel LMS order, 2..32 in steps of 2 (decode_mclms())
    int8_t  mclms_scaling;                          ///< right shift applied to the MCLMS prediction
    int16_t mclms_coeffs[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS * 32];  ///< history coefficients, mclms_order * num_channels entries per channel
    int16_t mclms_coeffs_cur[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS];   ///< coefficients for the current sample of lower-numbered channels
    int32_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];               ///< clipped sample history, read starting at mclms_recent
    int32_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];                  ///< sign of each history sample, used as coefficient update
    int     mclms_recent;                           ///< index of the most recent entry in mclms_prevvalues / mclms_updates

    int     movave_scaling;                         ///< shift used by the moving average in decode_channel_residues()
    int     quant_stepsize;

    struct {
        int order;                                  ///< filter order, a multiple of 8 up to MAX_ORDER
        int scaling;                                ///< right shift applied to the prediction
        int coefsend;                               ///< number of coefficients transmitted in the bitstream
        int bitsend;                                ///< number of bits per transmitted coefficient
        DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
        DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];  ///< sample history; accessed as int16_t or int32_t by the CD_LMS instantiations
        DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
        int recent;                                 ///< index of the most recent entry in lms_prevvalues / lms_updates
    } cdlms[WMALL_MAX_CHANNELS][9];

    int cdlms_ttl[WMALL_MAX_CHANNELS];              ///< number of cascaded LMS filters per channel (1..8)

    int bV3RTM;                                     ///< decode_flags & 0x100

    int is_channel_coded[WMALL_MAX_CHANNELS];       ///< channels with a zero entry are skipped by mclms_predict()
    int update_speed[WMALL_MAX_CHANNELS];           ///< current CDLMS update speed, 8 (normal) or 16 (high)

    int transient[WMALL_MAX_CHANNELS];
    int transient_pos[WMALL_MAX_CHANNELS];
    int seekable_tile;                              ///< when set, the tile carries an explicit moving average and first residue

    unsigned ave_sum[WMALL_MAX_CHANNELS];           ///< moving average accumulator steering the residue remainder width

    int channel_residues[WMALL_MAX_CHANNELS][WMALL_BLOCK_MAX_SIZE];  ///< decoded residues, turned into samples in place by the filters

    int lpc_coefs[WMALL_MAX_CHANNELS][40];
    int lpc_order;                                  ///< 1..32 (decode_lpc())
    int lpc_scaling;                                ///< 4-bit value read in decode_lpc()
    int lpc_intbits;                                ///< 1..8; lpc_scaling + lpc_intbits is the width of each coefficient
} WmallDecodeCtx;
178
/** Sign of an integer expression: 1 if positive, -1 if negative, 0 if zero. */
#define WMASIGN(x) (-((x) < 0) + ((x) > 0))
181
182static av_cold int decode_init(AVCodecContext *avctx)
183{
184    WmallDecodeCtx *s  = avctx->priv_data;
185    uint8_t *edata_ptr = avctx->extradata;
186    unsigned int channel_mask;
187    int i, log2_max_num_subframes;
188
189    if (avctx->block_align <= 0 || avctx->block_align > (1<<21)) {
190        av_log(avctx, AV_LOG_ERROR, "block_align is not set or invalid\n");
191        return AVERROR(EINVAL);
192    }
193
194    if (avctx->extradata_size >= 18) {
195        s->decode_flags    = AV_RL16(edata_ptr + 14);
196        channel_mask       = AV_RL32(edata_ptr +  2);
197        s->bits_per_sample = AV_RL16(edata_ptr);
198        if (s->bits_per_sample == 16)
199            avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
200        else if (s->bits_per_sample == 24) {
201            avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
202            avctx->bits_per_raw_sample = 24;
203        } else {
204            av_log(avctx, AV_LOG_ERROR, "Unknown bit-depth: %"PRIu8"\n",
205                   s->bits_per_sample);
206            return AVERROR_INVALIDDATA;
207        }
208        /* dump the extradata */
209        for (i = 0; i < avctx->extradata_size; i++)
210            ff_dlog(avctx, "[%x] ", avctx->extradata[i]);
211        ff_dlog(avctx, "\n");
212
213    } else {
214        avpriv_request_sample(avctx, "Unsupported extradata size");
215        return AVERROR_PATCHWELCOME;
216    }
217
218    if (channel_mask) {
219        av_channel_layout_uninit(&avctx->ch_layout);
220        av_channel_layout_from_mask(&avctx->ch_layout, channel_mask);
221    }
222    av_assert0(avctx->ch_layout.nb_channels >= 0);
223    if (avctx->ch_layout.nb_channels > WMALL_MAX_CHANNELS) {
224        avpriv_request_sample(avctx,
225                            "More than " AV_STRINGIFY(WMALL_MAX_CHANNELS) " channels");
226        return AVERROR_PATCHWELCOME;
227    }
228
229    s->num_channels = avctx->ch_layout.nb_channels;
230
231    /* extract lfe channel position */
232    s->lfe_channel = -1;
233
234    if (channel_mask & 8) {
235        unsigned int mask;
236        for (mask = 1; mask < 16; mask <<= 1)
237            if (channel_mask & mask)
238                ++s->lfe_channel;
239    }
240
241    s->max_frame_size = MAX_FRAMESIZE * avctx->ch_layout.nb_channels;
242    s->frame_data = av_mallocz(s->max_frame_size + AV_INPUT_BUFFER_PADDING_SIZE);
243    if (!s->frame_data)
244        return AVERROR(ENOMEM);
245
246    s->avctx = avctx;
247    ff_llauddsp_init(&s->dsp);
248    init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
249
250    /* generic init */
251    s->log2_frame_size = av_log2(avctx->block_align) + 4;
252
253    /* frame info */
254    s->skip_frame  = 1; /* skip first frame */
255    s->packet_loss = 1;
256    s->len_prefix  = s->decode_flags & 0x40;
257
258    /* get frame len */
259    s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
260                                                          3, s->decode_flags);
261    av_assert0(s->samples_per_frame <= WMALL_BLOCK_MAX_SIZE);
262
263    /* init previous block len */
264    for (i = 0; i < avctx->ch_layout.nb_channels; i++)
265        s->channel[i].prev_block_len = s->samples_per_frame;
266
267    /* subframe info */
268    log2_max_num_subframes  = (s->decode_flags & 0x38) >> 3;
269    s->max_num_subframes    = 1 << log2_max_num_subframes;
270    s->max_subframe_len_bit = 0;
271    s->subframe_len_bits    = av_log2(log2_max_num_subframes) + 1;
272
273    s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
274    s->dynamic_range_compression = s->decode_flags & 0x80;
275    s->bV3RTM                    = s->decode_flags & 0x100;
276
277    if (s->max_num_subframes > MAX_SUBFRAMES) {
278        av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %"PRIu8"\n",
279               s->max_num_subframes);
280        return AVERROR_INVALIDDATA;
281    }
282
283    s->frame = av_frame_alloc();
284    if (!s->frame)
285        return AVERROR(ENOMEM);
286
287    return 0;
288}
289
290/**
291 * @brief Decode the subframe length.
292 * @param s      context
293 * @param offset sample offset in the frame
294 * @return decoded subframe length on success, < 0 in case of an error
295 */
296static int decode_subframe_length(WmallDecodeCtx *s, int offset)
297{
298    int frame_len_ratio, subframe_len, len;
299
300    /* no need to read from the bitstream when only one length is possible */
301    if (offset == s->samples_per_frame - s->min_samples_per_subframe)
302        return s->min_samples_per_subframe;
303
304    len             = av_log2(s->max_num_subframes - 1) + 1;
305    frame_len_ratio = get_bits(&s->gb, len);
306    subframe_len    = s->min_samples_per_subframe * (frame_len_ratio + 1);
307
308    /* sanity check the length */
309    if (subframe_len < s->min_samples_per_subframe ||
310        subframe_len > s->samples_per_frame) {
311        av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
312               subframe_len);
313        return AVERROR_INVALIDDATA;
314    }
315    return subframe_len;
316}
317
318/**
319 * @brief Decode how the data in the frame is split into subframes.
320 *       Every WMA frame contains the encoded data for a fixed number of
321 *       samples per channel. The data for every channel might be split
322 *       into several subframes. This function will reconstruct the list of
323 *       subframes for every channel.
324 *
325 *       If the subframes are not evenly split, the algorithm estimates the
326 *       channels with the lowest number of total samples.
327 *       Afterwards, for each of these channels a bit is read from the
328 *       bitstream that indicates if the channel contains a subframe with the
329 *       next subframe size that is going to be read from the bitstream or not.
330 *       If a channel contains such a subframe, the subframe size gets added to
331 *       the channel's subframe list.
332 *       The algorithm repeats these steps until the frame is properly divided
333 *       between the individual channels.
334 *
335 * @param s context
336 * @return 0 on success, < 0 in case of an error
337 */
338static int decode_tilehdr(WmallDecodeCtx *s)
339{
340    uint16_t num_samples[WMALL_MAX_CHANNELS] = { 0 }; /* sum of samples for all currently known subframes of a channel */
341    uint8_t  contains_subframe[WMALL_MAX_CHANNELS];   /* flag indicating if a channel contains the current subframe */
342    int channels_for_cur_subframe = s->num_channels;  /* number of channels that contain the current subframe */
343    int fixed_channel_layout = 0;                     /* flag indicating that all channels use the same subfra2me offsets and sizes */
344    int min_channel_len = 0;                          /* smallest sum of samples (channels with this length will be processed first) */
345    int c, tile_aligned;
346
347    /* reset tiling information */
348    for (c = 0; c < s->num_channels; c++)
349        s->channel[c].num_subframes = 0;
350
351    tile_aligned = get_bits1(&s->gb);
352    if (s->max_num_subframes == 1 || tile_aligned)
353        fixed_channel_layout = 1;
354
355    /* loop until the frame data is split between the subframes */
356    do {
357        int subframe_len, in_use = 0;
358
359        /* check which channels contain the subframe */
360        for (c = 0; c < s->num_channels; c++) {
361            if (num_samples[c] == min_channel_len) {
362                if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
363                   (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) {
364                    contains_subframe[c] = 1;
365                } else {
366                    contains_subframe[c] = get_bits1(&s->gb);
367                }
368                in_use |= contains_subframe[c];
369            } else
370                contains_subframe[c] = 0;
371        }
372
373        if (!in_use) {
374            av_log(s->avctx, AV_LOG_ERROR,
375                   "Found empty subframe\n");
376            return AVERROR_INVALIDDATA;
377        }
378
379        /* get subframe length, subframe_len == 0 is not allowed */
380        if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
381            return AVERROR_INVALIDDATA;
382        /* add subframes to the individual channels and find new min_channel_len */
383        min_channel_len += subframe_len;
384        for (c = 0; c < s->num_channels; c++) {
385            WmallChannelCtx *chan = &s->channel[c];
386
387            if (contains_subframe[c]) {
388                if (chan->num_subframes >= MAX_SUBFRAMES) {
389                    av_log(s->avctx, AV_LOG_ERROR,
390                           "broken frame: num subframes > 31\n");
391                    return AVERROR_INVALIDDATA;
392                }
393                chan->subframe_len[chan->num_subframes] = subframe_len;
394                num_samples[c] += subframe_len;
395                ++chan->num_subframes;
396                if (num_samples[c] > s->samples_per_frame) {
397                    av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
398                           "channel len(%"PRIu16") > samples_per_frame(%"PRIu16")\n",
399                           num_samples[c], s->samples_per_frame);
400                    return AVERROR_INVALIDDATA;
401                }
402            } else if (num_samples[c] <= min_channel_len) {
403                if (num_samples[c] < min_channel_len) {
404                    channels_for_cur_subframe = 0;
405                    min_channel_len = num_samples[c];
406                }
407                ++channels_for_cur_subframe;
408            }
409        }
410    } while (min_channel_len < s->samples_per_frame);
411
412    for (c = 0; c < s->num_channels; c++) {
413        int i, offset = 0;
414        for (i = 0; i < s->channel[c].num_subframes; i++) {
415            s->channel[c].subframe_offsets[i] = offset;
416            offset += s->channel[c].subframe_len[i];
417        }
418    }
419
420    return 0;
421}
422
423static void decode_ac_filter(WmallDecodeCtx *s)
424{
425    int i;
426    s->acfilter_order   = get_bits(&s->gb, 4) + 1;
427    s->acfilter_scaling = get_bits(&s->gb, 4);
428
429    for (i = 0; i < s->acfilter_order; i++)
430        s->acfilter_coeffs[i] = get_bitsz(&s->gb, s->acfilter_scaling) + 1;
431}
432
433static void decode_mclms(WmallDecodeCtx *s)
434{
435    s->mclms_order   = (get_bits(&s->gb, 4) + 1) * 2;
436    s->mclms_scaling = get_bits(&s->gb, 4);
437    if (get_bits1(&s->gb)) {
438        int i, send_coef_bits;
439        int cbits = av_log2(s->mclms_scaling + 1);
440        if (1 << cbits < s->mclms_scaling + 1)
441            cbits++;
442
443        send_coef_bits = get_bitsz(&s->gb, cbits) + 2;
444
445        for (i = 0; i < s->mclms_order * s->num_channels * s->num_channels; i++)
446            s->mclms_coeffs[i] = get_bits(&s->gb, send_coef_bits);
447
448        for (i = 0; i < s->num_channels; i++) {
449            int c;
450            for (c = 0; c < i; c++)
451                s->mclms_coeffs_cur[i * s->num_channels + c] = get_bits(&s->gb, send_coef_bits);
452        }
453    }
454}
455
456static int decode_cdlms(WmallDecodeCtx *s)
457{
458    int c, i;
459    int cdlms_send_coef = get_bits1(&s->gb);
460
461    for (c = 0; c < s->num_channels; c++) {
462        s->cdlms_ttl[c] = get_bits(&s->gb, 3) + 1;
463        for (i = 0; i < s->cdlms_ttl[c]; i++) {
464            s->cdlms[c][i].order = (get_bits(&s->gb, 7) + 1) * 8;
465            if (s->cdlms[c][i].order > MAX_ORDER) {
466                av_log(s->avctx, AV_LOG_ERROR,
467                       "Order[%d][%d] %d > max (%d), not supported\n",
468                       c, i, s->cdlms[c][i].order, MAX_ORDER);
469                s->cdlms[0][0].order = 0;
470                return AVERROR_INVALIDDATA;
471            }
472            if(s->cdlms[c][i].order & 8 && s->bits_per_sample == 16) {
473                static int warned;
474                if(!warned)
475                    avpriv_request_sample(s->avctx, "CDLMS of order %d",
476                                          s->cdlms[c][i].order);
477                warned = 1;
478            }
479        }
480
481        for (i = 0; i < s->cdlms_ttl[c]; i++)
482            s->cdlms[c][i].scaling = get_bits(&s->gb, 4);
483
484        if (cdlms_send_coef) {
485            for (i = 0; i < s->cdlms_ttl[c]; i++) {
486                int cbits, shift_l, shift_r, j;
487                cbits = av_log2(s->cdlms[c][i].order);
488                if ((1 << cbits) < s->cdlms[c][i].order)
489                    cbits++;
490                s->cdlms[c][i].coefsend = get_bits(&s->gb, cbits) + 1;
491
492                cbits = av_log2(s->cdlms[c][i].scaling + 1);
493                if ((1 << cbits) < s->cdlms[c][i].scaling + 1)
494                    cbits++;
495
496                s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2;
497                shift_l = 32 - s->cdlms[c][i].bitsend;
498                shift_r = 32 - s->cdlms[c][i].scaling - 2;
499                for (j = 0; j < s->cdlms[c][i].coefsend; j++)
500                    s->cdlms[c][i].coefs[j] =
501                        (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
502            }
503        }
504
505        for (i = 0; i < s->cdlms_ttl[c]; i++)
506            memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
507                   0, WMALL_COEFF_PAD_SIZE);
508    }
509
510    return 0;
511}
512
513static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
514{
515    int i = 0;
516    unsigned int ave_mean;
517    s->transient[ch] = get_bits1(&s->gb);
518    if (s->transient[ch]) {
519        s->transient_pos[ch] = get_bits(&s->gb, av_log2(tile_size));
520        if (s->transient_pos[ch])
521            s->transient[ch] = 0;
522        s->channel[ch].transient_counter =
523            FFMAX(s->channel[ch].transient_counter, s->samples_per_frame / 2);
524    } else if (s->channel[ch].transient_counter)
525        s->transient[ch] = 1;
526
527    if (s->seekable_tile) {
528        ave_mean = get_bits(&s->gb, s->bits_per_sample);
529        s->ave_sum[ch] = ave_mean << (s->movave_scaling + 1);
530    }
531
532    if (s->seekable_tile) {
533        if (s->do_inter_ch_decorr)
534            s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample + 1);
535        else
536            s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample);
537        i++;
538    }
539    for (; i < tile_size; i++) {
540        int rem, rem_bits;
541        unsigned quo = 0, residue;
542        while(get_bits1(&s->gb)) {
543            quo++;
544            if (get_bits_left(&s->gb) <= 0)
545                return -1;
546        }
547        if (quo >= 32)
548            quo += get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1);
549
550        ave_mean = (s->ave_sum[ch] + (1 << s->movave_scaling)) >> (s->movave_scaling + 1);
551        if (ave_mean <= 1)
552            residue = quo;
553        else {
554            rem_bits = av_ceil_log2(ave_mean);
555            rem      = get_bits_long(&s->gb, rem_bits);
556            residue  = (quo << rem_bits) + rem;
557        }
558
559        s->ave_sum[ch] = residue + s->ave_sum[ch] -
560                         (s->ave_sum[ch] >> s->movave_scaling);
561
562        residue = (residue >> 1) ^ -(residue & 1);
563        s->channel_residues[ch][i] = residue;
564    }
565
566    return 0;
567
568}
569
570static void decode_lpc(WmallDecodeCtx *s)
571{
572    int ch, i, cbits;
573    s->lpc_order   = get_bits(&s->gb, 5) + 1;
574    s->lpc_scaling = get_bits(&s->gb, 4);
575    s->lpc_intbits = get_bits(&s->gb, 3) + 1;
576    cbits = s->lpc_scaling + s->lpc_intbits;
577    for (ch = 0; ch < s->num_channels; ch++)
578        for (i = 0; i < s->lpc_order; i++)
579            s->lpc_coefs[ch][i] = get_sbits(&s->gb, cbits);
580}
581
582static void clear_codec_buffers(WmallDecodeCtx *s)
583{
584    int ich, ilms;
585
586    memset(s->acfilter_coeffs,     0, sizeof(s->acfilter_coeffs));
587    memset(s->acfilter_prevvalues, 0, sizeof(s->acfilter_prevvalues));
588    memset(s->lpc_coefs,           0, sizeof(s->lpc_coefs));
589
590    memset(s->mclms_coeffs,     0, sizeof(s->mclms_coeffs));
591    memset(s->mclms_coeffs_cur, 0, sizeof(s->mclms_coeffs_cur));
592    memset(s->mclms_prevvalues, 0, sizeof(s->mclms_prevvalues));
593    memset(s->mclms_updates,    0, sizeof(s->mclms_updates));
594
595    for (ich = 0; ich < s->num_channels; ich++) {
596        for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++) {
597            memset(s->cdlms[ich][ilms].coefs, 0,
598                   sizeof(s->cdlms[ich][ilms].coefs));
599            memset(s->cdlms[ich][ilms].lms_prevvalues, 0,
600                   sizeof(s->cdlms[ich][ilms].lms_prevvalues));
601            memset(s->cdlms[ich][ilms].lms_updates, 0,
602                   sizeof(s->cdlms[ich][ilms].lms_updates));
603        }
604        s->ave_sum[ich] = 0;
605    }
606}
607
608/**
609 * @brief Reset filter parameters and transient area at new seekable tile.
610 */
611static void reset_codec(WmallDecodeCtx *s)
612{
613    int ich, ilms;
614    s->mclms_recent = s->mclms_order * s->num_channels;
615    for (ich = 0; ich < s->num_channels; ich++) {
616        for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++)
617            s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order;
618        /* first sample of a seekable subframe is considered as the starting of
619            a transient area which is samples_per_frame samples long */
620        s->channel[ich].transient_counter = s->samples_per_frame;
621        s->transient[ich]     = 1;
622        s->transient_pos[ich] = 0;
623    }
624}
625
626static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
627{
628    int i, j, ich, pred_error;
629    int order        = s->mclms_order;
630    int num_channels = s->num_channels;
631    int range        = 1 << (s->bits_per_sample - 1);
632
633    for (ich = 0; ich < num_channels; ich++) {
634        pred_error = s->channel_residues[ich][icoef] - (unsigned)pred[ich];
635        if (pred_error > 0) {
636            for (i = 0; i < order * num_channels; i++)
637                s->mclms_coeffs[i + ich * order * num_channels] +=
638                    s->mclms_updates[s->mclms_recent + i];
639            for (j = 0; j < ich; j++)
640                s->mclms_coeffs_cur[ich * num_channels + j] += WMASIGN(s->channel_residues[j][icoef]);
641        } else if (pred_error < 0) {
642            for (i = 0; i < order * num_channels; i++)
643                s->mclms_coeffs[i + ich * order * num_channels] -=
644                    s->mclms_updates[s->mclms_recent + i];
645            for (j = 0; j < ich; j++)
646                s->mclms_coeffs_cur[ich * num_channels + j] -= WMASIGN(s->channel_residues[j][icoef]);
647        }
648    }
649
650    for (ich = num_channels - 1; ich >= 0; ich--) {
651        s->mclms_recent--;
652        s->mclms_prevvalues[s->mclms_recent] = av_clip(s->channel_residues[ich][icoef],
653            -range, range - 1);
654        s->mclms_updates[s->mclms_recent] = WMASIGN(s->channel_residues[ich][icoef]);
655    }
656
657    if (s->mclms_recent == 0) {
658        memcpy(&s->mclms_prevvalues[order * num_channels],
659               s->mclms_prevvalues,
660               sizeof(int32_t) * order * num_channels);
661        memcpy(&s->mclms_updates[order * num_channels],
662               s->mclms_updates,
663               sizeof(int32_t) * order * num_channels);
664        s->mclms_recent = num_channels * order;
665    }
666}
667
668static void mclms_predict(WmallDecodeCtx *s, int icoef, int *pred)
669{
670    int ich, i;
671    int order        = s->mclms_order;
672    int num_channels = s->num_channels;
673
674    for (ich = 0; ich < num_channels; ich++) {
675        pred[ich] = 0;
676        if (!s->is_channel_coded[ich])
677            continue;
678        for (i = 0; i < order * num_channels; i++)
679            pred[ich] += (uint32_t)s->mclms_prevvalues[i + s->mclms_recent] *
680                         s->mclms_coeffs[i + order * num_channels * ich];
681        for (i = 0; i < ich; i++)
682            pred[ich] += (uint32_t)s->channel_residues[i][icoef] *
683                         s->mclms_coeffs_cur[i + num_channels * ich];
684        pred[ich] += (1U << s->mclms_scaling) >> 1;
685        pred[ich] >>= s->mclms_scaling;
686        s->channel_residues[ich][icoef] += (unsigned)pred[ich];
687    }
688}
689
690static void revert_mclms(WmallDecodeCtx *s, int tile_size)
691{
692    int icoef, pred[WMALL_MAX_CHANNELS] = { 0 };
693    for (icoef = 0; icoef < tile_size; icoef++) {
694        mclms_predict(s, icoef, pred);
695        mclms_update(s, icoef, pred);
696    }
697}
698
699static void use_high_update_speed(WmallDecodeCtx *s, int ich)
700{
701    int ilms, recent, icoef;
702    for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
703        recent = s->cdlms[ich][ilms].recent;
704        if (s->update_speed[ich] == 16)
705            continue;
706        if (s->bV3RTM) {
707            for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
708                s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2;
709        } else {
710            for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
711                s->cdlms[ich][ilms].lms_updates[icoef] *= 2;
712        }
713    }
714    s->update_speed[ich] = 16;
715}
716
717static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
718{
719    int ilms, recent, icoef;
720    for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
721        recent = s->cdlms[ich][ilms].recent;
722        if (s->update_speed[ich] == 8)
723            continue;
724        if (s->bV3RTM)
725            for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
726                s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2;
727        else
728            for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
729                s->cdlms[ich][ilms].lms_updates[icoef] /= 2;
730    }
731    s->update_speed[ich] = 8;
732}
733
/**
 * Instantiate the cascaded LMS helpers for one history sample width.
 *
 * Expands to two static functions:
 *  - lms_update<bits>(): pushes one reconstructed sample into the history of
 *    filter ilms of channel ich and records its update value.
 *  - revert_cdlms<bits>(): runs all filters of a channel, last filter first,
 *    over the residues [coef_begin, coef_end) and replaces each residue by
 *    residue + prediction.
 *
 * Only block comments may be used inside the macro body; a line comment
 * would swallow the line continuation.
 *
 * @param bits  width of the history samples; lms_prevvalues is accessed as
 *              int<bits>_t
 * @param ROUND the filter order is aligned up to a multiple of this value
 *              before it is handed to the DSP routine
 */
#define CD_LMS(bits, ROUND) \
static void lms_update ## bits (WmallDecodeCtx *s, int ich, int ilms, int input) \
{ \
    int recent = s->cdlms[ich][ilms].recent; \
    int range  = 1 << s->bits_per_sample - 1; \
    int order  = s->cdlms[ich][ilms].order; \
    int ##bits##_t *prev = (int##bits##_t *)s->cdlms[ich][ilms].lms_prevvalues; \
 \
    /* the window grows towards index 0; once it gets there, the first */ \
    /* `order` entries are copied to [order, 2 * order) and the window */ \
    /* continues from index order - 1 */ \
    if (recent) \
        recent--; \
    else { \
        memcpy(prev + order, prev, (bits/8) * order); \
        memcpy(s->cdlms[ich][ilms].lms_updates + order, \
               s->cdlms[ich][ilms].lms_updates, \
               sizeof(*s->cdlms[ich][ilms].lms_updates) * order); \
        recent = order - 1; \
    } \
 \
    /* store the clipped sample and sign(input) * update speed */ \
    prev[recent] = av_clip(input, -range, range - 1); \
    s->cdlms[ich][ilms].lms_updates[recent] = WMASIGN(input) * s->update_speed[ich]; \
 \
    /* older update values are scaled down: >> 2 at distance order / 16 */ \
    /* and >> 1 at distance order / 8 from the newest entry */ \
    s->cdlms[ich][ilms].lms_updates[recent + (order >> 4)] >>= 2; \
    s->cdlms[ich][ilms].lms_updates[recent + (order >> 3)] >>= 1; \
    s->cdlms[ich][ilms].recent = recent; \
    /* zero everything behind the window up to the end of the array */ \
    memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0, \
           sizeof(s->cdlms[ich][ilms].lms_updates) - \
           sizeof(*s->cdlms[ich][ilms].lms_updates)*(recent+order)); \
} \
 \
static void revert_cdlms ## bits (WmallDecodeCtx *s, int ch, \
                                  int coef_begin, int coef_end) \
{ \
    int icoef, ilms, num_lms, residue, input; \
    unsigned pred;\
 \
    num_lms = s->cdlms_ttl[ch]; \
    for (ilms = num_lms - 1; ilms >= 0; ilms--) { \
        for (icoef = coef_begin; icoef < coef_end; icoef++) { \
            int##bits##_t *prevvalues = (int##bits##_t *)s->cdlms[ch][ilms].lms_prevvalues; \
            /* start from the rounding offset for the final shift */ \
            pred = (1 << s->cdlms[ch][ilms].scaling) >> 1; \
            residue = s->channel_residues[ch][icoef]; \
            /* NOTE(review): the DSP routine is defined elsewhere; presumably */ \
            /* it returns the dot product of coefs and the history window and */ \
            /* adapts coefs by sign(residue) * lms_updates -- confirm in */ \
            /* lossless_audiodsp */ \
            pred += s->dsp.scalarproduct_and_madd_int## bits (s->cdlms[ch][ilms].coefs, \
                                                        prevvalues + s->cdlms[ch][ilms].recent, \
                                                        s->cdlms[ch][ilms].lms_updates + \
                                                        s->cdlms[ch][ilms].recent, \
                                                        FFALIGN(s->cdlms[ch][ilms].order, ROUND), \
                                                        WMASIGN(residue)); \
            /* the sum is kept unsigned so that overflow wraps; the shift is */ \
            /* done on the signed value */ \
            input = residue + (unsigned)((int)pred >> s->cdlms[ch][ilms].scaling); \
            lms_update ## bits(s, ch, ilms, input); \
            s->channel_residues[ch][icoef] = input; \
        } \
    } \
    if (bits <= 16) emms_c(); \
}

/* 16-bit history, order aligned to WMALL_COEFF_PAD_SIZE */
CD_LMS(16, WMALL_COEFF_PAD_SIZE)
/* 32-bit history, order aligned to 8 */
CD_LMS(32, 8)
791
792static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size)
793{
794    if (s->num_channels != 2)
795        return;
796    else if (s->is_channel_coded[0] || s->is_channel_coded[1]) {
797        int icoef;
798        for (icoef = 0; icoef < tile_size; icoef++) {
799            s->channel_residues[0][icoef] -= (unsigned)(s->channel_residues[1][icoef] >> 1);
800            s->channel_residues[1][icoef] += (unsigned) s->channel_residues[0][icoef];
801        }
802    }
803}
804
805static void revert_acfilter(WmallDecodeCtx *s, int tile_size)
806{
807    int ich, pred, i, j;
808    int16_t *filter_coeffs = s->acfilter_coeffs;
809    int scaling            = s->acfilter_scaling;
810    int order              = s->acfilter_order;
811
812    for (ich = 0; ich < s->num_channels; ich++) {
813        int *prevvalues = s->acfilter_prevvalues[ich];
814        for (i = 0; i < order; i++) {
815            pred = 0;
816            for (j = 0; j < order; j++) {
817                if (i <= j)
818                    pred += (uint32_t)filter_coeffs[j] * prevvalues[j - i];
819                else
820                    pred += (uint32_t)s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
821            }
822            pred >>= scaling;
823            s->channel_residues[ich][i] += (unsigned)pred;
824        }
825        for (i = order; i < tile_size; i++) {
826            pred = 0;
827            for (j = 0; j < order; j++)
828                pred += (uint32_t)s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
829            pred >>= scaling;
830            s->channel_residues[ich][i] += (unsigned)pred;
831        }
832        for (j = order - 1; j >= 0; j--)
833            if (tile_size <= j) {
834                prevvalues[j] = prevvalues[j - tile_size];
835            }else
836                prevvalues[j] = s->channel_residues[ich][tile_size - j - 1];
837    }
838}
839
840static int decode_subframe(WmallDecodeCtx *s)
841{
842    int offset        = s->samples_per_frame;
843    int subframe_len  = s->samples_per_frame;
844    int total_samples = s->samples_per_frame * s->num_channels;
845    int i, j, rawpcm_tile, padding_zeroes, res;
846
847    s->subframe_offset = get_bits_count(&s->gb);
848
849    /* reset channel context and find the next block offset and size
850        == the next block of the channel with the smallest number of
851        decoded samples */
852    for (i = 0; i < s->num_channels; i++) {
853        if (offset > s->channel[i].decoded_samples) {
854            offset = s->channel[i].decoded_samples;
855            subframe_len =
856                s->channel[i].subframe_len[s->channel[i].cur_subframe];
857        }
858    }
859
860    /* get a list of all channels that contain the estimated block */
861    s->channels_for_cur_subframe = 0;
862    for (i = 0; i < s->num_channels; i++) {
863        const int cur_subframe = s->channel[i].cur_subframe;
864        /* subtract already processed samples */
865        total_samples -= s->channel[i].decoded_samples;
866
867        /* and count if there are multiple subframes that match our profile */
868        if (offset == s->channel[i].decoded_samples &&
869            subframe_len == s->channel[i].subframe_len[cur_subframe]) {
870            total_samples -= s->channel[i].subframe_len[cur_subframe];
871            s->channel[i].decoded_samples +=
872                s->channel[i].subframe_len[cur_subframe];
873            s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
874            ++s->channels_for_cur_subframe;
875        }
876    }
877
878    /* check if the frame will be complete after processing the
879        estimated block */
880    if (!total_samples)
881        s->parsed_all_subframes = 1;
882
883
884    s->seekable_tile = get_bits1(&s->gb);
885    if (s->seekable_tile) {
886        clear_codec_buffers(s);
887
888        s->do_arith_coding    = get_bits1(&s->gb);
889        if (s->do_arith_coding) {
890            avpriv_request_sample(s->avctx, "Arithmetic coding");
891            return AVERROR_PATCHWELCOME;
892        }
893        s->do_ac_filter       = get_bits1(&s->gb);
894        s->do_inter_ch_decorr = get_bits1(&s->gb);
895        s->do_mclms           = get_bits1(&s->gb);
896
897        if (s->do_ac_filter)
898            decode_ac_filter(s);
899
900        if (s->do_mclms)
901            decode_mclms(s);
902
903        if ((res = decode_cdlms(s)) < 0)
904            return res;
905        s->movave_scaling = get_bits(&s->gb, 3);
906        s->quant_stepsize = get_bits(&s->gb, 8) + 1;
907
908        reset_codec(s);
909    }
910
911    rawpcm_tile = get_bits1(&s->gb);
912
913    if (!rawpcm_tile && !s->cdlms[0][0].order) {
914        av_log(s->avctx, AV_LOG_DEBUG,
915               "Waiting for seekable tile\n");
916        av_frame_unref(s->frame);
917        return -1;
918    }
919
920
921    for (i = 0; i < s->num_channels; i++)
922        s->is_channel_coded[i] = 1;
923
924    if (!rawpcm_tile) {
925        for (i = 0; i < s->num_channels; i++)
926            s->is_channel_coded[i] = get_bits1(&s->gb);
927
928        if (s->bV3RTM) {
929            // LPC
930            s->do_lpc = get_bits1(&s->gb);
931            if (s->do_lpc) {
932                decode_lpc(s);
933                avpriv_request_sample(s->avctx, "Expect wrong output since "
934                                      "inverse LPC filter");
935            }
936        } else
937            s->do_lpc = 0;
938    }
939
940    if (get_bits_left(&s->gb) < 1)
941        return AVERROR_INVALIDDATA;
942
943    if (get_bits1(&s->gb))
944        padding_zeroes = get_bits(&s->gb, 5);
945    else
946        padding_zeroes = 0;
947
948    if (rawpcm_tile) {
949        int bits = s->bits_per_sample - padding_zeroes;
950        if (bits <= 0) {
951            av_log(s->avctx, AV_LOG_ERROR,
952                   "Invalid number of padding bits in raw PCM tile\n");
953            return AVERROR_INVALIDDATA;
954        }
955        ff_dlog(s->avctx, "RAWPCM %d bits per sample. "
956                "total %d bits, remain=%d\n", bits,
957                bits * s->num_channels * subframe_len, get_bits_count(&s->gb));
958        for (i = 0; i < s->num_channels; i++)
959            for (j = 0; j < subframe_len; j++)
960                s->channel_residues[i][j] = get_sbits_long(&s->gb, bits);
961    } else {
962        if (s->bits_per_sample < padding_zeroes)
963            return AVERROR_INVALIDDATA;
964        for (i = 0; i < s->num_channels; i++) {
965            if (s->is_channel_coded[i]) {
966                decode_channel_residues(s, i, subframe_len);
967                if (s->seekable_tile)
968                    use_high_update_speed(s, i);
969                else
970                    use_normal_update_speed(s, i);
971                if (s->bits_per_sample > 16)
972                    revert_cdlms32(s, i, 0, subframe_len);
973                else
974                    revert_cdlms16(s, i, 0, subframe_len);
975            } else {
976                memset(s->channel_residues[i], 0, sizeof(**s->channel_residues) * subframe_len);
977            }
978        }
979
980        if (s->do_mclms)
981            revert_mclms(s, subframe_len);
982        if (s->do_inter_ch_decorr)
983            revert_inter_ch_decorr(s, subframe_len);
984        if (s->do_ac_filter)
985            revert_acfilter(s, subframe_len);
986
987        /* Dequantize */
988        if (s->quant_stepsize != 1)
989            for (i = 0; i < s->num_channels; i++)
990                for (j = 0; j < subframe_len; j++)
991                    s->channel_residues[i][j] *= (unsigned)s->quant_stepsize;
992    }
993
994    /* Write to proper output buffer depending on bit-depth */
995    for (i = 0; i < s->channels_for_cur_subframe; i++) {
996        int c = s->channel_indexes_for_cur_subframe[i];
997        int subframe_len = s->channel[c].subframe_len[s->channel[c].cur_subframe];
998
999        for (j = 0; j < subframe_len; j++) {
1000            if (s->bits_per_sample == 16) {
1001                *s->samples_16[c]++ = (int16_t) s->channel_residues[c][j] * (1 << padding_zeroes);
1002            } else {
1003                *s->samples_32[c]++ = s->channel_residues[c][j] * (256U << padding_zeroes);
1004            }
1005        }
1006    }
1007
1008    /* handled one subframe */
1009    for (i = 0; i < s->channels_for_cur_subframe; i++) {
1010        int c = s->channel_indexes_for_cur_subframe[i];
1011        if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1012            av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1013            return AVERROR_INVALIDDATA;
1014        }
1015        ++s->channel[c].cur_subframe;
1016    }
1017    return 0;
1018}
1019
1020/**
1021 * @brief Decode one WMA frame.
1022 * @param s codec context
1023 * @return 0 if the trailer bit indicates that this is the last frame,
1024 *         1 if there are additional frames
1025 */
1026static int decode_frame(WmallDecodeCtx *s)
1027{
1028    GetBitContext* gb = &s->gb;
1029    int more_frames = 0, len = 0, i, ret;
1030
1031    s->frame->nb_samples = s->samples_per_frame;
1032    if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0) {
1033        /* return an error if no frame could be decoded at all */
1034        s->packet_loss = 1;
1035        s->frame->nb_samples = 0;
1036        return ret;
1037    }
1038    for (i = 0; i < s->num_channels; i++) {
1039        s->samples_16[i] = (int16_t *)s->frame->extended_data[i];
1040        s->samples_32[i] = (int32_t *)s->frame->extended_data[i];
1041    }
1042
1043    /* get frame length */
1044    if (s->len_prefix)
1045        len = get_bits(gb, s->log2_frame_size);
1046
1047    /* decode tile information */
1048    if ((ret = decode_tilehdr(s))) {
1049        s->packet_loss = 1;
1050        av_frame_unref(s->frame);
1051        return ret;
1052    }
1053
1054    /* read drc info */
1055    if (s->dynamic_range_compression)
1056        s->drc_gain = get_bits(gb, 8);
1057
1058    /* no idea what these are for, might be the number of samples
1059       that need to be skipped at the beginning or end of a stream */
1060    if (get_bits1(gb)) {
1061        int av_unused skip;
1062
1063        /* usually true for the first frame */
1064        if (get_bits1(gb)) {
1065            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1066            ff_dlog(s->avctx, "start skip: %i\n", skip);
1067        }
1068
1069        /* sometimes true for the last frame */
1070        if (get_bits1(gb)) {
1071            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1072            ff_dlog(s->avctx, "end skip: %i\n", skip);
1073            s->frame->nb_samples -= skip;
1074            if (s->frame->nb_samples <= 0)
1075                return AVERROR_INVALIDDATA;
1076        }
1077
1078    }
1079
1080    /* reset subframe states */
1081    s->parsed_all_subframes = 0;
1082    for (i = 0; i < s->num_channels; i++) {
1083        s->channel[i].decoded_samples = 0;
1084        s->channel[i].cur_subframe    = 0;
1085    }
1086
1087    /* decode all subframes */
1088    while (!s->parsed_all_subframes) {
1089        int decoded_samples = s->channel[0].decoded_samples;
1090        if (decode_subframe(s) < 0) {
1091            s->packet_loss = 1;
1092            if (s->frame->nb_samples)
1093                s->frame->nb_samples = decoded_samples;
1094            return 0;
1095        }
1096    }
1097
1098    ff_dlog(s->avctx, "Frame done\n");
1099
1100    s->skip_frame = 0;
1101
1102    if (s->len_prefix) {
1103        if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1104            /* FIXME: not sure if this is always an error */
1105            av_log(s->avctx, AV_LOG_ERROR,
1106                   "frame[%"PRIu32"] would have to skip %i bits\n",
1107                   s->frame_num,
1108                   len - (get_bits_count(gb) - s->frame_offset) - 1);
1109            s->packet_loss = 1;
1110            return 0;
1111        }
1112
1113        /* skip the rest of the frame data */
1114        skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1115    }
1116
1117    /* decode trailer bit */
1118    more_frames = get_bits1(gb);
1119    ++s->frame_num;
1120    return more_frames;
1121}
1122
1123/**
1124 * @brief Calculate remaining input buffer length.
1125 * @param s  codec context
1126 * @param gb bitstream reader context
1127 * @return remaining size in bits
1128 */
1129static int remaining_bits(WmallDecodeCtx *s, GetBitContext *gb)
1130{
1131    return s->buf_bit_size - get_bits_count(gb);
1132}
1133
1134/**
1135 * @brief Fill the bit reservoir with a (partial) frame.
1136 * @param s      codec context
1137 * @param gb     bitstream reader context
1138 * @param len    length of the partial frame
1139 * @param append decides whether to reset the buffer or not
1140 */
1141static void save_bits(WmallDecodeCtx *s, GetBitContext* gb, int len,
1142                      int append)
1143{
1144    int buflen;
1145    PutBitContext tmp;
1146
1147    /* when the frame data does not need to be concatenated, the input buffer
1148        is reset and additional bits from the previous frame are copied
1149        and skipped later so that a fast byte copy is possible */
1150
1151    if (!append) {
1152        s->frame_offset   = get_bits_count(gb) & 7;
1153        s->num_saved_bits = s->frame_offset;
1154        init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
1155    }
1156
1157    buflen = (s->num_saved_bits + len + 8) >> 3;
1158
1159    if (len <= 0 || buflen > s->max_frame_size) {
1160        avpriv_request_sample(s->avctx, "Too small input buffer");
1161        s->packet_loss = 1;
1162        s->num_saved_bits = 0;
1163        return;
1164    }
1165
1166    s->num_saved_bits += len;
1167    if (!append) {
1168        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1169                         s->num_saved_bits);
1170    } else {
1171        int align = 8 - (get_bits_count(gb) & 7);
1172        align = FFMIN(align, len);
1173        put_bits(&s->pb, align, get_bits(gb, align));
1174        len -= align;
1175        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1176    }
1177    skip_bits_long(gb, len);
1178
1179    tmp = s->pb;
1180    flush_put_bits(&tmp);
1181
1182    init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1183    skip_bits(&s->gb, s->frame_offset);
1184}
1185
1186static int decode_packet(AVCodecContext *avctx, AVFrame *rframe,
1187                         int *got_frame_ptr, AVPacket* avpkt)
1188{
1189    WmallDecodeCtx *s = avctx->priv_data;
1190    GetBitContext* gb  = &s->pgb;
1191    const uint8_t* buf = avpkt->data;
1192    int buf_size       = avpkt->size;
1193    int num_bits_prev_frame, packet_sequence_number, spliced_packet;
1194
1195    s->frame->nb_samples = 0;
1196
1197    if (!buf_size && s->num_saved_bits > get_bits_count(&s->gb)) {
1198        s->packet_done = 0;
1199        if (!decode_frame(s))
1200            s->num_saved_bits = 0;
1201    } else if (s->packet_done || s->packet_loss) {
1202        s->packet_done = 0;
1203
1204        if (!buf_size)
1205            return 0;
1206
1207        s->next_packet_start = buf_size - FFMIN(avctx->block_align, buf_size);
1208        buf_size             = FFMIN(avctx->block_align, buf_size);
1209        s->buf_bit_size      = buf_size << 3;
1210
1211        /* parse packet header */
1212        init_get_bits(gb, buf, s->buf_bit_size);
1213        packet_sequence_number = get_bits(gb, 4);
1214        skip_bits(gb, 1);   // Skip seekable_frame_in_packet, currently unused
1215        spliced_packet = get_bits1(gb);
1216        if (spliced_packet)
1217            avpriv_request_sample(avctx, "Bitstream splicing");
1218
1219        /* get number of bits that need to be added to the previous frame */
1220        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1221
1222        /* check for packet loss */
1223        if (!s->packet_loss &&
1224            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1225            s->packet_loss = 1;
1226            av_log(avctx, AV_LOG_ERROR,
1227                   "Packet loss detected! seq %"PRIx8" vs %x\n",
1228                   s->packet_sequence_number, packet_sequence_number);
1229        }
1230        s->packet_sequence_number = packet_sequence_number;
1231
1232        if (num_bits_prev_frame > 0) {
1233            int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);
1234            if (num_bits_prev_frame >= remaining_packet_bits) {
1235                num_bits_prev_frame = remaining_packet_bits;
1236                s->packet_done = 1;
1237            }
1238
1239            /* Append the previous frame data to the remaining data from the
1240             * previous packet to create a full frame. */
1241            save_bits(s, gb, num_bits_prev_frame, 1);
1242
1243            /* decode the cross packet frame if it is valid */
1244            if (num_bits_prev_frame < remaining_packet_bits && !s->packet_loss)
1245                decode_frame(s);
1246        } else if (s->num_saved_bits - s->frame_offset) {
1247            ff_dlog(avctx, "ignoring %x previously saved bits\n",
1248                    s->num_saved_bits - s->frame_offset);
1249        }
1250
1251        if (s->packet_loss) {
1252            /* Reset number of saved bits so that the decoder does not start
1253             * to decode incomplete frames in the s->len_prefix == 0 case. */
1254            s->num_saved_bits = 0;
1255            s->packet_loss    = 0;
1256            init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
1257        }
1258
1259    } else {
1260        int frame_size;
1261
1262        s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
1263        init_get_bits(gb, avpkt->data, s->buf_bit_size);
1264        skip_bits(gb, s->packet_offset);
1265
1266        if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
1267            (frame_size = show_bits(gb, s->log2_frame_size)) &&
1268            frame_size <= remaining_bits(s, gb)) {
1269            save_bits(s, gb, frame_size, 0);
1270
1271            if (!s->packet_loss)
1272                s->packet_done = !decode_frame(s);
1273        } else if (!s->len_prefix
1274                   && s->num_saved_bits > get_bits_count(&s->gb)) {
1275            /* when the frames do not have a length prefix, we don't know the
1276             * compressed length of the individual frames however, we know what
1277             * part of a new packet belongs to the previous frame therefore we
1278             * save the incoming packet first, then we append the "previous
1279             * frame" data from the next packet so that we get a buffer that
1280             * only contains full frames */
1281            s->packet_done = !decode_frame(s);
1282        } else {
1283            s->packet_done = 1;
1284        }
1285    }
1286
1287    if (remaining_bits(s, gb) < 0) {
1288        av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -remaining_bits(s, gb));
1289        s->packet_loss = 1;
1290    }
1291
1292    if (s->packet_done && !s->packet_loss &&
1293        remaining_bits(s, gb) > 0) {
1294        /* save the rest of the data so that it can be decoded
1295         * with the next packet */
1296        save_bits(s, gb, remaining_bits(s, gb), 0);
1297    }
1298
1299    *got_frame_ptr   = s->frame->nb_samples > 0;
1300    av_frame_move_ref(rframe, s->frame);
1301
1302    s->packet_offset = get_bits_count(gb) & 7;
1303
1304    return (s->packet_loss) ? AVERROR_INVALIDDATA : buf_size ? get_bits_count(gb) >> 3 : 0;
1305}
1306
1307static void flush(AVCodecContext *avctx)
1308{
1309    WmallDecodeCtx *s    = avctx->priv_data;
1310    s->packet_loss       = 1;
1311    s->packet_done       = 0;
1312    s->num_saved_bits    = 0;
1313    s->frame_offset      = 0;
1314    s->next_packet_start = 0;
1315    s->cdlms[0][0].order = 0;
1316    s->frame->nb_samples = 0;
1317    init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
1318}
1319
1320static av_cold int decode_close(AVCodecContext *avctx)
1321{
1322    WmallDecodeCtx *s = avctx->priv_data;
1323
1324    av_frame_free(&s->frame);
1325    av_freep(&s->frame_data);
1326
1327    return 0;
1328}
1329
1330const FFCodec ff_wmalossless_decoder = {
1331    .p.name         = "wmalossless",
1332    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"),
1333    .p.type         = AVMEDIA_TYPE_AUDIO,
1334    .p.id           = AV_CODEC_ID_WMALOSSLESS,
1335    .priv_data_size = sizeof(WmallDecodeCtx),
1336    .init           = decode_init,
1337    .close          = decode_close,
1338    FF_CODEC_DECODE_CB(decode_packet),
1339    .flush          = flush,
1340    .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
1341    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1342    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
1343                                                      AV_SAMPLE_FMT_S32P,
1344                                                      AV_SAMPLE_FMT_NONE },
1345};
1346