xref: /third_party/ffmpeg/libavcodec/apedec.c (revision cabdff1a)
1/*
2 * Monkey's Audio lossless audio decoder
3 * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
4 *  based upon libdemac from Dave Chapman.
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include <inttypes.h>
24
25#include "libavutil/avassert.h"
26#include "libavutil/channel_layout.h"
27#include "libavutil/crc.h"
28#include "libavutil/opt.h"
29#include "lossless_audiodsp.h"
30#include "avcodec.h"
31#include "bswapdsp.h"
32#include "bytestream.h"
33#include "codec_internal.h"
34#include "internal.h"
35#include "get_bits.h"
36#include "unary.h"
37
38/**
39 * @file
40 * Monkey's Audio lossless audio decoder
41 */
42
/* MAX_CHANNELS sizes the per-channel decoded[] pointer array in APEContext. */
#define MAX_CHANNELS        2
#define MAX_BYTESPERSAMPLE  3

/* Frame codes. NOTE(review): presumably compared against APEContext.frameflags;
 * the uses are outside this part of the file -- confirm. */
#define APE_FRAMECODE_MONO_SILENCE    1
#define APE_FRAMECODE_STEREO_SILENCE  3
#define APE_FRAMECODE_PSEUDO_STEREO   4

/* The predictor history arrays hold HISTORY_SIZE + PREDICTOR_SIZE entries. */
#define HISTORY_SIZE 512
#define PREDICTOR_ORDER 8
/** Total size of all predictor histories */
#define PREDICTOR_SIZE 50

/* These evaluate to 50, 42, 34 and 26 respectively.
 * NOTE(review): presumably indices relative to APEPredictor.buf that are
 * passed as the delayA/delayB arguments of the filter helpers -- confirm at
 * the call sites. */
#define YDELAYA (18 + PREDICTOR_ORDER*4)
#define YDELAYB (18 + PREDICTOR_ORDER*3)
#define XDELAYA (18 + PREDICTOR_ORDER*2)
#define XDELAYB (18 + PREDICTOR_ORDER)

/* NOTE(review): not referenced in the visible part of the file; presumably
 * further offsets into the predictor history -- confirm. */
#define YADAPTCOEFFSA 18
#define XADAPTCOEFFSA 14
#define YADAPTCOEFFSB 10
#define XADAPTCOEFFSB 5
64
65/**
66 * Possible compression levels
67 * @{
68 */
/*
 * ape_decode_init() accepts only non-zero multiples of 1000 up to
 * COMPRESSION_LEVEL_INSANE and derives the filter set index as
 * compression_level / 1000 - 1. COMPRESSION_LEVEL_INSANE is rejected for
 * file versions below 3930.
 */
enum APECompressionLevel {
    COMPRESSION_LEVEL_FAST       = 1000,
    COMPRESSION_LEVEL_NORMAL     = 2000,
    COMPRESSION_LEVEL_HIGH       = 3000,
    COMPRESSION_LEVEL_EXTRA_HIGH = 4000,
    COMPRESSION_LEVEL_INSANE     = 5000
};
76/** @} */
77
78#define APE_FILTER_LEVELS 3
79
/**
 * Filter orders depending on compression level.
 *
 * Rows are indexed by APEContext.fset (compression_level / 1000 - 1), columns
 * by filter stage. ape_decode_init() stops allocating filter buffers at the
 * first zero entry of a row.
 */
static const uint16_t ape_filter_orders[5][APE_FILTER_LEVELS] = {
    {  0,   0,    0 },
    { 16,   0,    0 },
    { 64,   0,    0 },
    { 32, 256,    0 },
    { 16, 256, 1280 }
};

/**
 * Filter fraction bits depending on compression level.
 *
 * Same row/column layout as ape_filter_orders.
 * NOTE(review): the consumers are outside this part of the file.
 */
static const uint8_t ape_filter_fracbits[5][APE_FILTER_LEVELS] = {
    {  0,  0,  0 },
    { 11,  0,  0 },
    { 11,  0,  0 },
    { 10, 13,  0 },
    { 11, 13, 15 }
};
97
98
/**
 * Filters applied to the decoded data.
 *
 * APEContext holds one APEFilter per filter stage and channel
 * (filters[APE_FILTER_LEVELS][2]).
 */
typedef struct APEFilter {
    int16_t *coeffs;        ///< actual coefficients used in filtering
    int16_t *adaptcoeffs;   ///< adaptive filter coefficients used for correcting of actual filter coefficients
    int16_t *historybuffer; ///< filter memory
    int16_t *delay;         ///< filtered values

    uint32_t avg;           ///< NOTE(review): not touched in the visible code; presumably a running average -- confirm
} APEFilter;
108
/**
 * Adaptive Rice parameter state, one instance per channel.
 *
 * The ape_decode_value_*() helpers and update_rice() fold every decoded
 * magnitude into ksum and move k up or down when ksum crosses
 * power-of-two thresholds derived from k.
 */
typedef struct APERice {
    uint32_t k;     ///< current Rice parameter
    uint32_t ksum;  ///< running sum used to adapt k
} APERice;
113
/** State of the range decoder used by the range_*() helpers. */
typedef struct APERangecoder {
    uint32_t low;           ///< low end of interval
    uint32_t range;         ///< length of interval
    uint32_t help;          ///< bytes_to_follow resp. intermediate value; the decoder stores range / total (or range >> shift) here between range_decode_cul*() and range_decode_update()
    unsigned int buffer;    ///< buffer for input/output
} APERangecoder;
120
/**
 * Filter histories (32-bit variant).
 *
 * The two-element arrays are indexed by the `filter` argument of the
 * filter helpers (0 or 1).
 */
typedef struct APEPredictor {
    int32_t *buf;            ///< current window into historybuffer; init_predictor_decoder() resets it to the start

    int32_t lastA[2];        ///< previous first-stage output per filter

    int32_t filterA[2];      ///< running output of the final stage per filter
    int32_t filterB[2];      ///< running output of the intermediate stage per filter

    uint32_t coeffsA[2][4];  ///< adaption coefficients
    uint32_t coeffsB[2][5];  ///< adaption coefficients
    int32_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];

    unsigned int sample_pos; ///< compared against warm-up thresholds by the filter helpers; reset to 0 by init_predictor_decoder()
} APEPredictor;
136
/**
 * Filter histories (64-bit variant).
 *
 * Mirrors APEPredictor field for field with 64-bit element types.
 */
typedef struct APEPredictor64 {
    int64_t *buf;            ///< current window into historybuffer; init_predictor_decoder() resets it to the start

    int64_t lastA[2];

    int64_t filterA[2];
    int64_t filterB[2];

    uint64_t coeffsA[2][4];  ///< adaption coefficients
    uint64_t coeffsB[2][5];  ///< adaption coefficients
    int64_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];

    unsigned int sample_pos; ///< reset to 0 by init_predictor_decoder()
} APEPredictor64;
151
/**
 * Decoder context.
 *
 * Filled in by ape_decode_init() from the codec context and its 6-byte
 * extradata; buffers it owns are released by ape_decode_close().
 */
typedef struct APEContext {
    AVClass *class;                          ///< class for AVOptions
    AVCodecContext *avctx;
    BswapDSPContext bdsp;
    LLAudDSPContext adsp;
    int channels;                            ///< channel count taken from avctx->ch_layout at init
    int samples;                             ///< samples left to decode in current frame
    int bps;                                 ///< bits per coded sample (8, 16 or 24 are accepted at init)

    int fileversion;                         ///< codec version, very important in decoding process
    int compression_level;                   ///< compression levels
    int fset;                                ///< which filter set to use (calculated from compression level)
    int flags;                               ///< global decoder flags

    uint32_t CRC;                            ///< signalled frame CRC
    uint32_t CRC_state;                      ///< accumulated CRC
    int frameflags;                          ///< frame flags
    APEPredictor predictor;                  ///< predictor used for final reconstruction
    APEPredictor64 predictor64;              ///< 64bit predictor used for final reconstruction

    int32_t *decoded_buffer;                 ///< heap buffer freed in ape_decode_close()
    int decoded_size;
    int32_t *decoded[MAX_CHANNELS];          ///< decoded data for each channel
    int blocks_per_loop;                     ///< maximum number of samples to decode for each call

    int16_t* filterbuf[APE_FILTER_LEVELS];   ///< filter memory

    APERangecoder rc;                        ///< rangecoder used to decode actual values
    APERice riceX;                           ///< rice code parameters for the second channel
    APERice riceY;                           ///< rice code parameters for the first channel
    APEFilter filters[APE_FILTER_LEVELS][2]; ///< filters used for reconstruction
    GetBitContext gb;

    uint8_t *data;                           ///< current frame data
    uint8_t *data_end;                       ///< frame data end
    int data_size;                           ///< frame data allocated size
    const uint8_t *ptr;                      ///< current position in frame data

    int error;                               ///< set to 1 by the entropy decoding helpers when input runs out or a code is invalid

    /* Version-specific routines selected once in ape_decode_init(). */
    void (*entropy_decode_mono)(struct APEContext *ctx, int blockstodecode);
    void (*entropy_decode_stereo)(struct APEContext *ctx, int blockstodecode);
    void (*predictor_decode_mono)(struct APEContext *ctx, int count);
    void (*predictor_decode_stereo)(struct APEContext *ctx, int count);
} APEContext;
198
/*
 * Forward declarations.
 *
 * The entropy_decode_* and predictor_decode_* routines are referenced by
 * ape_decode_init(), which stores the pair matching the file version in the
 * APEContext function pointers. The numeric suffix is the lowest file version
 * handled by that routine (0000 = everything older than 3860).
 */
static void ape_apply_filters(APEContext *ctx, int32_t *decoded0,
                              int32_t *decoded1, int count);

static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode);
static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode);
static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode);

static void predictor_decode_mono_3800(APEContext *ctx, int count);
static void predictor_decode_stereo_3800(APEContext *ctx, int count);
static void predictor_decode_mono_3930(APEContext *ctx, int count);
static void predictor_decode_stereo_3930(APEContext *ctx, int count);
static void predictor_decode_mono_3950(APEContext *ctx, int count);
static void predictor_decode_stereo_3950(APEContext *ctx, int count);
218
219static av_cold int ape_decode_close(AVCodecContext *avctx)
220{
221    APEContext *s = avctx->priv_data;
222    int i;
223
224    for (i = 0; i < APE_FILTER_LEVELS; i++)
225        av_freep(&s->filterbuf[i]);
226
227    av_freep(&s->decoded_buffer);
228    av_freep(&s->data);
229    s->decoded_size = s->data_size = 0;
230
231    return 0;
232}
233
234static av_cold int ape_decode_init(AVCodecContext *avctx)
235{
236    APEContext *s = avctx->priv_data;
237    int channels = avctx->ch_layout.nb_channels;
238    int i;
239
240    if (avctx->extradata_size != 6) {
241        av_log(avctx, AV_LOG_ERROR, "Incorrect extradata\n");
242        return AVERROR(EINVAL);
243    }
244    if (channels > 2) {
245        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo is supported\n");
246        return AVERROR(EINVAL);
247    }
248    avctx->bits_per_raw_sample =
249    s->bps = avctx->bits_per_coded_sample;
250    switch (s->bps) {
251    case 8:
252        avctx->sample_fmt = AV_SAMPLE_FMT_U8P;
253        break;
254    case 16:
255        avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
256        break;
257    case 24:
258        avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
259        break;
260    default:
261        avpriv_request_sample(avctx,
262                              "%d bits per coded sample", s->bps);
263        return AVERROR_PATCHWELCOME;
264    }
265    s->avctx             = avctx;
266    s->channels          = channels;
267    s->fileversion       = AV_RL16(avctx->extradata);
268    s->compression_level = AV_RL16(avctx->extradata + 2);
269    s->flags             = AV_RL16(avctx->extradata + 4);
270
271    av_log(avctx, AV_LOG_VERBOSE, "Compression Level: %d - Flags: %d\n",
272           s->compression_level, s->flags);
273    if (s->compression_level % 1000 || s->compression_level > COMPRESSION_LEVEL_INSANE ||
274        !s->compression_level ||
275        (s->fileversion < 3930 && s->compression_level == COMPRESSION_LEVEL_INSANE)) {
276        av_log(avctx, AV_LOG_ERROR, "Incorrect compression level %d\n",
277               s->compression_level);
278        return AVERROR_INVALIDDATA;
279    }
280    s->fset = s->compression_level / 1000 - 1;
281    for (i = 0; i < APE_FILTER_LEVELS; i++) {
282        if (!ape_filter_orders[s->fset][i])
283            break;
284        if (!(s->filterbuf[i] = av_malloc((ape_filter_orders[s->fset][i] * 3 + HISTORY_SIZE) * 4)))
285            return AVERROR(ENOMEM);
286    }
287
288    if (s->fileversion < 3860) {
289        s->entropy_decode_mono   = entropy_decode_mono_0000;
290        s->entropy_decode_stereo = entropy_decode_stereo_0000;
291    } else if (s->fileversion < 3900) {
292        s->entropy_decode_mono   = entropy_decode_mono_3860;
293        s->entropy_decode_stereo = entropy_decode_stereo_3860;
294    } else if (s->fileversion < 3930) {
295        s->entropy_decode_mono   = entropy_decode_mono_3900;
296        s->entropy_decode_stereo = entropy_decode_stereo_3900;
297    } else if (s->fileversion < 3990) {
298        s->entropy_decode_mono   = entropy_decode_mono_3900;
299        s->entropy_decode_stereo = entropy_decode_stereo_3930;
300    } else {
301        s->entropy_decode_mono   = entropy_decode_mono_3990;
302        s->entropy_decode_stereo = entropy_decode_stereo_3990;
303    }
304
305    if (s->fileversion < 3930) {
306        s->predictor_decode_mono   = predictor_decode_mono_3800;
307        s->predictor_decode_stereo = predictor_decode_stereo_3800;
308    } else if (s->fileversion < 3950) {
309        s->predictor_decode_mono   = predictor_decode_mono_3930;
310        s->predictor_decode_stereo = predictor_decode_stereo_3930;
311    } else {
312        s->predictor_decode_mono   = predictor_decode_mono_3950;
313        s->predictor_decode_stereo = predictor_decode_stereo_3950;
314    }
315
316    ff_bswapdsp_init(&s->bdsp);
317    ff_llauddsp_init(&s->adsp);
318    av_channel_layout_uninit(&avctx->ch_layout);
319    avctx->ch_layout = (channels == 2) ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
320                                       : (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
321
322    return 0;
323}
324
325/**
326 * @name APE range decoding functions
327 * @{
328 */
329
/* Range coder geometry; the trailing comments give the evaluated values. */
#define CODE_BITS    32
#define TOP_VALUE    ((unsigned int)1 << (CODE_BITS-1))  /* 1 << 31 */
#define SHIFT_BITS   (CODE_BITS - 9)                     /* 23 */
#define EXTRA_BITS   ((CODE_BITS-2) % 8 + 1)             /* 7 */
#define BOTTOM_VALUE (TOP_VALUE >> 8)                    /* 1 << 23; range_dec_normalize() refills while range <= this */
335
336/** Start the decoder */
337static inline void range_start_decoding(APEContext *ctx)
338{
339    ctx->rc.buffer = bytestream_get_byte(&ctx->ptr);
340    ctx->rc.low    = ctx->rc.buffer >> (8 - EXTRA_BITS);
341    ctx->rc.range  = (uint32_t) 1 << EXTRA_BITS;
342}
343
344/** Perform normalization */
345static inline void range_dec_normalize(APEContext *ctx)
346{
347    while (ctx->rc.range <= BOTTOM_VALUE) {
348        ctx->rc.buffer <<= 8;
349        if(ctx->ptr < ctx->data_end) {
350            ctx->rc.buffer += *ctx->ptr;
351            ctx->ptr++;
352        } else {
353            ctx->error = 1;
354        }
355        ctx->rc.low    = (ctx->rc.low << 8)    | ((ctx->rc.buffer >> 1) & 0xFF);
356        ctx->rc.range  <<= 8;
357    }
358}
359
360/**
361 * Calculate cumulative frequency for next symbol. Does NO update!
362 * @param ctx decoder context
363 * @param tot_f is the total frequency or (code_value)1<<shift
364 * @return the cumulative frequency
365 */
366static inline int range_decode_culfreq(APEContext *ctx, int tot_f)
367{
368    range_dec_normalize(ctx);
369    ctx->rc.help = ctx->rc.range / tot_f;
370    return ctx->rc.low / ctx->rc.help;
371}
372
373/**
374 * Decode value with given size in bits
375 * @param ctx decoder context
376 * @param shift number of bits to decode
377 */
378static inline int range_decode_culshift(APEContext *ctx, int shift)
379{
380    range_dec_normalize(ctx);
381    ctx->rc.help = ctx->rc.range >> shift;
382    return ctx->rc.low / ctx->rc.help;
383}
384
385
386/**
387 * Update decoding state
388 * @param ctx decoder context
389 * @param sy_f the interval length (frequency of the symbol)
390 * @param lt_f the lower end (frequency sum of < symbols)
391 */
392static inline void range_decode_update(APEContext *ctx, int sy_f, int lt_f)
393{
394    ctx->rc.low  -= ctx->rc.help * lt_f;
395    ctx->rc.range = ctx->rc.help * sy_f;
396}
397
398/** Decode n bits (n <= 16) without modelling */
399static inline int range_decode_bits(APEContext *ctx, int n)
400{
401    int sym = range_decode_culshift(ctx, n);
402    range_decode_update(ctx, 1, sym);
403    return sym;
404}
405
406
/* The value MODEL_ELEMENTS - 1 (63) is the escape symbol tested by
 * ape_decode_value_3900() and ape_decode_value_3990(). */
#define MODEL_ELEMENTS 64

/**
 * Fixed probabilities for symbols in Monkey Audio version 3.97
 *
 * Cumulative start positions on a 16-bit scale; entry i is the sum of the
 * first i entries of counts_diff_3970. Passed to range_get_symbol() by
 * ape_decode_value_3900().
 */
static const uint16_t counts_3970[22] = {
        0, 14824, 28224, 39348, 47855, 53994, 58171, 60926,
    62682, 63786, 64463, 64878, 65126, 65276, 65365, 65419,
    65450, 65469, 65480, 65487, 65491, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.97
 *
 * counts_diff_3970[i] == counts_3970[i + 1] - counts_3970[i].
 */
static const uint16_t counts_diff_3970[21] = {
    14824, 13400, 11124, 8507, 6139, 4177, 2755, 1756,
    1104, 677, 415, 248, 150, 89, 54, 31,
    19, 11, 7, 4, 2,
};
426
/**
 * Fixed probabilities for symbols in Monkey Audio version 3.98
 *
 * Cumulative start positions on a 16-bit scale; entry i is the sum of the
 * first i entries of counts_diff_3980. Passed to range_get_symbol() by
 * ape_decode_value_3990().
 */
static const uint16_t counts_3980[22] = {
        0, 19578, 36160, 48417, 56323, 60899, 63265, 64435,
    64971, 65232, 65351, 65416, 65447, 65466, 65476, 65482,
    65485, 65488, 65490, 65491, 65492, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.98
 *
 * counts_diff_3980[i] == counts_3980[i + 1] - counts_3980[i].
 */
static const uint16_t counts_diff_3980[21] = {
    19578, 16582, 12257, 7906, 4576, 2366, 1170, 536,
    261, 119, 65, 31, 19, 10, 6, 3,
    3, 2, 1, 1, 1,
};
444
445/**
446 * Decode symbol
447 * @param ctx decoder context
448 * @param counts probability range start position
449 * @param counts_diff probability range widths
450 */
451static inline int range_get_symbol(APEContext *ctx,
452                                   const uint16_t counts[],
453                                   const uint16_t counts_diff[])
454{
455    int symbol, cf;
456
457    cf = range_decode_culshift(ctx, 16);
458
459    if(cf > 65492){
460        symbol= cf - 65535 + 63;
461        range_decode_update(ctx, 1, cf);
462        if(cf > 65535)
463            ctx->error=1;
464        return symbol;
465    }
466    /* figure out the symbol inefficiently; a binary search would be much better */
467    for (symbol = 0; counts[symbol + 1] <= cf; symbol++);
468
469    range_decode_update(ctx, counts_diff[symbol], counts[symbol]);
470
471    return symbol;
472}
473/** @} */ // group rangecoder
474
475static inline void update_rice(APERice *rice, unsigned int x)
476{
477    int lim = rice->k ? (1 << (rice->k + 4)) : 0;
478    rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
479
480    if (rice->ksum < lim)
481        rice->k--;
482    else if (rice->ksum >= (1 << (rice->k + 5)) && rice->k < 24)
483        rice->k++;
484}
485
486static inline int get_rice_ook(GetBitContext *gb, int k)
487{
488    unsigned int x;
489
490    x = get_unary(gb, 1, get_bits_left(gb));
491
492    if (k)
493        x = (x << k) | get_bits(gb, k);
494
495    return x;
496}
497
498static inline int ape_decode_value_3860(APEContext *ctx, GetBitContext *gb,
499                                        APERice *rice)
500{
501    unsigned int x, overflow;
502
503    overflow = get_unary(gb, 1, get_bits_left(gb));
504
505    if (ctx->fileversion > 3880) {
506        while (overflow >= 16) {
507            overflow -= 16;
508            rice->k  += 4;
509        }
510    }
511
512    if (!rice->k)
513        x = overflow;
514    else if(rice->k <= MIN_CACHE_BITS) {
515        x = (overflow << rice->k) + get_bits(gb, rice->k);
516    } else {
517        av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %"PRIu32"\n", rice->k);
518        ctx->error = 1;
519        return AVERROR_INVALIDDATA;
520    }
521    rice->ksum += x - (rice->ksum + 8 >> 4);
522    if (rice->ksum < (rice->k ? 1 << (rice->k + 4) : 0))
523        rice->k--;
524    else if (rice->ksum >= (1 << (rice->k + 5)) && rice->k < 24)
525        rice->k++;
526
527    /* Convert to signed */
528    return ((x >> 1) ^ ((x & 1) - 1)) + 1;
529}
530
531static inline int ape_decode_value_3900(APEContext *ctx, APERice *rice)
532{
533    unsigned int x, overflow;
534    int tmpk;
535
536    overflow = range_get_symbol(ctx, counts_3970, counts_diff_3970);
537
538    if (overflow == (MODEL_ELEMENTS - 1)) {
539        tmpk = range_decode_bits(ctx, 5);
540        overflow = 0;
541    } else
542        tmpk = (rice->k < 1) ? 0 : rice->k - 1;
543
544    if (tmpk <= 16 || ctx->fileversion < 3910) {
545        if (tmpk > 23) {
546            av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk);
547            return AVERROR_INVALIDDATA;
548        }
549        x = range_decode_bits(ctx, tmpk);
550    } else if (tmpk <= 31) {
551        x = range_decode_bits(ctx, 16);
552        x |= (range_decode_bits(ctx, tmpk - 16) << 16);
553    } else {
554        av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk);
555        return AVERROR_INVALIDDATA;
556    }
557    x += overflow << tmpk;
558
559    update_rice(rice, x);
560
561    /* Convert to signed */
562    return ((x >> 1) ^ ((x & 1) - 1)) + 1;
563}
564
/**
 * Decode one value with the range coder scheme of file versions >= 3990.
 *
 * The unsigned value is base + overflow * pivot, where pivot is derived from
 * the running sum, overflow comes from the 3.98 symbol model and base is
 * decoded with a total frequency of pivot.
 *
 * @param ctx  decoder context
 * @param rice Rice state of the channel being decoded
 * @return the value mapped to signed (odd x -> (x + 1) / 2, even x -> -x / 2)
 */
static inline int ape_decode_value_3990(APEContext *ctx, APERice *rice)
{
    unsigned int x, overflow, pivot;
    int base;

    /* ksum / 32, but never less than 1 so it can serve as a divisor */
    pivot = FFMAX(rice->ksum >> 5, 1);

    overflow = range_get_symbol(ctx, counts_3980, counts_diff_3980);

    /* escape symbol: the real overflow follows as two raw 16-bit halves,
     * high half first */
    if (overflow == (MODEL_ELEMENTS - 1)) {
        overflow  = (unsigned)range_decode_bits(ctx, 16) << 16;
        overflow |= range_decode_bits(ctx, 16);
    }

    if (pivot < 0x10000) {
        base = range_decode_culfreq(ctx, pivot);
        range_decode_update(ctx, 1, base);
    } else {
        /* pivot needs more than 16 bits: decode base in a high and a low
         * part, where bbits is the number of bits shifted out of pivot */
        int base_hi = pivot, base_lo;
        int bbits = 0;

        while (base_hi & ~0xFFFF) {
            base_hi >>= 1;
            bbits++;
        }
        base_hi = range_decode_culfreq(ctx, base_hi + 1);
        range_decode_update(ctx, 1, base_hi);
        base_lo = range_decode_culfreq(ctx, 1 << bbits);
        range_decode_update(ctx, 1, base_lo);

        base = (base_hi << bbits) + base_lo;
    }

    x = base + overflow * pivot;

    update_rice(rice, x);

    /* Convert to signed */
    return ((x >> 1) ^ ((x & 1) - 1)) + 1;
}
605
/**
 * Derive a Rice parameter from a (scaled) running sum.
 *
 * @param ksum scaled sum of recent values
 * @return av_log2(ksum) + 1 for a non-zero ksum, av_log2(0) for zero
 */
static int get_k(int ksum)
{
    const int nonzero = ksum != 0;

    return av_log2(ksum) + nonzero;
}
610
/**
 * Decode one channel of a frame for file versions below 3860.
 *
 * The Rice parameter is adapted in three phases: the first 5 values use a
 * fixed k of 10, values up to index 63 use a k derived from the mean of
 * everything read so far, and the rest use a k tracked over a sliding window
 * of the last 64 values. At the end all values are mapped to signed.
 *
 * The function returns early, leaving out[] partly filled and not converted
 * to signed, when k grows to 24 or more (more than 24 in the last phase).
 * Only running out of bits in the last phase sets ctx->error.
 *
 * NOTE(review): the gb parameter is not used; all reads go through ctx->gb,
 * which is also what both callers pass.
 *
 * @param ctx            decoder context
 * @param gb             bit reader (unused, see note)
 * @param out            destination for blockstodecode values
 * @param rice           Rice state of the channel; ksum is restarted from 0
 * @param blockstodecode number of values to decode
 */
static void decode_array_0000(APEContext *ctx, GetBitContext *gb,
                              int32_t *out, APERice *rice, int blockstodecode)
{
    int i;
    unsigned ksummax, ksummin;

    /* phase 1: up to 5 values with k = 10 */
    rice->ksum = 0;
    for (i = 0; i < FFMIN(blockstodecode, 5); i++) {
        out[i] = get_rice_ook(&ctx->gb, 10);
        rice->ksum += out[i];
    }

    if (blockstodecode <= 5)
        goto end;

    /* phase 2: k follows the running sum until 64 values have been read */
    rice->k = get_k(rice->ksum / 10);
    if (rice->k >= 24)
        return;
    for (; i < FFMIN(blockstodecode, 64); i++) {
        out[i] = get_rice_ook(&ctx->gb, rice->k);
        rice->ksum += out[i];
        rice->k = get_k(rice->ksum / ((i + 1) * 2));
        if (rice->k >= 24)
            return;
    }

    if (blockstodecode <= 64)
        goto end;

    /* phase 3: ksum covers the last 64 values; k moves so that
     * ksummin <= ksum < ksummax holds */
    rice->k = get_k(rice->ksum >> 7);
    ksummax = 1 << rice->k + 7;
    ksummin = rice->k ? (1 << rice->k + 6) : 0;
    for (; i < blockstodecode; i++) {
        if (get_bits_left(&ctx->gb) < 1) {
            ctx->error = 1;
            return;
        }
        out[i] = get_rice_ook(&ctx->gb, rice->k);
        /* slide the window: add the new value, drop the one 64 back */
        rice->ksum += out[i] - (unsigned)out[i - 64];
        while (rice->ksum < ksummin) {
            rice->k--;
            ksummin = rice->k ? ksummin >> 1 : 0;
            ksummax >>= 1;
        }
        while (rice->ksum >= ksummax) {
            rice->k++;
            if (rice->k > 24)
                return;
            ksummax <<= 1;
            ksummin = ksummin ? ksummin << 1 : 128;
        }
    }

end:
    /* map to signed: odd x -> (x + 1) / 2, even x -> -x / 2 */
    for (i = 0; i < blockstodecode; i++)
        out[i] = ((out[i] >> 1) ^ ((out[i] & 1) - 1)) + 1;
}
668
669static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode)
670{
671    decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY,
672                      blockstodecode);
673}
674
675static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode)
676{
677    decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY,
678                      blockstodecode);
679    decode_array_0000(ctx, &ctx->gb, ctx->decoded[1], &ctx->riceX,
680                      blockstodecode);
681}
682
683static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode)
684{
685    int32_t *decoded0 = ctx->decoded[0];
686
687    while (blockstodecode--)
688        *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY);
689}
690
691static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode)
692{
693    int32_t *decoded0 = ctx->decoded[0];
694    int32_t *decoded1 = ctx->decoded[1];
695    int blocks = blockstodecode;
696
697    while (blockstodecode--)
698        *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY);
699    while (blocks--)
700        *decoded1++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceX);
701}
702
703static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode)
704{
705    int32_t *decoded0 = ctx->decoded[0];
706
707    while (blockstodecode--)
708        *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
709}
710
711static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode)
712{
713    int32_t *decoded0 = ctx->decoded[0];
714    int32_t *decoded1 = ctx->decoded[1];
715    int blocks = blockstodecode;
716
717    while (blockstodecode--)
718        *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
719    range_dec_normalize(ctx);
720    // because of some implementation peculiarities we need to backpedal here
721    ctx->ptr -= 1;
722    range_start_decoding(ctx);
723    while (blocks--)
724        *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX);
725}
726
727static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode)
728{
729    int32_t *decoded0 = ctx->decoded[0];
730    int32_t *decoded1 = ctx->decoded[1];
731
732    while (blockstodecode--) {
733        *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
734        *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX);
735    }
736}
737
738static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode)
739{
740    int32_t *decoded0 = ctx->decoded[0];
741
742    while (blockstodecode--)
743        *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY);
744}
745
746static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode)
747{
748    int32_t *decoded0 = ctx->decoded[0];
749    int32_t *decoded1 = ctx->decoded[1];
750
751    while (blockstodecode--) {
752        *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY);
753        *decoded1++ = ape_decode_value_3990(ctx, &ctx->riceX);
754    }
755}
756
757static int init_entropy_decoder(APEContext *ctx)
758{
759    /* Read the CRC */
760    if (ctx->fileversion >= 3900) {
761        if (ctx->data_end - ctx->ptr < 6)
762            return AVERROR_INVALIDDATA;
763        ctx->CRC = bytestream_get_be32(&ctx->ptr);
764    } else {
765        ctx->CRC = get_bits_long(&ctx->gb, 32);
766    }
767
768    /* Read the frame flags if they exist */
769    ctx->frameflags = 0;
770    ctx->CRC_state = UINT32_MAX;
771    if ((ctx->fileversion > 3820) && (ctx->CRC & 0x80000000)) {
772        ctx->CRC &= ~0x80000000;
773
774        if (ctx->data_end - ctx->ptr < 6)
775            return AVERROR_INVALIDDATA;
776        ctx->frameflags = bytestream_get_be32(&ctx->ptr);
777    }
778
779    /* Initialize the rice structs */
780    ctx->riceX.k = 10;
781    ctx->riceX.ksum = (1 << ctx->riceX.k) * 16;
782    ctx->riceY.k = 10;
783    ctx->riceY.ksum = (1 << ctx->riceY.k) * 16;
784
785    if (ctx->fileversion >= 3900) {
786        /* The first 8 bits of input are ignored. */
787        ctx->ptr++;
788
789        range_start_decoding(ctx);
790    }
791
792    return 0;
793}
794
/*
 * Start values that init_predictor_decoder() copies into the predictor
 * coefficient arrays at the beginning of a frame.
 */

/* coeffsA for file versions below 3930 at COMPRESSION_LEVEL_FAST */
static const int32_t initial_coeffs_fast_3320[1] = {
    375,
};

/* coeffsA for file versions below 3930 at all other compression levels */
static const int32_t initial_coeffs_a_3800[3] = {
    64, 115, 64,
};

/* coeffsB for file versions below 3930 */
static const int32_t initial_coeffs_b_3800[2] = {
    740, 0
};

/* coeffsA of the 32-bit predictor for file versions >= 3930 */
static const int32_t initial_coeffs_3930[4] = {
    360, 317, -109, 98
};

/* coeffsA of the 64-bit predictor for file versions >= 3930 */
static const int64_t initial_coeffs_3930_64bit[4] = {
    360, 317, -109, 98
};
814
815static void init_predictor_decoder(APEContext *ctx)
816{
817    APEPredictor *p = &ctx->predictor;
818    APEPredictor64 *p64 = &ctx->predictor64;
819
820    /* Zero the history buffers */
821    memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(*p->historybuffer));
822    memset(p64->historybuffer, 0, PREDICTOR_SIZE * sizeof(*p64->historybuffer));
823    p->buf = p->historybuffer;
824    p64->buf = p64->historybuffer;
825
826    /* Initialize and zero the coefficients */
827    if (ctx->fileversion < 3930) {
828        if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
829            memcpy(p->coeffsA[0], initial_coeffs_fast_3320,
830                   sizeof(initial_coeffs_fast_3320));
831            memcpy(p->coeffsA[1], initial_coeffs_fast_3320,
832                   sizeof(initial_coeffs_fast_3320));
833        } else {
834            memcpy(p->coeffsA[0], initial_coeffs_a_3800,
835                   sizeof(initial_coeffs_a_3800));
836            memcpy(p->coeffsA[1], initial_coeffs_a_3800,
837                   sizeof(initial_coeffs_a_3800));
838        }
839    } else {
840        memcpy(p->coeffsA[0], initial_coeffs_3930, sizeof(initial_coeffs_3930));
841        memcpy(p->coeffsA[1], initial_coeffs_3930, sizeof(initial_coeffs_3930));
842        memcpy(p64->coeffsA[0], initial_coeffs_3930_64bit, sizeof(initial_coeffs_3930_64bit));
843        memcpy(p64->coeffsA[1], initial_coeffs_3930_64bit, sizeof(initial_coeffs_3930_64bit));
844    }
845    memset(p->coeffsB, 0, sizeof(p->coeffsB));
846    memset(p64->coeffsB, 0, sizeof(p64->coeffsB));
847    if (ctx->fileversion < 3930) {
848        memcpy(p->coeffsB[0], initial_coeffs_b_3800,
849               sizeof(initial_coeffs_b_3800));
850        memcpy(p->coeffsB[1], initial_coeffs_b_3800,
851               sizeof(initial_coeffs_b_3800));
852    }
853
854    p->filterA[0] = p->filterA[1] = 0;
855    p->filterB[0] = p->filterB[1] = 0;
856    p->lastA[0]   = p->lastA[1]   = 0;
857
858    p64->filterA[0] = p64->filterA[1] = 0;
859    p64->filterB[0] = p64->filterB[1] = 0;
860    p64->lastA[0]   = p64->lastA[1]   = 0;
861
862    p->sample_pos = 0;
863
864    p64->sample_pos = 0;
865}
866
/** Get inverse sign of integer (-1 for positive, 1 for negative and 0 for zero) */
static inline int APESIGN(int32_t x) {
    if (x > 0)
        return -1;
    return x < 0;
}
871
/**
 * First-order adaptive predictor step with a single coefficient.
 *
 * NOTE(review): going by the name and the start coefficients this is the
 * fast-compression path of old file versions; the callers are outside this
 * part of the file.
 *
 * The function does not advance p->buf or p->sample_pos itself.
 *
 * @param p       predictor state
 * @param decoded residual to reconstruct
 * @param filter  index (0 or 1) into the per-filter state arrays
 * @param delayA  index into p->buf where the previous output is stored
 * @return the reconstructed sample (the updated p->filterA[filter], or
 *         decoded itself during the first three sample positions)
 */
static av_always_inline int filter_fast_3320(APEPredictor *p,
                                             const int decoded, const int filter,
                                             const int delayA)
{
    int32_t predictionA;

    p->buf[delayA] = p->lastA[filter];
    /* warm-up: pass the input through and seed the state */
    if (p->sample_pos < 3) {
        p->lastA[filter]   = decoded;
        p->filterA[filter] = decoded;
        return decoded;
    }

    /* linear extrapolation from the two most recent entries; unsigned
     * arithmetic so that overflow wraps */
    predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1];
    p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA  * p->coeffsA[filter][0]) >> 9);

    /* the XOR is positive only when both values have the same sign bit and
     * differ: step the coefficient up then, down otherwise */
    if ((decoded ^ predictionA) > 0)
        p->coeffsA[filter][0]++;
    else
        p->coeffsA[filter][0]--;

    p->filterA[filter] += (unsigned)p->lastA[filter];

    return p->filterA[filter];
}
897
/**
 * Two-stage adaptive predictor step.
 *
 * NOTE(review): going by the name and the start coefficients this serves
 * file versions below 3930; the callers are outside this part of the file.
 *
 * Stage A predicts from three terms built on the last outputs stored at
 * delayA, stage B from two terms built on the stage-B history at delayB.
 * Each coefficient is nudged by a fixed step whose direction depends on the
 * sign of its input term and of the value being corrected. The function
 * does not advance p->buf or p->sample_pos itself.
 *
 * @param p       predictor state
 * @param decoded residual to reconstruct
 * @param filter  index (0 or 1) into the per-filter state arrays
 * @param delayA  index into p->buf of the stage-A history
 * @param delayB  index into p->buf of the stage-B history
 * @param start   number of warm-up sample positions without prediction
 * @param shift   right shift applied to the stage-B prediction
 * @return the reconstructed sample (the updated p->filterA[filter])
 */
static av_always_inline int filter_3800(APEPredictor *p,
                                        const unsigned decoded, const int filter,
                                        const int delayA,  const int delayB,
                                        const int start,   const int shift)
{
    int32_t predictionA, predictionB, sign;
    int32_t d0, d1, d2, d3, d4;

    p->buf[delayA] = p->lastA[filter];
    p->buf[delayB] = p->filterB[filter];
    /* warm-up: accumulate the input without prediction and seed the state */
    if (p->sample_pos < start) {
        predictionA = decoded + p->filterA[filter];
        p->lastA[filter]   = decoded;
        p->filterB[filter] = decoded;
        p->filterA[filter] = predictionA;
        return predictionA;
    }
    /* input terms; the unsigned casts make overflow wrap */
    d2 =  p->buf[delayA];
    d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2;
    d0 =  p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8);
    d3 =  p->buf[delayB] * 2U - p->buf[delayB - 1];
    d4 =  p->buf[delayB];

    predictionA = d0 * p->coeffsA[filter][0] +
                  d1 * p->coeffsA[filter][1] +
                  d2 * p->coeffsA[filter][2];

    /* ((d >> n) & m) - m / 2 is +m/2 for negative d and -m/2 otherwise,
     * giving adaptation steps of 1, 4 and 4 */
    sign = APESIGN(decoded);
    p->coeffsA[filter][0] += (((d0 >> 30) & 2) - 1) * sign;
    p->coeffsA[filter][1] += (((d1 >> 28) & 8) - 4) * sign;
    p->coeffsA[filter][2] += (((d2 >> 28) & 8) - 4) * sign;

    predictionB = d3 * p->coeffsB[filter][0] -
                  d4 * p->coeffsB[filter][1];
    p->lastA[filter] = decoded + (predictionA >> 11);
    /* stage B adapts on the sign of the stage-A output, with steps 2 and 1 */
    sign = APESIGN(p->lastA[filter]);
    p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
    p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;

    p->filterB[filter] = p->lastA[filter] + (unsigned)(predictionB >> shift);
    /* final stage: add 31/32 of the previous output */
    p->filterA[filter] = p->filterB[filter] + (unsigned)((int)(p->filterA[filter] * 31U) >> 5);

    return p->filterA[filter];
}
942
/**
 * In-place adaptive FIR stage run over a sample buffer before the per-sample
 * predictor on the "3800" decode paths.
 *
 * The first `order` samples are left untouched and seed the delay line. Each
 * later sample has (dot(delay, coeffs) >> shift) subtracted from it, and the
 * filtered value is then appended to the delay line. While the dot product is
 * accumulated, each coefficient is stepped by APESIGN(input sample), negated
 * when the matching delay entry has its sign bit set.
 *
 * @param buffer samples, modified in place
 * @param order  number of taps; the on-stack arrays hold at most 256
 * @param shift  right shift applied to the dot product
 * @param length number of samples in buffer; nothing happens if order >= length
 */
static void long_filter_high_3800(int32_t *buffer, int order, int shift, int length)
{
    int32_t coeffs[256], delay[256];
    int pos, tap;

    if (order >= length)
        return;

    memset(coeffs, 0, order * sizeof(*coeffs));
    memcpy(delay, buffer, order * sizeof(*delay));

    for (pos = order; pos < length; pos++) {
        int32_t *const cur = &buffer[pos];
        const int32_t sign = APESIGN(*cur);
        int32_t dotprod    = 0;

        for (tap = 0; tap < order; tap++) {
            dotprod     += delay[tap] * (unsigned)coeffs[tap];
            coeffs[tap] += ((delay[tap] >> 31) | 1) * sign;
        }
        *cur -= (unsigned)(dotprod >> shift);

        /* Slide the delay line by one and append the filtered sample. */
        memmove(delay, delay + 1, (order - 1) * sizeof(*delay));
        delay[order - 1] = *cur;
    }
}
968
/**
 * In-place 8-tap adaptive filter used by the "3800" decode paths for files
 * of version 3830 and later at the extra-high compression level.
 *
 * Delay line and coefficients both start at zero. For every sample the dot
 * product of the two is formed, the coefficients are stepped by
 * APESIGN(input sample) (negated where the delay entry has its sign bit set),
 * the unfiltered input is pushed to the front of the delay line, and finally
 * (dot product >> 9) is subtracted from the sample.
 *
 * @param buffer samples, modified in place
 * @param length number of samples; nothing happens if it is <= 0
 */
static void long_filter_ehigh_3830(int32_t *buffer, int length)
{
    uint32_t coeffs[8] = { 0 };
    int32_t  delay[8]  = { 0 };
    int pos, tap;

    for (pos = 0; pos < length; pos++) {
        const int32_t input = buffer[pos];
        const int32_t sign  = APESIGN(input);
        int32_t dotprod     = 0;

        tap = 8;
        while (tap-- > 0) {
            dotprod     += delay[tap] * coeffs[tap];
            coeffs[tap] += ((delay[tap] >> 31) | 1) * sign;
        }

        /* The delay line stores the input as it was before filtering. */
        memmove(delay + 1, delay, 7 * sizeof(*delay));
        delay[0] = input;

        buffer[pos] -= (unsigned)(dotprod >> 9);
    }
}
989
990static void predictor_decode_stereo_3800(APEContext *ctx, int count)
991{
992    APEPredictor *p = &ctx->predictor;
993    int32_t *decoded0 = ctx->decoded[0];
994    int32_t *decoded1 = ctx->decoded[1];
995    int start = 4, shift = 10;
996
997    if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
998        start = 16;
999        long_filter_high_3800(decoded0, 16, 9, count);
1000        long_filter_high_3800(decoded1, 16, 9, count);
1001    } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
1002        int order = 128, shift2 = 11;
1003
1004        if (ctx->fileversion >= 3830) {
1005            order <<= 1;
1006            shift++;
1007            shift2++;
1008            long_filter_ehigh_3830(decoded0 + order, count - order);
1009            long_filter_ehigh_3830(decoded1 + order, count - order);
1010        }
1011        start = order;
1012        long_filter_high_3800(decoded0, order, shift2, count);
1013        long_filter_high_3800(decoded1, order, shift2, count);
1014    }
1015
1016    while (count--) {
1017        int X = *decoded0, Y = *decoded1;
1018        if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
1019            *decoded0 = filter_fast_3320(p, Y, 0, YDELAYA);
1020            decoded0++;
1021            *decoded1 = filter_fast_3320(p, X, 1, XDELAYA);
1022            decoded1++;
1023        } else {
1024            *decoded0 = filter_3800(p, Y, 0, YDELAYA, YDELAYB,
1025                                    start, shift);
1026            decoded0++;
1027            *decoded1 = filter_3800(p, X, 1, XDELAYA, XDELAYB,
1028                                    start, shift);
1029            decoded1++;
1030        }
1031
1032        /* Combined */
1033        p->buf++;
1034        p->sample_pos++;
1035
1036        /* Have we filled the history buffer? */
1037        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1038            memmove(p->historybuffer, p->buf,
1039                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1040            p->buf = p->historybuffer;
1041        }
1042    }
1043}
1044
1045static void predictor_decode_mono_3800(APEContext *ctx, int count)
1046{
1047    APEPredictor *p = &ctx->predictor;
1048    int32_t *decoded0 = ctx->decoded[0];
1049    int start = 4, shift = 10;
1050
1051    if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
1052        start = 16;
1053        long_filter_high_3800(decoded0, 16, 9, count);
1054    } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
1055        int order = 128, shift2 = 11;
1056
1057        if (ctx->fileversion >= 3830) {
1058            order <<= 1;
1059            shift++;
1060            shift2++;
1061            long_filter_ehigh_3830(decoded0 + order, count - order);
1062        }
1063        start = order;
1064        long_filter_high_3800(decoded0, order, shift2, count);
1065    }
1066
1067    while (count--) {
1068        if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
1069            *decoded0 = filter_fast_3320(p, *decoded0, 0, YDELAYA);
1070            decoded0++;
1071        } else {
1072            *decoded0 = filter_3800(p, *decoded0, 0, YDELAYA, YDELAYB,
1073                                    start, shift);
1074            decoded0++;
1075        }
1076
1077        /* Combined */
1078        p->buf++;
1079        p->sample_pos++;
1080
1081        /* Have we filled the history buffer? */
1082        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1083            memmove(p->historybuffer, p->buf,
1084                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1085            p->buf = p->historybuffer;
1086        }
1087    }
1088}
1089
1090static av_always_inline int predictor_update_3930(APEPredictor *p,
1091                                                  const int decoded, const int filter,
1092                                                  const int delayA)
1093{
1094    int32_t predictionA, sign;
1095    uint32_t d0, d1, d2, d3;
1096
1097    p->buf[delayA]     = p->lastA[filter];
1098    d0 = p->buf[delayA    ];
1099    d1 = p->buf[delayA    ] - (unsigned)p->buf[delayA - 1];
1100    d2 = p->buf[delayA - 1] - (unsigned)p->buf[delayA - 2];
1101    d3 = p->buf[delayA - 2] - (unsigned)p->buf[delayA - 3];
1102
1103    predictionA = d0 * p->coeffsA[filter][0] +
1104                  d1 * p->coeffsA[filter][1] +
1105                  d2 * p->coeffsA[filter][2] +
1106                  d3 * p->coeffsA[filter][3];
1107
1108    p->lastA[filter] = decoded + (predictionA >> 9);
1109    p->filterA[filter] = p->lastA[filter] + ((int)(p->filterA[filter] * 31U) >> 5);
1110
1111    sign = APESIGN(decoded);
1112    p->coeffsA[filter][0] += (((int32_t)d0 < 0) * 2 - 1) * sign;
1113    p->coeffsA[filter][1] += (((int32_t)d1 < 0) * 2 - 1) * sign;
1114    p->coeffsA[filter][2] += (((int32_t)d2 < 0) * 2 - 1) * sign;
1115    p->coeffsA[filter][3] += (((int32_t)d3 < 0) * 2 - 1) * sign;
1116
1117    return p->filterA[filter];
1118}
1119
1120static void predictor_decode_stereo_3930(APEContext *ctx, int count)
1121{
1122    APEPredictor *p = &ctx->predictor;
1123    int32_t *decoded0 = ctx->decoded[0];
1124    int32_t *decoded1 = ctx->decoded[1];
1125
1126    ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count);
1127
1128    while (count--) {
1129        /* Predictor Y */
1130        int Y = *decoded1, X = *decoded0;
1131        *decoded0 = predictor_update_3930(p, Y, 0, YDELAYA);
1132        decoded0++;
1133        *decoded1 = predictor_update_3930(p, X, 1, XDELAYA);
1134        decoded1++;
1135
1136        /* Combined */
1137        p->buf++;
1138
1139        /* Have we filled the history buffer? */
1140        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1141            memmove(p->historybuffer, p->buf,
1142                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1143            p->buf = p->historybuffer;
1144        }
1145    }
1146}
1147
1148static void predictor_decode_mono_3930(APEContext *ctx, int count)
1149{
1150    APEPredictor *p = &ctx->predictor;
1151    int32_t *decoded0 = ctx->decoded[0];
1152
1153    ape_apply_filters(ctx, ctx->decoded[0], NULL, count);
1154
1155    while (count--) {
1156        *decoded0 = predictor_update_3930(p, *decoded0, 0, YDELAYA);
1157        decoded0++;
1158
1159        p->buf++;
1160
1161        /* Have we filled the history buffer? */
1162        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1163            memmove(p->historybuffer, p->buf,
1164                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1165            p->buf = p->historybuffer;
1166        }
1167    }
1168}
1169
1170static av_always_inline int predictor_update_filter(APEPredictor64 *p,
1171                                                    const int decoded, const int filter,
1172                                                    const int delayA,  const int delayB,
1173                                                    const int adaptA,  const int adaptB,
1174                                                    int compression_level)
1175{
1176    int64_t predictionA, predictionB;
1177    int32_t sign;
1178
1179    p->buf[delayA]     = p->lastA[filter];
1180    p->buf[adaptA]     = APESIGN(p->buf[delayA]);
1181    p->buf[delayA - 1] = p->buf[delayA] - (uint64_t)p->buf[delayA - 1];
1182    p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]);
1183
1184    predictionA = p->buf[delayA    ] * p->coeffsA[filter][0] +
1185                  p->buf[delayA - 1] * p->coeffsA[filter][1] +
1186                  p->buf[delayA - 2] * p->coeffsA[filter][2] +
1187                  p->buf[delayA - 3] * p->coeffsA[filter][3];
1188
1189    /*  Apply a scaled first-order filter compression */
1190    p->buf[delayB]     = p->filterA[filter ^ 1] - ((int64_t)(p->filterB[filter] * 31ULL) >> 5);
1191    p->buf[adaptB]     = APESIGN(p->buf[delayB]);
1192    p->buf[delayB - 1] = p->buf[delayB] - (uint64_t)p->buf[delayB - 1];
1193    p->buf[adaptB - 1] = APESIGN(p->buf[delayB - 1]);
1194    p->filterB[filter] = p->filterA[filter ^ 1];
1195
1196    predictionB = p->buf[delayB    ] * p->coeffsB[filter][0] +
1197                  p->buf[delayB - 1] * p->coeffsB[filter][1] +
1198                  p->buf[delayB - 2] * p->coeffsB[filter][2] +
1199                  p->buf[delayB - 3] * p->coeffsB[filter][3] +
1200                  p->buf[delayB - 4] * p->coeffsB[filter][4];
1201
1202    if (compression_level < COMPRESSION_LEVEL_INSANE) {
1203        predictionA = (int32_t)predictionA;
1204        predictionB = (int32_t)predictionB;
1205        p->lastA[filter] = (int32_t)(decoded + (unsigned)((int32_t)(predictionA + (predictionB >> 1)) >> 10));
1206    } else {
1207        p->lastA[filter] = decoded + ((int64_t)((uint64_t)predictionA + (predictionB >> 1)) >> 10);
1208    }
1209    p->filterA[filter] = p->lastA[filter] + ((int64_t)(p->filterA[filter] * 31ULL) >> 5);
1210
1211    sign = APESIGN(decoded);
1212    p->coeffsA[filter][0] += p->buf[adaptA    ] * sign;
1213    p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
1214    p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
1215    p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
1216    p->coeffsB[filter][0] += p->buf[adaptB    ] * sign;
1217    p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
1218    p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
1219    p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
1220    p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;
1221
1222    return p->filterA[filter];
1223}
1224
1225static void predictor_decode_stereo_3950(APEContext *ctx, int count)
1226{
1227    APEPredictor64 *p = &ctx->predictor64;
1228    int32_t *decoded0 = ctx->decoded[0];
1229    int32_t *decoded1 = ctx->decoded[1];
1230
1231    ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count);
1232
1233    while (count--) {
1234        /* Predictor Y */
1235        *decoded0 = predictor_update_filter(p, *decoded0, 0, YDELAYA, YDELAYB,
1236                                            YADAPTCOEFFSA, YADAPTCOEFFSB,
1237                                            ctx->compression_level);
1238        decoded0++;
1239        *decoded1 = predictor_update_filter(p, *decoded1, 1, XDELAYA, XDELAYB,
1240                                            XADAPTCOEFFSA, XADAPTCOEFFSB,
1241                                            ctx->compression_level);
1242        decoded1++;
1243
1244        /* Combined */
1245        p->buf++;
1246
1247        /* Have we filled the history buffer? */
1248        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1249            memmove(p->historybuffer, p->buf,
1250                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1251            p->buf = p->historybuffer;
1252        }
1253    }
1254}
1255
1256static void predictor_decode_mono_3950(APEContext *ctx, int count)
1257{
1258    APEPredictor64 *p = &ctx->predictor64;
1259    int32_t *decoded0 = ctx->decoded[0];
1260    int32_t predictionA, currentA, A, sign;
1261
1262    ape_apply_filters(ctx, ctx->decoded[0], NULL, count);
1263
1264    currentA = p->lastA[0];
1265
1266    while (count--) {
1267        A = *decoded0;
1268
1269        p->buf[YDELAYA] = currentA;
1270        p->buf[YDELAYA - 1] = p->buf[YDELAYA] - (uint64_t)p->buf[YDELAYA - 1];
1271
1272        predictionA = p->buf[YDELAYA    ] * p->coeffsA[0][0] +
1273                      p->buf[YDELAYA - 1] * p->coeffsA[0][1] +
1274                      p->buf[YDELAYA - 2] * p->coeffsA[0][2] +
1275                      p->buf[YDELAYA - 3] * p->coeffsA[0][3];
1276
1277        currentA = A + (uint64_t)(predictionA >> 10);
1278
1279        p->buf[YADAPTCOEFFSA]     = APESIGN(p->buf[YDELAYA    ]);
1280        p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);
1281
1282        sign = APESIGN(A);
1283        p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ] * sign;
1284        p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
1285        p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
1286        p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;
1287
1288        p->buf++;
1289
1290        /* Have we filled the history buffer? */
1291        if (p->buf == p->historybuffer + HISTORY_SIZE) {
1292            memmove(p->historybuffer, p->buf,
1293                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
1294            p->buf = p->historybuffer;
1295        }
1296
1297        p->filterA[0] = currentA + (uint64_t)((int64_t)(p->filterA[0] * 31U) >> 5);
1298        *(decoded0++) = p->filterA[0];
1299    }
1300
1301    p->lastA[0] = currentA;
1302}
1303
1304static void do_init_filter(APEFilter *f, int16_t *buf, int order)
1305{
1306    f->coeffs = buf;
1307    f->historybuffer = buf + order;
1308    f->delay       = f->historybuffer + order * 2;
1309    f->adaptcoeffs = f->historybuffer + order;
1310
1311    memset(f->historybuffer, 0, (order * 2) * sizeof(*f->historybuffer));
1312    memset(f->coeffs, 0, order * sizeof(*f->coeffs));
1313    f->avg = 0;
1314}
1315
1316static void init_filter(APEContext *ctx, APEFilter *f, int16_t *buf, int order)
1317{
1318    do_init_filter(&f[0], buf, order);
1319    do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order);
1320}
1321
1322static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
1323                            int32_t *data, int count, int order, int fracbits)
1324{
1325    int res;
1326    unsigned absres;
1327
1328    while (count--) {
1329        /* round fixedpoint scalar product */
1330        res = ctx->adsp.scalarproduct_and_madd_int16(f->coeffs,
1331                                                     f->delay - order,
1332                                                     f->adaptcoeffs - order,
1333                                                     order, APESIGN(*data));
1334        res = (int64_t)(res + (1LL << (fracbits - 1))) >> fracbits;
1335        res += (unsigned)*data;
1336        *data++ = res;
1337
1338        /* Update the output history */
1339        *f->delay++ = av_clip_int16(res);
1340
1341        if (version < 3980) {
1342            /* Version ??? to < 3.98 files (untested) */
1343            f->adaptcoeffs[0]  = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
1344            f->adaptcoeffs[-4] >>= 1;
1345            f->adaptcoeffs[-8] >>= 1;
1346        } else {
1347            /* Version 3.98 and later files */
1348
1349            /* Update the adaption coefficients */
1350            absres = FFABSU(res);
1351            if (absres)
1352                *f->adaptcoeffs = APESIGN(res) *
1353                                  (8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3))));
1354                /* equivalent to the following code
1355                    if (absres <= f->avg * 4 / 3)
1356                        *f->adaptcoeffs = APESIGN(res) * 8;
1357                    else if (absres <= f->avg * 3)
1358                        *f->adaptcoeffs = APESIGN(res) * 16;
1359                    else
1360                        *f->adaptcoeffs = APESIGN(res) * 32;
1361                */
1362            else
1363                *f->adaptcoeffs = 0;
1364
1365            f->avg += (int)(absres - (unsigned)f->avg) / 16;
1366
1367            f->adaptcoeffs[-1] >>= 1;
1368            f->adaptcoeffs[-2] >>= 1;
1369            f->adaptcoeffs[-8] >>= 1;
1370        }
1371
1372        f->adaptcoeffs++;
1373
1374        /* Have we filled the history buffer? */
1375        if (f->delay == f->historybuffer + HISTORY_SIZE + (order * 2)) {
1376            memmove(f->historybuffer, f->delay - (order * 2),
1377                    (order * 2) * sizeof(*f->historybuffer));
1378            f->delay = f->historybuffer + order * 2;
1379            f->adaptcoeffs = f->historybuffer + order;
1380        }
1381    }
1382}
1383
1384static void apply_filter(APEContext *ctx, APEFilter *f,
1385                         int32_t *data0, int32_t *data1,
1386                         int count, int order, int fracbits)
1387{
1388    do_apply_filter(ctx, ctx->fileversion, &f[0], data0, count, order, fracbits);
1389    if (data1)
1390        do_apply_filter(ctx, ctx->fileversion, &f[1], data1, count, order, fracbits);
1391}
1392
1393static void ape_apply_filters(APEContext *ctx, int32_t *decoded0,
1394                              int32_t *decoded1, int count)
1395{
1396    int i;
1397
1398    for (i = 0; i < APE_FILTER_LEVELS; i++) {
1399        if (!ape_filter_orders[ctx->fset][i])
1400            break;
1401        apply_filter(ctx, ctx->filters[i], decoded0, decoded1, count,
1402                     ape_filter_orders[ctx->fset][i],
1403                     ape_filter_fracbits[ctx->fset][i]);
1404    }
1405}
1406
1407static int init_frame_decoder(APEContext *ctx)
1408{
1409    int i, ret;
1410    if ((ret = init_entropy_decoder(ctx)) < 0)
1411        return ret;
1412    init_predictor_decoder(ctx);
1413
1414    for (i = 0; i < APE_FILTER_LEVELS; i++) {
1415        if (!ape_filter_orders[ctx->fset][i])
1416            break;
1417        init_filter(ctx, ctx->filters[i], ctx->filterbuf[i],
1418                    ape_filter_orders[ctx->fset][i]);
1419    }
1420    return 0;
1421}
1422
1423static void ape_unpack_mono(APEContext *ctx, int count)
1424{
1425    if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) {
1426        /* We are pure silence, so we're done. */
1427        av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence mono\n");
1428        return;
1429    }
1430
1431    ctx->entropy_decode_mono(ctx, count);
1432    if (ctx->error)
1433        return;
1434
1435    /* Now apply the predictor decoding */
1436    ctx->predictor_decode_mono(ctx, count);
1437
1438    /* Pseudo-stereo - just copy left channel to right channel */
1439    if (ctx->channels == 2) {
1440        memcpy(ctx->decoded[1], ctx->decoded[0], count * sizeof(*ctx->decoded[1]));
1441    }
1442}
1443
1444static void ape_unpack_stereo(APEContext *ctx, int count)
1445{
1446    unsigned left, right;
1447    int32_t *decoded0 = ctx->decoded[0];
1448    int32_t *decoded1 = ctx->decoded[1];
1449
1450    if ((ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) == APE_FRAMECODE_STEREO_SILENCE) {
1451        /* We are pure silence, so we're done. */
1452        av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence stereo\n");
1453        return;
1454    }
1455
1456    ctx->entropy_decode_stereo(ctx, count);
1457    if (ctx->error)
1458        return;
1459
1460    /* Now apply the predictor decoding */
1461    ctx->predictor_decode_stereo(ctx, count);
1462
1463    /* Decorrelate and scale to output depth */
1464    while (count--) {
1465        left = *decoded1 - (unsigned)(*decoded0 / 2);
1466        right = left + *decoded0;
1467
1468        *(decoded0++) = left;
1469        *(decoded1++) = right;
1470    }
1471}
1472
/**
 * Decode callback: turn (part of) one packet into one output frame.
 *
 * When no samples are pending (s->samples == 0) the packet payload is copied
 * into s->data through bswap_buf(), its two leading 32-bit fields (block
 * count and offset) are read, and the frame decoder state is reinitialised.
 * Each call then decodes up to s->blocks_per_loop blocks (all pending blocks
 * for file versions below 3930), converts them to the output layout selected
 * by s->bps and, if requested, folds them into the running CRC.
 *
 * @param avctx         codec context; avctx->priv_data is the APEContext
 * @param frame         output frame, allocated here through ff_get_buffer()
 * @param got_frame_ptr set to 1 when a frame was produced, 0 otherwise
 * @param avpkt         input packet
 * @return avpkt->size once every pending block has been output, 0 while
 *         blocks of the current packet remain, or a negative AVERROR code
 */
1473static int ape_decode_frame(AVCodecContext *avctx, AVFrame *frame,
1474                            int *got_frame_ptr, AVPacket *avpkt)
1475{
1476    const uint8_t *buf = avpkt->data;
1477    APEContext *s = avctx->priv_data;
1478    uint8_t *sample8;
1479    int16_t *sample16;
1480    int32_t *sample24;
1481    int i, ch, ret;
1482    int blockstodecode;
1483    uint64_t decoded_buffer_size;
1484
1485    /* this should never be negative, but bad things will happen if it is, so
1486       check it just to make sure. */
1487    av_assert0(s->samples >= 0);
1488
    /* Nothing pending from an earlier call: start on this packet. */
1489    if(!s->samples){
1490        uint32_t nblocks, offset;
1491        int buf_size;
1492
1493        if (!avpkt->size) {
1494            *got_frame_ptr = 0;
1495            return 0;
1496        }
1497        if (avpkt->size < 8) {
1498            av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
1499            return AVERROR_INVALIDDATA;
1500        }
        /* Round the size down to a whole number of 32-bit words. */
1501        buf_size = avpkt->size & ~3;
1502        if (buf_size != avpkt->size) {
1503            av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. "
1504                   "extra bytes at the end will be skipped.\n");
1505        }
1506        if (s->fileversion < 3950) // previous versions overread two bytes
1507            buf_size += 2;
1508        av_fast_padded_malloc(&s->data, &s->data_size, buf_size);
1509        if (!s->data)
1510            return AVERROR(ENOMEM);
        /* Copy the payload word by word through bswap_buf(), then zero the
         * trailing (buf_size & 3) bytes the word-wise copy did not cover. */
1511        s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf,
1512                          buf_size >> 2);
1513        memset(s->data + (buf_size & ~3), 0, buf_size & 3);
1514        s->ptr = s->data;
1515        s->data_end = s->data + buf_size;
1516
        /* Packet header: block count followed by an offset field. */
1517        nblocks = bytestream_get_be32(&s->ptr);
1518        offset  = bytestream_get_be32(&s->ptr);
1519        if (s->fileversion >= 3900) {
            /* The offset is a byte count (0..3) skipped on the raw pointer. */
1520            if (offset > 3) {
1521                av_log(avctx, AV_LOG_ERROR, "Incorrect offset passed\n");
1522                av_freep(&s->data);
1523                s->data_size = 0;
1524                return AVERROR_INVALIDDATA;
1525            }
1526            if (s->data_end - s->ptr < offset) {
1527                av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
1528                return AVERROR_INVALIDDATA;
1529            }
1530            s->ptr += offset;
1531        } else {
            /* Older files are read through the bit reader; the offset is
             * skipped as offset * 8 bits above version 3800, else as
             * offset bits. */
1532            if ((ret = init_get_bits8(&s->gb, s->ptr, s->data_end - s->ptr)) < 0)
1533                return ret;
1534            if (s->fileversion > 3800)
1535                skip_bits_long(&s->gb, offset * 8);
1536            else
1537                skip_bits_long(&s->gb, offset);
1538        }
1539
        /* Reject block counts that are zero or would overflow the size
         * computation for the decoded sample buffer below. */
1540        if (!nblocks || nblocks > INT_MAX / 2 / sizeof(*s->decoded_buffer) - 8) {
1541            av_log(avctx, AV_LOG_ERROR, "Invalid sample count: %"PRIu32".\n",
1542                   nblocks);
1543            return AVERROR_INVALIDDATA;
1544        }
1545
1546        /* Initialize the frame decoder */
1547        if (init_frame_decoder(s) < 0) {
1548            av_log(avctx, AV_LOG_ERROR, "Error reading frame header\n");
1549            return AVERROR_INVALIDDATA;
1550        }
1551        s->samples = nblocks;
1552    }
1553
    /* Without buffered input there is nothing to decode; consume the packet. */
1554    if (!s->data) {
1555        *got_frame_ptr = 0;
1556        return avpkt->size;
1557    }
1558
1559    blockstodecode = FFMIN(s->blocks_per_loop, s->samples);
1560    // for old files coefficients were not interleaved,
1561    // so we need to decode all of them at once
1562    if (s->fileversion < 3930)
1563        blockstodecode = s->samples;
1564
1565    /* reallocate decoded sample buffer if needed */
    /* Two channel planes, each padded to a multiple of 8 samples. */
1566    decoded_buffer_size = 2LL * FFALIGN(blockstodecode, 8) * sizeof(*s->decoded_buffer);
1567    av_assert0(decoded_buffer_size <= INT_MAX);
1568
1569    /* get output buffer */
1570    frame->nb_samples = blockstodecode;
1571    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
1572        s->samples=0;
1573        return ret;
1574    }
1575
1576    av_fast_malloc(&s->decoded_buffer, &s->decoded_size, decoded_buffer_size);
1577    if (!s->decoded_buffer)
1578        return AVERROR(ENOMEM);
1579    memset(s->decoded_buffer, 0, decoded_buffer_size);
1580    s->decoded[0] = s->decoded_buffer;
1581    s->decoded[1] = s->decoded_buffer + FFALIGN(blockstodecode, 8);
1582
1583    s->error=0;
1584
    /* Single-channel streams and pseudo-stereo frames use the mono path. */
1585    if ((s->channels == 1) || (s->frameflags & APE_FRAMECODE_PSEUDO_STEREO))
1586        ape_unpack_mono(s, blockstodecode);
1587    else
1588        ape_unpack_stereo(s, blockstodecode);
1589    emms_c();
1590
1591    if (s->error) {
1592        s->samples=0;
1593        av_log(avctx, AV_LOG_ERROR, "Error decoding frame\n");
1594        return AVERROR_INVALIDDATA;
1595    }
1596
    /* Convert the 32-bit working samples to the planar output layout:
     * 8-bit is biased by 0x80 and masked to one byte, 16-bit is stored
     * as-is, 24-bit is multiplied by 256 into a 32-bit container. */
1597    switch (s->bps) {
1598    case 8:
1599        for (ch = 0; ch < s->channels; ch++) {
1600            sample8 = (uint8_t *)frame->data[ch];
1601            for (i = 0; i < blockstodecode; i++)
1602                *sample8++ = (s->decoded[ch][i] + 0x80U) & 0xff;
1603        }
1604        break;
1605    case 16:
1606        for (ch = 0; ch < s->channels; ch++) {
1607            sample16 = (int16_t *)frame->data[ch];
1608            for (i = 0; i < blockstodecode; i++)
1609                *sample16++ = s->decoded[ch][i];
1610        }
1611        break;
1612    case 24:
1613        for (ch = 0; ch < s->channels; ch++) {
1614            sample24 = (int32_t *)frame->data[ch];
1615            for (i = 0; i < blockstodecode; i++)
1616                *sample24++ = s->decoded[ch][i] * 256U;
1617        }
1618        break;
1619    }
1620
1621    s->samples -= blockstodecode;
1622
1623    if (avctx->err_recognition & AV_EF_CRCCHECK &&
1624        s->fileversion >= 3900) {
1625        uint32_t crc = s->CRC_state;
1626        const AVCRC *crc_tab = av_crc_get_table(AV_CRC_32_IEEE_LE);
        /* stride: bytes between consecutive samples in an output plane;
         * offset: skips the lowest byte of a 24-bit sample stored in 32 bits;
         * bytes:  number of bytes hashed per sample. */
1627        int stride = s->bps == 24 ? 4 : (s->bps>>3);
1628        int offset = s->bps == 24;
1629        int bytes  = s->bps >> 3;
1630
        /* Hash the samples interleaved: all channels of block i, then i + 1. */
1631        for (i = 0; i < blockstodecode; i++) {
1632            for (ch = 0; ch < s->channels; ch++) {
1633#if HAVE_BIGENDIAN
                /* Reverse the byte order of each sample before hashing. */
1634                uint8_t *smp_native = frame->data[ch] + i*stride;
1635                uint8_t smp[4];
1636                for(int j = 0; j<stride; j++)
1637                    smp[j] = smp_native[stride-j-1];
1638#else
1639                uint8_t *smp = frame->data[ch] + i*stride;
1640#endif
1641                crc = av_crc(crc_tab, crc, smp+offset, bytes);
1642            }
1643        }
1644
        /* The comparison is only made once the last block has been output. */
1645        if (!s->samples && (~crc >> 1) ^ s->CRC) {
1646            av_log(avctx, AV_LOG_ERROR, "CRC mismatch! Previously decoded "
1647                   "frames may have been affected as well.\n");
1648            if (avctx->err_recognition & AV_EF_EXPLODE)
1649                return AVERROR_INVALIDDATA;
1650        }
1651
1652        s->CRC_state = crc;
1653    }
1654
1655    *got_frame_ptr = 1;
1656
    /* Report the packet as consumed only after its last block was output. */
1657    return !s->samples ? avpkt->size : 0;
1658}
1659
1660static void ape_flush(AVCodecContext *avctx)
1661{
1662    APEContext *s = avctx->priv_data;
1663    s->samples= 0;
1664}
1665
/* Byte offset of field x inside APEContext, for use in the option table. */
1666#define OFFSET(x) offsetof(APEContext, x)
/* Flags applied to every option below: decoding parameter, audio parameter. */
1667#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/*
 * Private decoder options.
 * "max_samples" is stored in APEContext.blocks_per_loop (default 4608,
 * range 1..INT_MAX); "all" is a named constant in the same "max_samples"
 * unit that stands for INT_MAX.
 */
1668static const AVOption options[] = {
1669    { "max_samples", "maximum number of samples decoded per call",             OFFSET(blocks_per_loop), AV_OPT_TYPE_INT,   { .i64 = 4608 },    1,       INT_MAX, PAR, "max_samples" },
1670    { "all",         "no maximum. decode all samples for each packet at once", 0,                       AV_OPT_TYPE_CONST, { .i64 = INT_MAX }, INT_MIN, INT_MAX, PAR, "max_samples" },
1671    { NULL},
1672};
1673
/* AVClass describing the decoder's private context; it exposes the
 * `options` table and is referenced from ff_ape_decoder.p.priv_class. */
1674static const AVClass ape_decoder_class = {
1675    .class_name = "APE decoder",
1676    .item_name  = av_default_item_name,
1677    .option     = options,
1678    .version    = LIBAVUTIL_VERSION_INT,
1679};
1680
/*
 * Codec registration entry for the Monkey's Audio decoder.
 * Wires up the init/close/decode/flush callbacks, sets the private context
 * size to sizeof(APEContext), and lists three planar output sample formats
 * (U8P, S16P, S32P).
 */
1681const FFCodec ff_ape_decoder = {
1682    .p.name         = "ape",
1683    .p.long_name    = NULL_IF_CONFIG_SMALL("Monkey's Audio"),
1684    .p.type         = AVMEDIA_TYPE_AUDIO,
1685    .p.id           = AV_CODEC_ID_APE,
1686    .priv_data_size = sizeof(APEContext),
1687    .init           = ape_decode_init,
1688    .close          = ape_decode_close,
1689    FF_CODEC_DECODE_CB(ape_decode_frame),
1690    .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DELAY |
1691                      AV_CODEC_CAP_DR1,
1692    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1693    .flush          = ape_flush,
    /* The list is terminated by AV_SAMPLE_FMT_NONE. */
1694    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
1695                                                      AV_SAMPLE_FMT_S16P,
1696                                                      AV_SAMPLE_FMT_S32P,
1697                                                      AV_SAMPLE_FMT_NONE },
1698    .p.priv_class   = &ape_decoder_class,
1699};
1700