xref: /third_party/ffmpeg/libavcodec/sonic.c (revision cabdff1a)
1/*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include "config_components.h"
23
24#include "avcodec.h"
25#include "codec_internal.h"
26#include "encode.h"
27#include "get_bits.h"
28#include "golomb.h"
29#include "internal.h"
30#include "put_golomb.h"
31#include "rangecoder.h"
32
33
34/**
35 * @file
36 * Simple free lossless/lossy audio codec
37 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
38 * Written and designed by Alex Beregszaszi
39 *
40 * TODO:
41 *  - CABAC put/get_symbol
42 *  - independent quantizer for channels
43 *  - >2 channels support
44 *  - more decorrelation types
45 *  - more tap_quant tests
46 *  - selectable intlist writers/readers (bonk-style, golomb, cabac)
47 */
48
49#define MAX_CHANNELS 2
50
51#define MID_SIDE 0
52#define LEFT_SIDE 1
53#define RIGHT_SIDE 2
54
55typedef struct SonicContext {
56    int version;
57    int minor_version;
58    int lossless, decorrelation;
59
60    int num_taps, downsampling;
61    double quantization;
62
63    int channels, samplerate, block_align, frame_size;
64
65    int *tap_quant;
66    int *int_samples;
67    int *coded_samples[MAX_CHANNELS];
68
69    // for encoding
70    int *tail;
71    int tail_size;
72    int *window;
73    int window_size;
74
75    // for decoding
76    int *predictor_k;
77    int *predictor_state[MAX_CHANNELS];
78} SonicContext;
79
80#define LATTICE_SHIFT   10
81#define SAMPLE_SHIFT    4
82#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
83#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)
84
85#define BASE_QUANT      0.6
86#define RATE_VARIATION  3.0
87
88static inline int shift(int a,int b)
89{
90    return (a+(1<<(b-1))) >> b;
91}
92
93static inline int shift_down(int a,int b)
94{
95    return (a>>b)+(a<0);
96}
97
98static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
99    int i;
100
101#define put_rac(C,S,B) \
102do{\
103    if(rc_stat){\
104        rc_stat[*(S)][B]++;\
105        rc_stat2[(S)-state][B]++;\
106    }\
107    put_rac(C,S,B);\
108}while(0)
109
110    if(v){
111        const int a= FFABS(v);
112        const int e= av_log2(a);
113        put_rac(c, state+0, 0);
114        if(e<=9){
115            for(i=0; i<e; i++){
116                put_rac(c, state+1+i, 1);  //1..10
117            }
118            put_rac(c, state+1+i, 0);
119
120            for(i=e-1; i>=0; i--){
121                put_rac(c, state+22+i, (a>>i)&1); //22..31
122            }
123
124            if(is_signed)
125                put_rac(c, state+11 + e, v < 0); //11..21
126        }else{
127            for(i=0; i<e; i++){
128                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
129            }
130            put_rac(c, state+1+9, 0);
131
132            for(i=e-1; i>=0; i--){
133                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
134            }
135
136            if(is_signed)
137                put_rac(c, state+11 + 10, v < 0); //11..21
138        }
139    }else{
140        put_rac(c, state+0, 1);
141    }
142#undef put_rac
143}
144
145static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
146    if(get_rac(c, state+0))
147        return 0;
148    else{
149        int i, e;
150        unsigned a;
151        e= 0;
152        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
153            e++;
154            if (e > 31)
155                return AVERROR_INVALIDDATA;
156        }
157
158        a= 1;
159        for(i=e-1; i>=0; i--){
160            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
161        }
162
163        e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
164        return (a^e)-e;
165    }
166}
167
168#if 1
169static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
170{
171    int i;
172
173    for (i = 0; i < entries; i++)
174        put_symbol(c, state, buf[i], 1, NULL, NULL);
175
176    return 1;
177}
178
179static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
180{
181    int i;
182
183    for (i = 0; i < entries; i++)
184        buf[i] = get_symbol(c, state, 1);
185
186    return 1;
187}
188#elif 1
189static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
190{
191    int i;
192
193    for (i = 0; i < entries; i++)
194        set_se_golomb(pb, buf[i]);
195
196    return 1;
197}
198
199static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
200{
201    int i;
202
203    for (i = 0; i < entries; i++)
204        buf[i] = get_se_golomb(gb);
205
206    return 1;
207}
208
209#else
210
211#define ADAPT_LEVEL 8
212
213static int bits_to_store(uint64_t x)
214{
215    int res = 0;
216
217    while(x)
218    {
219        res++;
220        x >>= 1;
221    }
222    return res;
223}
224
225static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
226{
227    int i, bits;
228
229    if (!max)
230        return;
231
232    bits = bits_to_store(max);
233
234    for (i = 0; i < bits-1; i++)
235        put_bits(pb, 1, value & (1 << i));
236
237    if ( (value | (1 << (bits-1))) <= max)
238        put_bits(pb, 1, value & (1 << (bits-1)));
239}
240
241static unsigned int read_uint_max(GetBitContext *gb, int max)
242{
243    int i, bits, value = 0;
244
245    if (!max)
246        return 0;
247
248    bits = bits_to_store(max);
249
250    for (i = 0; i < bits-1; i++)
251        if (get_bits1(gb))
252            value += 1 << i;
253
254    if ( (value | (1<<(bits-1))) <= max)
255        if (get_bits1(gb))
256            value += 1 << (bits-1);
257
258    return value;
259}
260
261static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
262{
263    int i, j, x = 0, low_bits = 0, max = 0;
264    int step = 256, pos = 0, dominant = 0, any = 0;
265    int *copy, *bits;
266
267    copy = av_calloc(entries, sizeof(*copy));
268    if (!copy)
269        return AVERROR(ENOMEM);
270
271    if (base_2_part)
272    {
273        int energy = 0;
274
275        for (i = 0; i < entries; i++)
276            energy += abs(buf[i]);
277
278        low_bits = bits_to_store(energy / (entries * 2));
279        if (low_bits > 15)
280            low_bits = 15;
281
282        put_bits(pb, 4, low_bits);
283    }
284
285    for (i = 0; i < entries; i++)
286    {
287        put_bits(pb, low_bits, abs(buf[i]));
288        copy[i] = abs(buf[i]) >> low_bits;
289        if (copy[i] > max)
290            max = abs(copy[i]);
291    }
292
293    bits = av_calloc(entries*max, sizeof(*bits));
294    if (!bits)
295    {
296        av_free(copy);
297        return AVERROR(ENOMEM);
298    }
299
300    for (i = 0; i <= max; i++)
301    {
302        for (j = 0; j < entries; j++)
303            if (copy[j] >= i)
304                bits[x++] = copy[j] > i;
305    }
306
307    // store bitstream
308    while (pos < x)
309    {
310        int steplet = step >> 8;
311
312        if (pos + steplet > x)
313            steplet = x - pos;
314
315        for (i = 0; i < steplet; i++)
316            if (bits[i+pos] != dominant)
317                any = 1;
318
319        put_bits(pb, 1, any);
320
321        if (!any)
322        {
323            pos += steplet;
324            step += step / ADAPT_LEVEL;
325        }
326        else
327        {
328            int interloper = 0;
329
330            while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
331                interloper++;
332
333            // note change
334            write_uint_max(pb, interloper, (step >> 8) - 1);
335
336            pos += interloper + 1;
337            step -= step / ADAPT_LEVEL;
338        }
339
340        if (step < 256)
341        {
342            step = 65536 / step;
343            dominant = !dominant;
344        }
345    }
346
347    // store signs
348    for (i = 0; i < entries; i++)
349        if (buf[i])
350            put_bits(pb, 1, buf[i] < 0);
351
352    av_free(bits);
353    av_free(copy);
354
355    return 0;
356}
357
358static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
359{
360    int i, low_bits = 0, x = 0;
361    int n_zeros = 0, step = 256, dominant = 0;
362    int pos = 0, level = 0;
363    int *bits = av_calloc(entries, sizeof(*bits));
364
365    if (!bits)
366        return AVERROR(ENOMEM);
367
368    if (base_2_part)
369    {
370        low_bits = get_bits(gb, 4);
371
372        if (low_bits)
373            for (i = 0; i < entries; i++)
374                buf[i] = get_bits(gb, low_bits);
375    }
376
377//    av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
378
379    while (n_zeros < entries)
380    {
381        int steplet = step >> 8;
382
383        if (!get_bits1(gb))
384        {
385            for (i = 0; i < steplet; i++)
386                bits[x++] = dominant;
387
388            if (!dominant)
389                n_zeros += steplet;
390
391            step += step / ADAPT_LEVEL;
392        }
393        else
394        {
395            int actual_run = read_uint_max(gb, steplet-1);
396
397//            av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
398
399            for (i = 0; i < actual_run; i++)
400                bits[x++] = dominant;
401
402            bits[x++] = !dominant;
403
404            if (!dominant)
405                n_zeros += actual_run;
406            else
407                n_zeros++;
408
409            step -= step / ADAPT_LEVEL;
410        }
411
412        if (step < 256)
413        {
414            step = 65536 / step;
415            dominant = !dominant;
416        }
417    }
418
419    // reconstruct unsigned values
420    n_zeros = 0;
421    for (i = 0; n_zeros < entries; i++)
422    {
423        while(1)
424        {
425            if (pos >= entries)
426            {
427                pos = 0;
428                level += 1 << low_bits;
429            }
430
431            if (buf[pos] >= level)
432                break;
433
434            pos++;
435        }
436
437        if (bits[i])
438            buf[pos] += 1 << low_bits;
439        else
440            n_zeros++;
441
442        pos++;
443    }
444    av_free(bits);
445
446    // read signs
447    for (i = 0; i < entries; i++)
448        if (buf[i] && get_bits1(gb))
449            buf[i] = -buf[i];
450
451//    av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
452
453    return 0;
454}
455#endif
456
457static void predictor_init_state(int *k, int *state, int order)
458{
459    int i;
460
461    for (i = order-2; i >= 0; i--)
462    {
463        int j, p, x = state[i];
464
465        for (j = 0, p = i+1; p < order; j++,p++)
466            {
467            int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
468            state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
469            x = tmp;
470        }
471    }
472}
473
474static int predictor_calc_error(int *k, int *state, int order, int error)
475{
476    int i, x = error - (unsigned)shift_down(k[order-1] *  (unsigned)state[order-1], LATTICE_SHIFT);
477
478#if 1
479    int *k_ptr = &(k[order-2]),
480        *state_ptr = &(state[order-2]);
481    for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
482    {
483        int k_value = *k_ptr, state_value = *state_ptr;
484        x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
485        state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
486    }
487#else
488    for (i = order-2; i >= 0; i--)
489    {
490        x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
491        state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
492    }
493#endif
494
495    // don't drift too far, to avoid overflows
496    if (x >  (SAMPLE_FACTOR<<16)) x =  (SAMPLE_FACTOR<<16);
497    if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
498
499    state[0] = x;
500
501    return x;
502}
503
504#if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
505// Heavily modified Levinson-Durbin algorithm which
506// copes better with quantization, and calculates the
507// actual whitened result as it goes.
508
509static void modified_levinson_durbin(int *window, int window_entries,
510        int *out, int out_entries, int channels, int *tap_quant)
511{
512    int i;
513    int *state = window + window_entries;
514
515    memcpy(state, window, window_entries * sizeof(*state));
516
517    for (i = 0; i < out_entries; i++)
518    {
519        int step = (i+1)*channels, k, j;
520        double xx = 0.0, xy = 0.0;
521#if 1
522        int *x_ptr = &(window[step]);
523        int *state_ptr = &(state[0]);
524        j = window_entries - step;
525        for (;j>0;j--,x_ptr++,state_ptr++)
526        {
527            double x_value = *x_ptr;
528            double state_value = *state_ptr;
529            xx += state_value*state_value;
530            xy += x_value*state_value;
531        }
532#else
533        for (j = 0; j <= (window_entries - step); j++);
534        {
535            double stepval = window[step+j];
536            double stateval = window[j];
537//            xx += (double)window[j]*(double)window[j];
538//            xy += (double)window[step+j]*(double)window[j];
539            xx += stateval*stateval;
540            xy += stepval*stateval;
541        }
542#endif
543        if (xx == 0.0)
544            k = 0;
545        else
546            k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
547
548        if (k > (LATTICE_FACTOR/tap_quant[i]))
549            k = LATTICE_FACTOR/tap_quant[i];
550        if (-k > (LATTICE_FACTOR/tap_quant[i]))
551            k = -(LATTICE_FACTOR/tap_quant[i]);
552
553        out[i] = k;
554        k *= tap_quant[i];
555
556#if 1
557        x_ptr = &(window[step]);
558        state_ptr = &(state[0]);
559        j = window_entries - step;
560        for (;j>0;j--,x_ptr++,state_ptr++)
561        {
562            int x_value = *x_ptr;
563            int state_value = *state_ptr;
564            *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
565            *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
566        }
567#else
568        for (j=0; j <= (window_entries - step); j++)
569        {
570            int stepval = window[step+j];
571            int stateval=state[j];
572            window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
573            state[j] += shift_down(k * stepval, LATTICE_SHIFT);
574        }
575#endif
576    }
577}
578
579static inline int code_samplerate(int samplerate)
580{
581    switch (samplerate)
582    {
583        case 44100: return 0;
584        case 22050: return 1;
585        case 11025: return 2;
586        case 96000: return 3;
587        case 48000: return 4;
588        case 32000: return 5;
589        case 24000: return 6;
590        case 16000: return 7;
591        case 8000: return 8;
592    }
593    return AVERROR(EINVAL);
594}
595
596static av_cold int sonic_encode_init(AVCodecContext *avctx)
597{
598    SonicContext *s = avctx->priv_data;
599    int *coded_samples;
600    PutBitContext pb;
601    int i;
602
603    s->version = 2;
604
605    if (avctx->ch_layout.nb_channels > MAX_CHANNELS)
606    {
607        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
608        return AVERROR(EINVAL); /* only stereo or mono for now */
609    }
610
611    if (avctx->ch_layout.nb_channels == 2)
612        s->decorrelation = MID_SIDE;
613    else
614        s->decorrelation = 3;
615
616    if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
617    {
618        s->lossless = 1;
619        s->num_taps = 32;
620        s->downsampling = 1;
621        s->quantization = 0.0;
622    }
623    else
624    {
625        s->num_taps = 128;
626        s->downsampling = 2;
627        s->quantization = 1.0;
628    }
629
630    // max tap 2048
631    if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
632        av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
633        return AVERROR_INVALIDDATA;
634    }
635
636    // generate taps
637    s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
638    if (!s->tap_quant)
639        return AVERROR(ENOMEM);
640
641    for (i = 0; i < s->num_taps; i++)
642        s->tap_quant[i] = ff_sqrt(i+1);
643
644    s->channels = avctx->ch_layout.nb_channels;
645    s->samplerate = avctx->sample_rate;
646
647    s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
648    s->frame_size = s->channels*s->block_align*s->downsampling;
649
650    s->tail_size = s->num_taps*s->channels;
651    s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
652    if (!s->tail)
653        return AVERROR(ENOMEM);
654
655    s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
656    if (!s->predictor_k)
657        return AVERROR(ENOMEM);
658
659    coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
660    if (!coded_samples)
661        return AVERROR(ENOMEM);
662    for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
663        s->coded_samples[i] = coded_samples;
664
665    s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
666
667    s->window_size = ((2*s->tail_size)+s->frame_size);
668    s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
669    if (!s->window || !s->int_samples)
670        return AVERROR(ENOMEM);
671
672    avctx->extradata = av_mallocz(16);
673    if (!avctx->extradata)
674        return AVERROR(ENOMEM);
675    init_put_bits(&pb, avctx->extradata, 16*8);
676
677    put_bits(&pb, 2, s->version); // version
678    if (s->version >= 1)
679    {
680        if (s->version >= 2) {
681            put_bits(&pb, 8, s->version);
682            put_bits(&pb, 8, s->minor_version);
683        }
684        put_bits(&pb, 2, s->channels);
685        put_bits(&pb, 4, code_samplerate(s->samplerate));
686    }
687    put_bits(&pb, 1, s->lossless);
688    if (!s->lossless)
689        put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
690    put_bits(&pb, 2, s->decorrelation);
691    put_bits(&pb, 2, s->downsampling);
692    put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
693    put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
694
695    flush_put_bits(&pb);
696    avctx->extradata_size = put_bytes_output(&pb);
697
698    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
699        s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
700
701    avctx->frame_size = s->block_align*s->downsampling;
702
703    return 0;
704}
705
706static av_cold int sonic_encode_close(AVCodecContext *avctx)
707{
708    SonicContext *s = avctx->priv_data;
709
710    av_freep(&s->coded_samples[0]);
711    av_freep(&s->predictor_k);
712    av_freep(&s->tail);
713    av_freep(&s->tap_quant);
714    av_freep(&s->window);
715    av_freep(&s->int_samples);
716
717    return 0;
718}
719
720static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
721                              const AVFrame *frame, int *got_packet_ptr)
722{
723    SonicContext *s = avctx->priv_data;
724    RangeCoder c;
725    int i, j, ch, quant = 0, x = 0;
726    int ret;
727    const short *samples = (const int16_t*)frame->data[0];
728    uint8_t state[32];
729
730    if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
731        return ret;
732
733    ff_init_range_encoder(&c, avpkt->data, avpkt->size);
734    ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
735    memset(state, 128, sizeof(state));
736
737    // short -> internal
738    for (i = 0; i < s->frame_size; i++)
739        s->int_samples[i] = samples[i];
740
741    if (!s->lossless)
742        for (i = 0; i < s->frame_size; i++)
743            s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
744
745    switch(s->decorrelation)
746    {
747        case MID_SIDE:
748            for (i = 0; i < s->frame_size; i += s->channels)
749            {
750                s->int_samples[i] += s->int_samples[i+1];
751                s->int_samples[i+1] -= shift(s->int_samples[i], 1);
752            }
753            break;
754        case LEFT_SIDE:
755            for (i = 0; i < s->frame_size; i += s->channels)
756                s->int_samples[i+1] -= s->int_samples[i];
757            break;
758        case RIGHT_SIDE:
759            for (i = 0; i < s->frame_size; i += s->channels)
760                s->int_samples[i] -= s->int_samples[i+1];
761            break;
762    }
763
764    memset(s->window, 0, s->window_size * sizeof(*s->window));
765
766    for (i = 0; i < s->tail_size; i++)
767        s->window[x++] = s->tail[i];
768
769    for (i = 0; i < s->frame_size; i++)
770        s->window[x++] = s->int_samples[i];
771
772    for (i = 0; i < s->tail_size; i++)
773        s->window[x++] = 0;
774
775    for (i = 0; i < s->tail_size; i++)
776        s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
777
778    // generate taps
779    modified_levinson_durbin(s->window, s->window_size,
780                s->predictor_k, s->num_taps, s->channels, s->tap_quant);
781
782    if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
783        return ret;
784
785    for (ch = 0; ch < s->channels; ch++)
786    {
787        x = s->tail_size+ch;
788        for (i = 0; i < s->block_align; i++)
789        {
790            int sum = 0;
791            for (j = 0; j < s->downsampling; j++, x += s->channels)
792                sum += s->window[x];
793            s->coded_samples[ch][i] = sum;
794        }
795    }
796
797    // simple rate control code
798    if (!s->lossless)
799    {
800        double energy1 = 0.0, energy2 = 0.0;
801        for (ch = 0; ch < s->channels; ch++)
802        {
803            for (i = 0; i < s->block_align; i++)
804            {
805                double sample = s->coded_samples[ch][i];
806                energy2 += sample*sample;
807                energy1 += fabs(sample);
808            }
809        }
810
811        energy2 = sqrt(energy2/(s->channels*s->block_align));
812        energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
813
814        // increase bitrate when samples are like a gaussian distribution
815        // reduce bitrate when samples are like a two-tailed exponential distribution
816
817        if (energy2 > energy1)
818            energy2 += (energy2-energy1)*RATE_VARIATION;
819
820        quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
821//        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
822
823        quant = av_clip(quant, 1, 65534);
824
825        put_symbol(&c, state, quant, 0, NULL, NULL);
826
827        quant *= SAMPLE_FACTOR;
828    }
829
830    // write out coded samples
831    for (ch = 0; ch < s->channels; ch++)
832    {
833        if (!s->lossless)
834            for (i = 0; i < s->block_align; i++)
835                s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
836
837        if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
838            return ret;
839    }
840
841    avpkt->size = ff_rac_terminate(&c, 0);
842    *got_packet_ptr = 1;
843    return 0;
844
845}
846#endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
847
848#if CONFIG_SONIC_DECODER
849static const int samplerate_table[] =
850    { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
851
852static av_cold int sonic_decode_init(AVCodecContext *avctx)
853{
854    SonicContext *s = avctx->priv_data;
855    int *tmp;
856    GetBitContext gb;
857    int i;
858    int ret;
859
860    s->channels = avctx->ch_layout.nb_channels;
861    s->samplerate = avctx->sample_rate;
862
863    if (!avctx->extradata)
864    {
865        av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
866        return AVERROR_INVALIDDATA;
867    }
868
869    ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
870    if (ret < 0)
871        return ret;
872
873    s->version = get_bits(&gb, 2);
874    if (s->version >= 2) {
875        s->version       = get_bits(&gb, 8);
876        s->minor_version = get_bits(&gb, 8);
877    }
878    if (s->version != 2)
879    {
880        av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
881        return AVERROR_INVALIDDATA;
882    }
883
884    if (s->version >= 1)
885    {
886        int sample_rate_index;
887        s->channels = get_bits(&gb, 2);
888        sample_rate_index = get_bits(&gb, 4);
889        if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
890            av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
891            return AVERROR_INVALIDDATA;
892        }
893        s->samplerate = samplerate_table[sample_rate_index];
894        av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
895            s->channels, s->samplerate);
896    }
897
898    if (s->channels > MAX_CHANNELS || s->channels < 1)
899    {
900        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
901        return AVERROR_INVALIDDATA;
902    }
903    av_channel_layout_uninit(&avctx->ch_layout);
904    avctx->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
905    avctx->ch_layout.nb_channels = s->channels;
906
907    s->lossless = get_bits1(&gb);
908    if (!s->lossless)
909        skip_bits(&gb, 3); // XXX FIXME
910    s->decorrelation = get_bits(&gb, 2);
911    if (s->decorrelation != 3 && s->channels != 2) {
912        av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
913        return AVERROR_INVALIDDATA;
914    }
915
916    s->downsampling = get_bits(&gb, 2);
917    if (!s->downsampling) {
918        av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
919        return AVERROR_INVALIDDATA;
920    }
921
922    s->num_taps = (get_bits(&gb, 5)+1)<<5;
923    if (get_bits1(&gb)) // XXX FIXME
924        av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
925
926    s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
927    s->frame_size = s->channels*s->block_align*s->downsampling;
928//    avctx->frame_size = s->block_align;
929
930    if (s->num_taps * s->channels > s->frame_size) {
931        av_log(avctx, AV_LOG_ERROR,
932               "number of taps times channels (%d * %d) larger than frame size %d\n",
933               s->num_taps, s->channels, s->frame_size);
934        return AVERROR_INVALIDDATA;
935    }
936
937    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
938        s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
939
940    // generate taps
941    s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
942    if (!s->tap_quant)
943        return AVERROR(ENOMEM);
944
945    for (i = 0; i < s->num_taps; i++)
946        s->tap_quant[i] = ff_sqrt(i+1);
947
948    s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
949
950    tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
951    if (!tmp)
952        return AVERROR(ENOMEM);
953    for (i = 0; i < s->channels; i++, tmp += s->num_taps)
954        s->predictor_state[i] = tmp;
955
956    tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
957    if (!tmp)
958        return AVERROR(ENOMEM);
959    for (i = 0; i < s->channels; i++, tmp += s->block_align)
960        s->coded_samples[i]   = tmp;
961
962    s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
963    if (!s->int_samples)
964        return AVERROR(ENOMEM);
965
966    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
967    return 0;
968}
969
970static av_cold int sonic_decode_close(AVCodecContext *avctx)
971{
972    SonicContext *s = avctx->priv_data;
973
974    av_freep(&s->int_samples);
975    av_freep(&s->tap_quant);
976    av_freep(&s->predictor_k);
977    av_freep(&s->predictor_state[0]);
978    av_freep(&s->coded_samples[0]);
979
980    return 0;
981}
982
983static int sonic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
984                              int *got_frame_ptr, AVPacket *avpkt)
985{
986    const uint8_t *buf = avpkt->data;
987    int buf_size = avpkt->size;
988    SonicContext *s = avctx->priv_data;
989    RangeCoder c;
990    uint8_t state[32];
991    int i, quant, ch, j, ret;
992    int16_t *samples;
993
994    if (buf_size == 0) return 0;
995
996    frame->nb_samples = s->frame_size / avctx->ch_layout.nb_channels;
997    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
998        return ret;
999    samples = (int16_t *)frame->data[0];
1000
1001//    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
1002
1003    memset(state, 128, sizeof(state));
1004    ff_init_range_decoder(&c, buf, buf_size);
1005    ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1006
1007    intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1008
1009    // dequantize
1010    for (i = 0; i < s->num_taps; i++)
1011        s->predictor_k[i] *= (unsigned) s->tap_quant[i];
1012
1013    if (s->lossless)
1014        quant = 1;
1015    else
1016        quant = get_symbol(&c, state, 0) * (unsigned)SAMPLE_FACTOR;
1017
1018//    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1019
1020    for (ch = 0; ch < s->channels; ch++)
1021    {
1022        int x = ch;
1023
1024        if (c.overread > MAX_OVERREAD)
1025            return AVERROR_INVALIDDATA;
1026
1027        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1028
1029        intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1030
1031        for (i = 0; i < s->block_align; i++)
1032        {
1033            for (j = 0; j < s->downsampling - 1; j++)
1034            {
1035                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1036                x += s->channels;
1037            }
1038
1039            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1040            x += s->channels;
1041        }
1042
1043        for (i = 0; i < s->num_taps; i++)
1044            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1045    }
1046
1047    switch(s->decorrelation)
1048    {
1049        case MID_SIDE:
1050            for (i = 0; i < s->frame_size; i += s->channels)
1051            {
1052                s->int_samples[i+1] += shift(s->int_samples[i], 1);
1053                s->int_samples[i] -= s->int_samples[i+1];
1054            }
1055            break;
1056        case LEFT_SIDE:
1057            for (i = 0; i < s->frame_size; i += s->channels)
1058                s->int_samples[i+1] += s->int_samples[i];
1059            break;
1060        case RIGHT_SIDE:
1061            for (i = 0; i < s->frame_size; i += s->channels)
1062                s->int_samples[i] += s->int_samples[i+1];
1063            break;
1064    }
1065
1066    if (!s->lossless)
1067        for (i = 0; i < s->frame_size; i++)
1068            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1069
1070    // internal -> short
1071    for (i = 0; i < s->frame_size; i++)
1072        samples[i] = av_clip_int16(s->int_samples[i]);
1073
1074    *got_frame_ptr = 1;
1075
1076    return buf_size;
1077}
1078
1079const FFCodec ff_sonic_decoder = {
1080    .p.name         = "sonic",
1081    .p.long_name    = NULL_IF_CONFIG_SMALL("Sonic"),
1082    .p.type         = AVMEDIA_TYPE_AUDIO,
1083    .p.id           = AV_CODEC_ID_SONIC,
1084    .priv_data_size = sizeof(SonicContext),
1085    .init           = sonic_decode_init,
1086    .close          = sonic_decode_close,
1087    FF_CODEC_DECODE_CB(sonic_decode_frame),
1088    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
1089    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1090};
1091#endif /* CONFIG_SONIC_DECODER */
1092
1093#if CONFIG_SONIC_ENCODER
1094const FFCodec ff_sonic_encoder = {
1095    .p.name         = "sonic",
1096    .p.long_name    = NULL_IF_CONFIG_SMALL("Sonic"),
1097    .p.type         = AVMEDIA_TYPE_AUDIO,
1098    .p.id           = AV_CODEC_ID_SONIC,
1099    .priv_data_size = sizeof(SonicContext),
1100    .init           = sonic_encode_init,
1101    FF_CODEC_ENCODE_CB(sonic_encode_frame),
1102    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1103    .p.capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1104    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1105    .close          = sonic_encode_close,
1106};
1107#endif
1108
1109#if CONFIG_SONIC_LS_ENCODER
1110const FFCodec ff_sonic_ls_encoder = {
1111    .p.name         = "sonicls",
1112    .p.long_name    = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1113    .p.type         = AVMEDIA_TYPE_AUDIO,
1114    .p.id           = AV_CODEC_ID_SONIC_LS,
1115    .priv_data_size = sizeof(SonicContext),
1116    .init           = sonic_encode_init,
1117    FF_CODEC_ENCODE_CB(sonic_encode_frame),
1118    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1119    .p.capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1120    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1121    .close          = sonic_encode_close,
1122};
1123#endif
1124