1/*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38#include "libavutil/common.h"
39#include "libavutil/float_dsp.h"
40#include "libavutil/mathematics.h"
41#include "libavutil/thread.h"
42
43#include "audio_frame_queue.h"
44#include "avcodec.h"
45#include "codec_internal.h"
46#include "encode.h"
47#include "fft.h"
48#include "nellymoser.h"
49#include "sinewin.h"
50
51#define BITSTREAM_WRITER_LE
52#include "put_bits.h"
53
54#define POW_TABLE_SIZE (1<<11)
55#define POW_TABLE_OFFSET 3
56#define OPT_SIZE ((1<<15) + 3000)
57
58typedef struct NellyMoserEncodeContext {
59    AVCodecContext  *avctx;
60    int             last_frame;
61    AVFloatDSPContext *fdsp;
62    FFTContext      mdct_ctx;
63    AudioFrameQueue afq;
64    DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
65    DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
66    DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
67    float           (*opt )[OPT_SIZE];
68    uint8_t         (*path)[OPT_SIZE];
69} NellyMoserEncodeContext;
70
71static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0 - 3.0);
72
73static const uint8_t sf_lut[96] = {
74     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
75     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
76    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
77    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
78    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
79    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80};
81
82static const uint8_t sf_delta_lut[78] = {
83     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
84     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
85    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
86    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
87    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88};
89
90static const uint8_t quant_lut[230] = {
91     0,
92
93     0,  1,  2,
94
95     0,  1,  2,  3,  4,  5,  6,
96
97     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
98    12, 13, 13, 13, 14,
99
100     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
101     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
102    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103    30,
104
105     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
106     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
107    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
108    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
109    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
110    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
111    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
112    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
113    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
114    61, 61, 61, 61, 62,
115};
116
117static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
118static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
119static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120
121static void apply_mdct(NellyMoserEncodeContext *s)
122{
123    float *in0 = s->buf;
124    float *in1 = s->buf + NELLY_BUF_LEN;
125    float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126
127    s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
128    s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
129    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130
131    s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
132    s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
133    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134}
135
136static av_cold int encode_end(AVCodecContext *avctx)
137{
138    NellyMoserEncodeContext *s = avctx->priv_data;
139
140    ff_mdct_end(&s->mdct_ctx);
141
142    av_freep(&s->opt);
143    av_freep(&s->path);
144    ff_af_queue_close(&s->afq);
145    av_freep(&s->fdsp);
146
147    return 0;
148}
149
150static av_cold void nellymoser_init_static(void)
151{
152    /* faster way of doing
153    for (int i = 0; i < POW_TABLE_SIZE; i++)
154       pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
155    pow_table[0] = 1;
156    pow_table[1024] = M_SQRT1_2;
157    for (int i = 1; i < 513; i++) {
158        double tmp = exp2(-i / 2048.0);
159        pow_table[i] = tmp;
160        pow_table[1024-i] = M_SQRT1_2 / tmp;
161        pow_table[1024+i] = tmp * M_SQRT1_2;
162        pow_table[2048-i] = 0.5 / tmp;
163    }
164    /* Generate overlap window */
165    ff_init_ff_sine_windows(7);
166}
167
168static av_cold int encode_init(AVCodecContext *avctx)
169{
170    static AVOnce init_static_once = AV_ONCE_INIT;
171    NellyMoserEncodeContext *s = avctx->priv_data;
172    int ret;
173
174    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
175        avctx->sample_rate != 11025 &&
176        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
177        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
178        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
179        return AVERROR(EINVAL);
180    }
181
182    avctx->frame_size = NELLY_SAMPLES;
183    avctx->initial_padding = NELLY_BUF_LEN;
184    ff_af_queue_init(avctx, &s->afq);
185    s->avctx = avctx;
186    if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
187        return ret;
188    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
189    if (!s->fdsp)
190        return AVERROR(ENOMEM);
191
192    if (s->avctx->trellis) {
193        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
194        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
195        if (!s->opt || !s->path)
196            return AVERROR(ENOMEM);
197    }
198
199    ff_thread_once(&init_static_once, nellymoser_init_static);
200
201    return 0;
202}
203
204#define find_best(val, table, LUT, LUT_add, LUT_size) \
205    best_idx = \
206        LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
207    if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
208        best_idx++;
209
210static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211{
212    int band, best_idx, power_idx = 0;
213    float power_candidate;
214
215    //base exponent
216    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
217    idx_table[0] = best_idx;
218    power_idx = ff_nelly_init_table[best_idx];
219
220    for (band = 1; band < NELLY_BANDS; band++) {
221        power_candidate = cand[band] - power_idx;
222        find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
223        idx_table[band] = best_idx;
224        power_idx += ff_nelly_delta_table[best_idx];
225    }
226}
227
228static inline float distance(float x, float y, int band)
229{
230    //return pow(fabs(x-y), 2.0);
231    float tmp = x - y;
232    return tmp * tmp;
233}
234
235static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
236{
237    int i, j, band, best_idx;
238    float power_candidate, best_val;
239
240    float  (*opt )[OPT_SIZE] = s->opt ;
241    uint8_t(*path)[OPT_SIZE] = s->path;
242
243    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
244        opt[0][i] = INFINITY;
245    }
246
247    for (i = 0; i < 64; i++) {
248        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
249        path[0][ff_nelly_init_table[i]] = i;
250    }
251
252    for (band = 1; band < NELLY_BANDS; band++) {
253        int q, c = 0;
254        float tmp;
255        int idx_min, idx_max, idx;
256        power_candidate = cand[band];
257        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
258            idx_min = FFMAX(0, cand[band] - q);
259            idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
260            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
261                if ( isinf(opt[band - 1][i]) )
262                    continue;
263                for (j = 0; j < 32; j++) {
264                    idx = i + ff_nelly_delta_table[j];
265                    if (idx > idx_max)
266                        break;
267                    if (idx >= idx_min) {
268                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
269                        if (opt[band][idx] > tmp) {
270                            opt[band][idx] = tmp;
271                            path[band][idx] = j;
272                            c = 1;
273                        }
274                    }
275                }
276            }
277        }
278        av_assert1(c); //FIXME
279    }
280
281    best_val = INFINITY;
282    best_idx = -1;
283    band = NELLY_BANDS - 1;
284    for (i = 0; i < OPT_SIZE; i++) {
285        if (best_val > opt[band][i]) {
286            best_val = opt[band][i];
287            best_idx = i;
288        }
289    }
290    for (band = NELLY_BANDS - 1; band >= 0; band--) {
291        idx_table[band] = path[band][best_idx];
292        if (band) {
293            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
294        }
295    }
296}
297
298/**
299 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
300 *  @param s               encoder context
301 *  @param output          output buffer
302 *  @param output_size     size of output buffer
303 */
304static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
305{
306    PutBitContext pb;
307    int i, j, band, block, best_idx, power_idx = 0;
308    float power_val, coeff, coeff_sum;
309    float pows[NELLY_FILL_LEN];
310    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
311    float cand[NELLY_BANDS];
312
313    apply_mdct(s);
314
315    init_put_bits(&pb, output, output_size);
316
317    i = 0;
318    for (band = 0; band < NELLY_BANDS; band++) {
319        coeff_sum = 0;
320        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
322                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
323        }
324        cand[band] =
325            log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
326    }
327
328    if (s->avctx->trellis) {
329        get_exponent_dynamic(s, cand, idx_table);
330    } else {
331        get_exponent_greedy(s, cand, idx_table);
332    }
333
334    i = 0;
335    for (band = 0; band < NELLY_BANDS; band++) {
336        if (band) {
337            power_idx += ff_nelly_delta_table[idx_table[band]];
338            put_bits(&pb, 5, idx_table[band]);
339        } else {
340            power_idx = ff_nelly_init_table[idx_table[0]];
341            put_bits(&pb, 6, idx_table[0]);
342        }
343        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
344        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
345            s->mdct_out[i] *= power_val;
346            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
347            pows[i] = power_idx;
348        }
349    }
350
351    ff_nelly_get_sample_bits(pows, bits);
352
353    for (block = 0; block < 2; block++) {
354        for (i = 0; i < NELLY_FILL_LEN; i++) {
355            if (bits[i] > 0) {
356                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
357                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
358                best_idx =
359                    quant_lut[av_clip (
360                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
361                            quant_lut_offset[bits[i]],
362                            quant_lut_offset[bits[i]+1] - 1
363                            )];
364                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
365                    best_idx++;
366
367                put_bits(&pb, bits[i], best_idx);
368            }
369        }
370        if (!block)
371            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
372    }
373
374    flush_put_bits(&pb);
375    memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
376}
377
378static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
379                        const AVFrame *frame, int *got_packet_ptr)
380{
381    NellyMoserEncodeContext *s = avctx->priv_data;
382    int ret;
383
384    if (s->last_frame)
385        return 0;
386
387    memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
388    if (frame) {
389        memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
390               frame->nb_samples * sizeof(*s->buf));
391        if (frame->nb_samples < NELLY_SAMPLES) {
392            memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
393                   (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
394            if (frame->nb_samples >= NELLY_BUF_LEN)
395                s->last_frame = 1;
396        }
397        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
398            return ret;
399    } else {
400        memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
401        s->last_frame = 1;
402    }
403
404    if ((ret = ff_get_encode_buffer(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
405        return ret;
406    encode_block(s, avpkt->data, avpkt->size);
407
408    /* Get the next frame pts/duration */
409    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
410                       &avpkt->duration);
411
412    *got_packet_ptr = 1;
413    return 0;
414}
415
416const FFCodec ff_nellymoser_encoder = {
417    .p.name         = "nellymoser",
418    .p.long_name    = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
419    .p.type         = AVMEDIA_TYPE_AUDIO,
420    .p.id           = AV_CODEC_ID_NELLYMOSER,
421    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
422                      AV_CODEC_CAP_SMALL_LAST_FRAME,
423    .priv_data_size = sizeof(NellyMoserEncodeContext),
424    .init           = encode_init,
425    FF_CODEC_ENCODE_CB(encode_frame),
426    .close          = encode_end,
427    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
428                                                     AV_SAMPLE_FMT_NONE },
429    .p.ch_layouts   = (const AVChannelLayout[]){ AV_CHANNEL_LAYOUT_MONO, { 0 } },
430    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
431};
432