1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 #include "libavutil/thread.h"
42 
43 #include "audio_frame_queue.h"
44 #include "avcodec.h"
45 #include "codec_internal.h"
46 #include "encode.h"
47 #include "fft.h"
48 #include "nellymoser.h"
49 #include "sinewin.h"
50 
51 #define BITSTREAM_WRITER_LE
52 #include "put_bits.h"
53 
54 #define POW_TABLE_SIZE (1<<11)
55 #define POW_TABLE_OFFSET 3
56 #define OPT_SIZE ((1<<15) + 3000)
57 
58 typedef struct NellyMoserEncodeContext {
59     AVCodecContext  *avctx;
60     int             last_frame;
61     AVFloatDSPContext *fdsp;
62     FFTContext      mdct_ctx;
63     AudioFrameQueue afq;
64     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
65     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
66     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
67     float           (*opt )[OPT_SIZE];
68     uint8_t         (*path)[OPT_SIZE];
69 } NellyMoserEncodeContext;
70 
71 static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0 - 3.0);
72 
73 static const uint8_t sf_lut[96] = {
74      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
75      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
76     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
77     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
78     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
79     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 };
81 
82 static const uint8_t sf_delta_lut[78] = {
83      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
84      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
85     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
86     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
87     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 };
89 
90 static const uint8_t quant_lut[230] = {
91      0,
92 
93      0,  1,  2,
94 
95      0,  1,  2,  3,  4,  5,  6,
96 
97      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
98     12, 13, 13, 13, 14,
99 
100      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
101      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
102     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103     30,
104 
105      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
106      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
107     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
108     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
109     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
110     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
111     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
112     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
113     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
114     61, 61, 61, 61, 62,
115 };
116 
117 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
118 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
119 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120 
apply_mdct(NellyMoserEncodeContext *s)121 static void apply_mdct(NellyMoserEncodeContext *s)
122 {
123     float *in0 = s->buf;
124     float *in1 = s->buf + NELLY_BUF_LEN;
125     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126 
127     s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
128     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
129     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130 
131     s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
132     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
133     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 }
135 
encode_end(AVCodecContext *avctx)136 static av_cold int encode_end(AVCodecContext *avctx)
137 {
138     NellyMoserEncodeContext *s = avctx->priv_data;
139 
140     ff_mdct_end(&s->mdct_ctx);
141 
142     av_freep(&s->opt);
143     av_freep(&s->path);
144     ff_af_queue_close(&s->afq);
145     av_freep(&s->fdsp);
146 
147     return 0;
148 }
149 
nellymoser_init_static(void)150 static av_cold void nellymoser_init_static(void)
151 {
152     /* faster way of doing
153     for (int i = 0; i < POW_TABLE_SIZE; i++)
154        pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
155     pow_table[0] = 1;
156     pow_table[1024] = M_SQRT1_2;
157     for (int i = 1; i < 513; i++) {
158         double tmp = exp2(-i / 2048.0);
159         pow_table[i] = tmp;
160         pow_table[1024-i] = M_SQRT1_2 / tmp;
161         pow_table[1024+i] = tmp * M_SQRT1_2;
162         pow_table[2048-i] = 0.5 / tmp;
163     }
164     /* Generate overlap window */
165     ff_init_ff_sine_windows(7);
166 }
167 
encode_init(AVCodecContext *avctx)168 static av_cold int encode_init(AVCodecContext *avctx)
169 {
170     static AVOnce init_static_once = AV_ONCE_INIT;
171     NellyMoserEncodeContext *s = avctx->priv_data;
172     int ret;
173 
174     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
175         avctx->sample_rate != 11025 &&
176         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
177         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
178         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
179         return AVERROR(EINVAL);
180     }
181 
182     avctx->frame_size = NELLY_SAMPLES;
183     avctx->initial_padding = NELLY_BUF_LEN;
184     ff_af_queue_init(avctx, &s->afq);
185     s->avctx = avctx;
186     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
187         return ret;
188     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
189     if (!s->fdsp)
190         return AVERROR(ENOMEM);
191 
192     if (s->avctx->trellis) {
193         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
194         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
195         if (!s->opt || !s->path)
196             return AVERROR(ENOMEM);
197     }
198 
199     ff_thread_once(&init_static_once, nellymoser_init_static);
200 
201     return 0;
202 }
203 
204 #define find_best(val, table, LUT, LUT_add, LUT_size) \
205     best_idx = \
206         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
207     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
208         best_idx++;
209 
get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)210 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211 {
212     int band, best_idx, power_idx = 0;
213     float power_candidate;
214 
215     //base exponent
216     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
217     idx_table[0] = best_idx;
218     power_idx = ff_nelly_init_table[best_idx];
219 
220     for (band = 1; band < NELLY_BANDS; band++) {
221         power_candidate = cand[band] - power_idx;
222         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
223         idx_table[band] = best_idx;
224         power_idx += ff_nelly_delta_table[best_idx];
225     }
226 }
227 
distance(float x, float y, int band)228 static inline float distance(float x, float y, int band)
229 {
230     //return pow(fabs(x-y), 2.0);
231     float tmp = x - y;
232     return tmp * tmp;
233 }
234 
get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)235 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
236 {
237     int i, j, band, best_idx;
238     float power_candidate, best_val;
239 
240     float  (*opt )[OPT_SIZE] = s->opt ;
241     uint8_t(*path)[OPT_SIZE] = s->path;
242 
243     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
244         opt[0][i] = INFINITY;
245     }
246 
247     for (i = 0; i < 64; i++) {
248         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
249         path[0][ff_nelly_init_table[i]] = i;
250     }
251 
252     for (band = 1; band < NELLY_BANDS; band++) {
253         int q, c = 0;
254         float tmp;
255         int idx_min, idx_max, idx;
256         power_candidate = cand[band];
257         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
258             idx_min = FFMAX(0, cand[band] - q);
259             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
260             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
261                 if ( isinf(opt[band - 1][i]) )
262                     continue;
263                 for (j = 0; j < 32; j++) {
264                     idx = i + ff_nelly_delta_table[j];
265                     if (idx > idx_max)
266                         break;
267                     if (idx >= idx_min) {
268                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
269                         if (opt[band][idx] > tmp) {
270                             opt[band][idx] = tmp;
271                             path[band][idx] = j;
272                             c = 1;
273                         }
274                     }
275                 }
276             }
277         }
278         av_assert1(c); //FIXME
279     }
280 
281     best_val = INFINITY;
282     best_idx = -1;
283     band = NELLY_BANDS - 1;
284     for (i = 0; i < OPT_SIZE; i++) {
285         if (best_val > opt[band][i]) {
286             best_val = opt[band][i];
287             best_idx = i;
288         }
289     }
290     for (band = NELLY_BANDS - 1; band >= 0; band--) {
291         idx_table[band] = path[band][best_idx];
292         if (band) {
293             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
294         }
295     }
296 }
297 
298 /**
299  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
300  *  @param s               encoder context
301  *  @param output          output buffer
302  *  @param output_size     size of output buffer
303  */
encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)304 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
305 {
306     PutBitContext pb;
307     int i, j, band, block, best_idx, power_idx = 0;
308     float power_val, coeff, coeff_sum;
309     float pows[NELLY_FILL_LEN];
310     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
311     float cand[NELLY_BANDS];
312 
313     apply_mdct(s);
314 
315     init_put_bits(&pb, output, output_size);
316 
317     i = 0;
318     for (band = 0; band < NELLY_BANDS; band++) {
319         coeff_sum = 0;
320         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
322                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
323         }
324         cand[band] =
325             log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
326     }
327 
328     if (s->avctx->trellis) {
329         get_exponent_dynamic(s, cand, idx_table);
330     } else {
331         get_exponent_greedy(s, cand, idx_table);
332     }
333 
334     i = 0;
335     for (band = 0; band < NELLY_BANDS; band++) {
336         if (band) {
337             power_idx += ff_nelly_delta_table[idx_table[band]];
338             put_bits(&pb, 5, idx_table[band]);
339         } else {
340             power_idx = ff_nelly_init_table[idx_table[0]];
341             put_bits(&pb, 6, idx_table[0]);
342         }
343         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
344         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
345             s->mdct_out[i] *= power_val;
346             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
347             pows[i] = power_idx;
348         }
349     }
350 
351     ff_nelly_get_sample_bits(pows, bits);
352 
353     for (block = 0; block < 2; block++) {
354         for (i = 0; i < NELLY_FILL_LEN; i++) {
355             if (bits[i] > 0) {
356                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
357                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
358                 best_idx =
359                     quant_lut[av_clip (
360                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
361                             quant_lut_offset[bits[i]],
362                             quant_lut_offset[bits[i]+1] - 1
363                             )];
364                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
365                     best_idx++;
366 
367                 put_bits(&pb, bits[i], best_idx);
368             }
369         }
370         if (!block)
371             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
372     }
373 
374     flush_put_bits(&pb);
375     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
376 }
377 
encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)378 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
379                         const AVFrame *frame, int *got_packet_ptr)
380 {
381     NellyMoserEncodeContext *s = avctx->priv_data;
382     int ret;
383 
384     if (s->last_frame)
385         return 0;
386 
387     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
388     if (frame) {
389         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
390                frame->nb_samples * sizeof(*s->buf));
391         if (frame->nb_samples < NELLY_SAMPLES) {
392             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
393                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
394             if (frame->nb_samples >= NELLY_BUF_LEN)
395                 s->last_frame = 1;
396         }
397         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
398             return ret;
399     } else {
400         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
401         s->last_frame = 1;
402     }
403 
404     if ((ret = ff_get_encode_buffer(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
405         return ret;
406     encode_block(s, avpkt->data, avpkt->size);
407 
408     /* Get the next frame pts/duration */
409     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
410                        &avpkt->duration);
411 
412     *got_packet_ptr = 1;
413     return 0;
414 }
415 
416 const FFCodec ff_nellymoser_encoder = {
417     .p.name         = "nellymoser",
418     .p.long_name    = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
419     .p.type         = AVMEDIA_TYPE_AUDIO,
420     .p.id           = AV_CODEC_ID_NELLYMOSER,
421     .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
422                       AV_CODEC_CAP_SMALL_LAST_FRAME,
423     .priv_data_size = sizeof(NellyMoserEncodeContext),
424     .init           = encode_init,
425     FF_CODEC_ENCODE_CB(encode_frame),
426     .close          = encode_end,
427     .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
428                                                      AV_SAMPLE_FMT_NONE },
429     .p.ch_layouts   = (const AVChannelLayout[]){ AV_CHANNEL_LAYOUT_MONO, { 0 } },
430     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
431 };
432