1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 #include "libavutil/thread.h"
42
43 #include "audio_frame_queue.h"
44 #include "avcodec.h"
45 #include "codec_internal.h"
46 #include "encode.h"
47 #include "fft.h"
48 #include "nellymoser.h"
49 #include "sinewin.h"
50
51 #define BITSTREAM_WRITER_LE
52 #include "put_bits.h"
53
54 #define POW_TABLE_SIZE (1<<11)
55 #define POW_TABLE_OFFSET 3
56 #define OPT_SIZE ((1<<15) + 3000)
57
58 typedef struct NellyMoserEncodeContext {
59 AVCodecContext *avctx;
60 int last_frame;
61 AVFloatDSPContext *fdsp;
62 FFTContext mdct_ctx;
63 AudioFrameQueue afq;
64 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
65 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
66 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
67 float (*opt )[OPT_SIZE];
68 uint8_t (*path)[OPT_SIZE];
69 } NellyMoserEncodeContext;
70
71 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
72
73 static const uint8_t sf_lut[96] = {
74 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
75 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
76 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
77 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
78 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
79 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 };
81
82 static const uint8_t sf_delta_lut[78] = {
83 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
84 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
85 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
86 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
87 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 };
89
90 static const uint8_t quant_lut[230] = {
91 0,
92
93 0, 1, 2,
94
95 0, 1, 2, 3, 4, 5, 6,
96
97 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
98 12, 13, 13, 13, 14,
99
100 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
101 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
102 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103 30,
104
105 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
106 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
107 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
108 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
109 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
110 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
111 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
112 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
113 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
114 61, 61, 61, 61, 62,
115 };
116
117 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
118 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
119 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120
apply_mdct(NellyMoserEncodeContext *s)121 static void apply_mdct(NellyMoserEncodeContext *s)
122 {
123 float *in0 = s->buf;
124 float *in1 = s->buf + NELLY_BUF_LEN;
125 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126
127 s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
128 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
129 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130
131 s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
132 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
133 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 }
135
encode_end(AVCodecContext *avctx)136 static av_cold int encode_end(AVCodecContext *avctx)
137 {
138 NellyMoserEncodeContext *s = avctx->priv_data;
139
140 ff_mdct_end(&s->mdct_ctx);
141
142 av_freep(&s->opt);
143 av_freep(&s->path);
144 ff_af_queue_close(&s->afq);
145 av_freep(&s->fdsp);
146
147 return 0;
148 }
149
nellymoser_init_static(void)150 static av_cold void nellymoser_init_static(void)
151 {
152 /* faster way of doing
153 for (int i = 0; i < POW_TABLE_SIZE; i++)
154 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
155 pow_table[0] = 1;
156 pow_table[1024] = M_SQRT1_2;
157 for (int i = 1; i < 513; i++) {
158 double tmp = exp2(-i / 2048.0);
159 pow_table[i] = tmp;
160 pow_table[1024-i] = M_SQRT1_2 / tmp;
161 pow_table[1024+i] = tmp * M_SQRT1_2;
162 pow_table[2048-i] = 0.5 / tmp;
163 }
164 /* Generate overlap window */
165 ff_init_ff_sine_windows(7);
166 }
167
encode_init(AVCodecContext *avctx)168 static av_cold int encode_init(AVCodecContext *avctx)
169 {
170 static AVOnce init_static_once = AV_ONCE_INIT;
171 NellyMoserEncodeContext *s = avctx->priv_data;
172 int ret;
173
174 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
175 avctx->sample_rate != 11025 &&
176 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
177 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
178 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
179 return AVERROR(EINVAL);
180 }
181
182 avctx->frame_size = NELLY_SAMPLES;
183 avctx->initial_padding = NELLY_BUF_LEN;
184 ff_af_queue_init(avctx, &s->afq);
185 s->avctx = avctx;
186 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
187 return ret;
188 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
189 if (!s->fdsp)
190 return AVERROR(ENOMEM);
191
192 if (s->avctx->trellis) {
193 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
194 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
195 if (!s->opt || !s->path)
196 return AVERROR(ENOMEM);
197 }
198
199 ff_thread_once(&init_static_once, nellymoser_init_static);
200
201 return 0;
202 }
203
204 #define find_best(val, table, LUT, LUT_add, LUT_size) \
205 best_idx = \
206 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
207 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
208 best_idx++;
209
get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)210 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211 {
212 int band, best_idx, power_idx = 0;
213 float power_candidate;
214
215 //base exponent
216 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
217 idx_table[0] = best_idx;
218 power_idx = ff_nelly_init_table[best_idx];
219
220 for (band = 1; band < NELLY_BANDS; band++) {
221 power_candidate = cand[band] - power_idx;
222 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
223 idx_table[band] = best_idx;
224 power_idx += ff_nelly_delta_table[best_idx];
225 }
226 }
227
distance(float x, float y, int band)228 static inline float distance(float x, float y, int band)
229 {
230 //return pow(fabs(x-y), 2.0);
231 float tmp = x - y;
232 return tmp * tmp;
233 }
234
get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)235 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
236 {
237 int i, j, band, best_idx;
238 float power_candidate, best_val;
239
240 float (*opt )[OPT_SIZE] = s->opt ;
241 uint8_t(*path)[OPT_SIZE] = s->path;
242
243 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
244 opt[0][i] = INFINITY;
245 }
246
247 for (i = 0; i < 64; i++) {
248 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
249 path[0][ff_nelly_init_table[i]] = i;
250 }
251
252 for (band = 1; band < NELLY_BANDS; band++) {
253 int q, c = 0;
254 float tmp;
255 int idx_min, idx_max, idx;
256 power_candidate = cand[band];
257 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
258 idx_min = FFMAX(0, cand[band] - q);
259 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
260 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
261 if ( isinf(opt[band - 1][i]) )
262 continue;
263 for (j = 0; j < 32; j++) {
264 idx = i + ff_nelly_delta_table[j];
265 if (idx > idx_max)
266 break;
267 if (idx >= idx_min) {
268 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
269 if (opt[band][idx] > tmp) {
270 opt[band][idx] = tmp;
271 path[band][idx] = j;
272 c = 1;
273 }
274 }
275 }
276 }
277 }
278 av_assert1(c); //FIXME
279 }
280
281 best_val = INFINITY;
282 best_idx = -1;
283 band = NELLY_BANDS - 1;
284 for (i = 0; i < OPT_SIZE; i++) {
285 if (best_val > opt[band][i]) {
286 best_val = opt[band][i];
287 best_idx = i;
288 }
289 }
290 for (band = NELLY_BANDS - 1; band >= 0; band--) {
291 idx_table[band] = path[band][best_idx];
292 if (band) {
293 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
294 }
295 }
296 }
297
298 /**
299 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
300 * @param s encoder context
301 * @param output output buffer
302 * @param output_size size of output buffer
303 */
encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)304 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
305 {
306 PutBitContext pb;
307 int i, j, band, block, best_idx, power_idx = 0;
308 float power_val, coeff, coeff_sum;
309 float pows[NELLY_FILL_LEN];
310 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
311 float cand[NELLY_BANDS];
312
313 apply_mdct(s);
314
315 init_put_bits(&pb, output, output_size);
316
317 i = 0;
318 for (band = 0; band < NELLY_BANDS; band++) {
319 coeff_sum = 0;
320 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
322 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
323 }
324 cand[band] =
325 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
326 }
327
328 if (s->avctx->trellis) {
329 get_exponent_dynamic(s, cand, idx_table);
330 } else {
331 get_exponent_greedy(s, cand, idx_table);
332 }
333
334 i = 0;
335 for (band = 0; band < NELLY_BANDS; band++) {
336 if (band) {
337 power_idx += ff_nelly_delta_table[idx_table[band]];
338 put_bits(&pb, 5, idx_table[band]);
339 } else {
340 power_idx = ff_nelly_init_table[idx_table[0]];
341 put_bits(&pb, 6, idx_table[0]);
342 }
343 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
344 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
345 s->mdct_out[i] *= power_val;
346 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
347 pows[i] = power_idx;
348 }
349 }
350
351 ff_nelly_get_sample_bits(pows, bits);
352
353 for (block = 0; block < 2; block++) {
354 for (i = 0; i < NELLY_FILL_LEN; i++) {
355 if (bits[i] > 0) {
356 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
357 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
358 best_idx =
359 quant_lut[av_clip (
360 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
361 quant_lut_offset[bits[i]],
362 quant_lut_offset[bits[i]+1] - 1
363 )];
364 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
365 best_idx++;
366
367 put_bits(&pb, bits[i], best_idx);
368 }
369 }
370 if (!block)
371 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
372 }
373
374 flush_put_bits(&pb);
375 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
376 }
377
encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)378 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
379 const AVFrame *frame, int *got_packet_ptr)
380 {
381 NellyMoserEncodeContext *s = avctx->priv_data;
382 int ret;
383
384 if (s->last_frame)
385 return 0;
386
387 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
388 if (frame) {
389 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
390 frame->nb_samples * sizeof(*s->buf));
391 if (frame->nb_samples < NELLY_SAMPLES) {
392 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
393 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
394 if (frame->nb_samples >= NELLY_BUF_LEN)
395 s->last_frame = 1;
396 }
397 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
398 return ret;
399 } else {
400 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
401 s->last_frame = 1;
402 }
403
404 if ((ret = ff_get_encode_buffer(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
405 return ret;
406 encode_block(s, avpkt->data, avpkt->size);
407
408 /* Get the next frame pts/duration */
409 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
410 &avpkt->duration);
411
412 *got_packet_ptr = 1;
413 return 0;
414 }
415
416 const FFCodec ff_nellymoser_encoder = {
417 .p.name = "nellymoser",
418 .p.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
419 .p.type = AVMEDIA_TYPE_AUDIO,
420 .p.id = AV_CODEC_ID_NELLYMOSER,
421 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
422 AV_CODEC_CAP_SMALL_LAST_FRAME,
423 .priv_data_size = sizeof(NellyMoserEncodeContext),
424 .init = encode_init,
425 FF_CODEC_ENCODE_CB(encode_frame),
426 .close = encode_end,
427 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
428 AV_SAMPLE_FMT_NONE },
429 .p.ch_layouts = (const AVChannelLayout[]){ AV_CHANNEL_LAYOUT_MONO, { 0 } },
430 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
431 };
432