xref: /third_party/ffmpeg/libavcodec/wmaenc.c (revision cabdff1a)
1/*
2 * WMA compatible encoder
3 * Copyright (c) 2007 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include "config_components.h"
23
24#include "libavutil/attributes.h"
25#include "libavutil/ffmath.h"
26
27#include "avcodec.h"
28#include "codec_internal.h"
29#include "encode.h"
30#include "internal.h"
31#include "wma.h"
32#include "libavutil/avassert.h"
33
34
35static av_cold int encode_init(AVCodecContext *avctx)
36{
37    WMACodecContext *s = avctx->priv_data;
38    int i, flags1, flags2, block_align;
39    uint8_t *extradata;
40    int ret;
41
42    s->avctx = avctx;
43
44    if (avctx->ch_layout.nb_channels > MAX_CHANNELS) {
45        av_log(avctx, AV_LOG_ERROR,
46               "too many channels: got %i, need %i or fewer\n",
47               avctx->ch_layout.nb_channels, MAX_CHANNELS);
48        return AVERROR(EINVAL);
49    }
50
51    if (avctx->sample_rate > 48000) {
52        av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
53               avctx->sample_rate);
54        return AVERROR(EINVAL);
55    }
56
57    if (avctx->bit_rate < 24 * 1000) {
58        av_log(avctx, AV_LOG_ERROR,
59               "bitrate too low: got %"PRId64", need 24000 or higher\n",
60               avctx->bit_rate);
61        return AVERROR(EINVAL);
62    }
63
64    /* extract flag info */
65    flags1 = 0;
66    flags2 = 1;
67    if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
68        extradata             = av_malloc(4);
69        if (!extradata)
70            return AVERROR(ENOMEM);
71        avctx->extradata_size = 4;
72        AV_WL16(extradata, flags1);
73        AV_WL16(extradata + 2, flags2);
74    } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
75        extradata             = av_mallocz(10);
76        if (!extradata)
77            return AVERROR(ENOMEM);
78        avctx->extradata_size = 10;
79        AV_WL32(extradata, flags1);
80        AV_WL16(extradata + 4, flags2);
81    } else {
82        av_assert0(0);
83    }
84    avctx->extradata          = extradata;
85    s->use_exp_vlc            = flags2 & 0x0001;
86    s->use_bit_reservoir      = flags2 & 0x0002;
87    s->use_variable_block_len = flags2 & 0x0004;
88    if (avctx->ch_layout.nb_channels == 2)
89        s->ms_stereo = 1;
90
91    if ((ret = ff_wma_init(avctx, flags2)) < 0)
92        return ret;
93
94    /* init MDCT */
95    for (i = 0; i < s->nb_block_sizes; i++) {
96        ret = ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
97        if (ret < 0)
98            return ret;
99    }
100
101    block_align        = avctx->bit_rate * (int64_t) s->frame_len /
102                         (avctx->sample_rate * 8);
103    block_align        = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
104    avctx->block_align = block_align;
105    avctx->frame_size = avctx->initial_padding = s->frame_len;
106
107    return 0;
108}
109
110static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
111{
112    WMACodecContext *s = avctx->priv_data;
113    float **audio      = (float **) frame->extended_data;
114    int len            = frame->nb_samples;
115    int window_index   = s->frame_len_bits - s->block_len_bits;
116    FFTContext *mdct   = &s->mdct_ctx[window_index];
117    int ch;
118    const float *win   = s->windows[window_index];
119    int window_len     = 1 << s->block_len_bits;
120    float n            = 2.0 * 32768.0 / window_len;
121
122    for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
123        memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
124        s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
125        s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch],
126                                    win, len);
127        s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
128        mdct->mdct_calc(mdct, s->coefs[ch], s->output);
129        if (!isfinite(s->coefs[ch][0])) {
130            av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
131            return AVERROR(EINVAL);
132        }
133    }
134
135    return 0;
136}
137
138// FIXME use for decoding too
139static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
140{
141    int n;
142    const uint16_t *ptr;
143    float v, *q, max_scale, *q_end;
144
145    ptr       = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
146    q         = s->exponents[ch];
147    q_end     = q + s->block_len;
148    max_scale = 0;
149    while (q < q_end) {
150        /* XXX: use a table */
151        v         = ff_exp10(*exp_param++ *(1.0 / 16.0));
152        max_scale = FFMAX(max_scale, v);
153        n         = *ptr++;
154        do {
155            *q++ = v;
156        } while (--n);
157    }
158    s->max_exponent[ch] = max_scale;
159}
160
161static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
162{
163    int last_exp;
164    const uint16_t *ptr;
165    float *q, *q_end;
166
167    ptr   = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
168    q     = s->exponents[ch];
169    q_end = q + s->block_len;
170    if (s->version == 1) {
171        last_exp = *exp_param++;
172        av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
173        put_bits(&s->pb, 5, last_exp - 10);
174        q += *ptr++;
175    } else
176        last_exp = 36;
177    while (q < q_end) {
178        int exp  = *exp_param++;
179        int code = exp - last_exp + 60;
180        av_assert1(code >= 0 && code < 120);
181        put_bits(&s->pb, ff_aac_scalefactor_bits[code],
182                 ff_aac_scalefactor_code[code]);
183        /* XXX: use a table */
184        q       += *ptr++;
185        last_exp = exp;
186    }
187}
188
/**
 * Quantize the MDCT coefficients of all channels with the given total gain
 * and write one block to the bit writer s->pb.
 *
 * Only the fixed-block-length, fixed-exponent, no-noise-coding path is
 * implemented; the other paths hit av_assert0(0).
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param total_gain gain value used for quantization and written to the block
 * @return 0 on success;
 *         1 if no channel is coded (cannot happen at present because every
 *           channel is unconditionally marked as coded);
 *         -1 if a quantized coefficient falls outside [-32768, 32767] or an
 *           escape-coded level does not fit in coef_nb_bits bits
 */
189static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
190                        int total_gain)
191{
192    int channels = s->avctx->ch_layout.nb_channels;
193    int v, bsize, ch, coef_nb_bits, parse_exponents;
194    float mdct_norm;
195    int nb_coefs[MAX_CHANNELS];
       /* every band uses the same constant exponent parameter */
196    static const int fixed_exp[25] = {
197        20, 20, 20, 20, 20,
198        20, 20, 20, 20, 20,
199        20, 20, 20, 20, 20,
200        20, 20, 20, 20, 20,
201        20, 20, 20, 20, 20
202    };
203
204    // FIXME remove duplication relative to decoder
205    if (s->use_variable_block_len) {
206        av_assert0(0); // FIXME not implemented
207    } else {
208        /* fixed block len */
209        s->next_block_len_bits = s->frame_len_bits;
210        s->prev_block_len_bits = s->frame_len_bits;
211        s->block_len_bits      = s->frame_len_bits;
212    }
213
214    s->block_len = 1 << s->block_len_bits;
215//     av_assert0((s->block_pos + s->block_len) <= s->frame_len);
216    bsize = s->frame_len_bits - s->block_len_bits;
217
218    // FIXME factor
       /* number of coefficients actually coded, identical for all channels */
219    v = s->coefs_end[bsize] - s->coefs_start;
220    for (ch = 0; ch < channels; ch++)
221        nb_coefs[ch] = v;
222    {
           /* normalization: 1 / n4, additionally times sqrt(n4) for version 1 */
223        int n4 = s->block_len / 2;
224        mdct_norm = 1.0 / (float) n4;
225        if (s->version == 1)
226            mdct_norm *= sqrt(n4);
227    }
228
       /* stereo streams carry a one-bit mid/side flag */
229    if (channels == 2)
230        put_bits(&s->pb, 1, !!s->ms_stereo);
231
232    for (ch = 0; ch < channels; ch++) {
233        // FIXME only set channel_coded when needed, instead of always
234        s->channel_coded[ch] = 1;
235        if (s->channel_coded[ch])
236            init_exp(s, ch, fixed_exp);
237    }
238
       /* quantize: coefs1[i] = round(coef / (exponent * mult)) */
239    for (ch = 0; ch < channels; ch++) {
240        if (s->channel_coded[ch]) {
241            WMACoef *coefs1;
242            float *coefs, *exponents, mult;
243            int i, n;
244
245            coefs1    = s->coefs1[ch];
246            exponents = s->exponents[ch];
247            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
248            mult     *= mdct_norm;
249            coefs     = src_coefs[ch];
               /* the "&& 0" keeps the noise coding branch disabled */
250            if (s->use_noise_coding && 0) {
251                av_assert0(0); // FIXME not implemented
252            } else {
253                coefs += s->coefs_start;
254                n      = nb_coefs[ch];
255                for (i = 0; i < n; i++) {
256                    double t = *coefs++ / (exponents[i] * mult);
                       /* give up on this gain if the value is out of range */
257                    if (t < -32768 || t > 32767)
258                        return -1;
259
260                    coefs1[i] = lrint(t);
261                }
262            }
263        }
264    }
265
       /* one "channel coded" bit per channel; v becomes nonzero if any is set */
266    v = 0;
267    for (ch = 0; ch < channels; ch++) {
268        int a = s->channel_coded[ch];
269        put_bits(&s->pb, 1, a);
270        v |= a;
271    }
272
273    if (!v)
274        return 1;
275
       /* total_gain - 1 is sent as 7-bit chunks: 127 is written while at
        * least 127 remains, then the remainder closes the sequence */
276    for (v = total_gain - 1; v >= 127; v -= 127)
277        put_bits(&s->pb, 7, 127);
278    put_bits(&s->pb, 7, v);
279
280    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);
281
       /* with noise coding enabled, each high band gets a 0 flag bit and
        * high_band_coded is cleared; the "if (0)" adjustment is dead code */
282    if (s->use_noise_coding) {
283        for (ch = 0; ch < channels; ch++) {
284            if (s->channel_coded[ch]) {
285                int i, n;
286                n = s->exponent_high_sizes[bsize];
287                for (i = 0; i < n; i++) {
288                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
289                    if (0)
290                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
291                }
292            }
293        }
294    }
295
       /* exponents are always sent; the flag bit itself is only written when
        * the block is shorter than the frame, which the fixed block length
        * path above rules out */
296    parse_exponents = 1;
297    if (s->block_len_bits != s->frame_len_bits)
298        put_bits(&s->pb, 1, parse_exponents);
299
300    if (parse_exponents) {
301        for (ch = 0; ch < channels; ch++) {
302            if (s->channel_coded[ch]) {
303                if (s->use_exp_vlc) {
304                    encode_exp_vlc(s, ch, fixed_exp);
305                } else {
306                    av_assert0(0); // FIXME not implemented
307//                    encode_exp_lsp(s, ch);
308                }
309            }
310        }
311    } else
312        av_assert0(0); // FIXME not implemented
313
       /* run/level coding of the quantized coefficients */
314    for (ch = 0; ch < channels; ch++) {
315        if (s->channel_coded[ch]) {
316            int run, tindex;
317            WMACoef *ptr, *eptr;
               /* table 1 is used for the second channel of M/S stereo */
318            tindex = (ch == 1 && s->ms_stereo);
319            ptr    = &s->coefs1[ch][0];
320            eptr   = ptr + nb_coefs[ch];
321
322            run = 0;
323            for (; ptr < eptr; ptr++) {
324                if (*ptr) {
325                    int level     = *ptr;
326                    int abs_level = FFABS(level);
                       /* code 0 is the escape, used when the (run, level) pair
                        * has no entry in the table */
327                    int code      = 0;
328                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
329                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
330                            code = run + s->int_table[tindex][abs_level - 1];
331
332                    av_assert2(code < s->coef_vlcs[tindex]->n);
333                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
334                             s->coef_vlcs[tindex]->huffcodes[code]);
335
336                    if (code == 0) {
                           /* escape: level must fit in coef_nb_bits bits */
337                        if (1 << coef_nb_bits <= abs_level)
338                            return -1;
339
340                        put_bits(&s->pb, coef_nb_bits, abs_level);
341                        put_bits(&s->pb, s->frame_len_bits, run);
342                    }
343                    // FIXME the sign is flipped somewhere
344                    put_bits(&s->pb, 1, level < 0);
345                    run = 0;
346                } else
347                    run++;
348            }
               /* trailing zeros: emit code 1 (presumably the end-of-block
                * symbol; confirm against the decoder) */
349            if (run)
350                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
351                         s->coef_vlcs[tindex]->huffcodes[1]);
352        }
           /* version 1 byte-aligns after each channel of a multichannel block */
353        if (s->version == 1 && channels >= 2)
354            align_put_bits(&s->pb);
355    }
356    return 0;
357}
358
359static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
360                        uint8_t *buf, int buf_size, int total_gain)
361{
362    init_put_bits(&s->pb, buf, buf_size);
363
364    if (s->use_bit_reservoir)
365        av_assert0(0); // FIXME not implemented
366    else if (encode_block(s, src_coefs, total_gain) < 0)
367        return INT_MAX;
368
369    align_put_bits(&s->pb);
370
371    return put_bits_count(&s->pb) / 8 - s->avctx->block_align;
372}
373
374static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
375                             const AVFrame *frame, int *got_packet_ptr)
376{
377    WMACodecContext *s = avctx->priv_data;
378    int i, total_gain, ret, error;
379
380    s->block_len_bits = s->frame_len_bits; // required by non variable block len
381    s->block_len      = 1 << s->block_len_bits;
382
383    ret = apply_window_and_mdct(avctx, frame);
384
385    if (ret < 0)
386        return ret;
387
388    if (s->ms_stereo) {
389        float a, b;
390        int i;
391
392        for (i = 0; i < s->block_len; i++) {
393            a              = s->coefs[0][i] * 0.5;
394            b              = s->coefs[1][i] * 0.5;
395            s->coefs[0][i] = a + b;
396            s->coefs[1][i] = a - b;
397        }
398    }
399
400    if ((ret = ff_alloc_packet(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
401        return ret;
402
403    total_gain = 128;
404    for (i = 64; i; i >>= 1) {
405        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
406                                 total_gain - i);
407        if (error <= 0)
408            total_gain -= i;
409    }
410
411    while(total_gain <= 128 && error > 0)
412        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++);
413    if (error > 0) {
414        av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
415        avpkt->size = 0;
416        return AVERROR(EINVAL);
417    }
418    av_assert0((put_bits_count(&s->pb) & 7) == 0);
419    i = avctx->block_align - put_bytes_count(&s->pb, 0);
420    av_assert0(i>=0);
421    while(i--)
422        put_bits(&s->pb, 8, 'N');
423
424    flush_put_bits(&s->pb);
425    av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);
426
427    if (frame->pts != AV_NOPTS_VALUE)
428        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
429
430    avpkt->size     = avctx->block_align;
431    *got_packet_ptr = 1;
432    return 0;
433}
434
#if CONFIG_WMAV1_ENCODER
/* Codec table entry for the WMA version 1 encoder (planar float input). */
const FFCodec ff_wmav1_encoder = {
    /* public codec description */
    .p.name         = "wmav1",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV1,
    .p.sample_fmts  = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
    /* private state and internal capabilities */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    /* callbacks */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Codec table entry for the WMA version 2 encoder (planar float input). */
const FFCodec ff_wmav2_encoder = {
    /* public codec description */
    .p.name         = "wmav2",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV2,
    .p.sample_fmts  = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
    /* private state and internal capabilities */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    /* callbacks */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
465