1/* 2 * WMA compatible encoder 3 * Copyright (c) 2007 Michael Niedermayer 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include "config_components.h" 23 24#include "libavutil/attributes.h" 25#include "libavutil/ffmath.h" 26 27#include "avcodec.h" 28#include "codec_internal.h" 29#include "encode.h" 30#include "internal.h" 31#include "wma.h" 32#include "libavutil/avassert.h" 33 34 35static av_cold int encode_init(AVCodecContext *avctx) 36{ 37 WMACodecContext *s = avctx->priv_data; 38 int i, flags1, flags2, block_align; 39 uint8_t *extradata; 40 int ret; 41 42 s->avctx = avctx; 43 44 if (avctx->ch_layout.nb_channels > MAX_CHANNELS) { 45 av_log(avctx, AV_LOG_ERROR, 46 "too many channels: got %i, need %i or fewer\n", 47 avctx->ch_layout.nb_channels, MAX_CHANNELS); 48 return AVERROR(EINVAL); 49 } 50 51 if (avctx->sample_rate > 48000) { 52 av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n", 53 avctx->sample_rate); 54 return AVERROR(EINVAL); 55 } 56 57 if (avctx->bit_rate < 24 * 1000) { 58 av_log(avctx, AV_LOG_ERROR, 59 "bitrate too low: got %"PRId64", need 24000 or higher\n", 60 avctx->bit_rate); 61 return AVERROR(EINVAL); 62 } 63 64 /* extract flag info */ 65 flags1 = 0; 66 flags2 = 1; 67 if (avctx->codec->id == AV_CODEC_ID_WMAV1) { 68 extradata = av_malloc(4); 69 if (!extradata) 70 return AVERROR(ENOMEM); 71 avctx->extradata_size = 4; 72 AV_WL16(extradata, flags1); 73 AV_WL16(extradata + 2, flags2); 74 } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { 75 extradata = av_mallocz(10); 76 if (!extradata) 77 return AVERROR(ENOMEM); 78 avctx->extradata_size = 10; 79 AV_WL32(extradata, flags1); 80 AV_WL16(extradata + 4, flags2); 81 } else { 82 av_assert0(0); 83 } 84 avctx->extradata = extradata; 85 s->use_exp_vlc = flags2 & 0x0001; 86 s->use_bit_reservoir = flags2 & 0x0002; 87 s->use_variable_block_len = flags2 & 0x0004; 88 if (avctx->ch_layout.nb_channels == 2) 89 s->ms_stereo = 1; 90 91 if ((ret = ff_wma_init(avctx, flags2)) < 0) 92 return ret; 93 94 /* init MDCT */ 95 for (i = 0; i < s->nb_block_sizes; i++) { 96 ret = ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); 97 if (ret < 0) 98 return ret; 99 } 100 101 block_align = avctx->bit_rate * (int64_t) s->frame_len / 102 (avctx->sample_rate * 8); 103 block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE); 104 avctx->block_align = block_align; 105 avctx->frame_size = avctx->initial_padding = s->frame_len; 106 107 return 0; 108} 109 110static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame) 111{ 112 WMACodecContext *s = avctx->priv_data; 113 float **audio = (float **) frame->extended_data; 114 int len = frame->nb_samples; 115 int window_index = s->frame_len_bits - s->block_len_bits; 116 FFTContext *mdct = &s->mdct_ctx[window_index]; 117 int ch; 118 const float *win = s->windows[window_index]; 119 int window_len = 1 << s->block_len_bits; 120 float n = 2.0 * 32768.0 / window_len; 121 122 for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { 123 memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); 124 s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); 125 s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], 126 win, len); 127 s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); 128 mdct->mdct_calc(mdct, s->coefs[ch], s->output); 129 if (!isfinite(s->coefs[ch][0])) { 130 av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n"); 131 return AVERROR(EINVAL); 132 } 133 } 134 135 return 0; 136} 137 138// FIXME use for decoding too 139static void init_exp(WMACodecContext *s, int ch, const int *exp_param) 140{ 141 int n; 142 const uint16_t *ptr; 143 float v, *q, max_scale, *q_end; 144 145 ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; 146 q = s->exponents[ch]; 147 q_end = q + s->block_len; 148 max_scale = 0; 149 while (q < q_end) { 150 /* XXX: use a table */ 151 v = ff_exp10(*exp_param++ *(1.0 / 16.0)); 152 max_scale = FFMAX(max_scale, v); 153 n = *ptr++; 154 do { 155 *q++ = v; 156 } while (--n); 157 } 158 s->max_exponent[ch] = max_scale; 159} 160 161static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param) 162{ 163 int last_exp; 164 const uint16_t *ptr; 165 float *q, *q_end; 166 167 ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; 168 q = s->exponents[ch]; 169 q_end = q + s->block_len; 170 if (s->version == 1) { 171 last_exp = *exp_param++; 172 av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32); 173 put_bits(&s->pb, 5, last_exp - 10); 174 q += *ptr++; 175 } else 176 last_exp = 36; 177 while (q < q_end) { 178 int exp = *exp_param++; 179 int code = exp - last_exp + 60; 180 av_assert1(code >= 0 && code < 120); 181 put_bits(&s->pb, ff_aac_scalefactor_bits[code], 182 ff_aac_scalefactor_code[code]); 183 /* XXX: use a table */ 184 q += *ptr++; 185 last_exp = exp; 186 } 187} 188 189static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], 190 int total_gain) 191{ 192 int channels = s->avctx->ch_layout.nb_channels; 193 int v, bsize, ch, coef_nb_bits, parse_exponents; 194 float mdct_norm; 195 int nb_coefs[MAX_CHANNELS]; 196 static const int fixed_exp[25] = { 197 20, 20, 20, 20, 20, 198 20, 20, 20, 20, 20, 199 20, 20, 20, 20, 20, 200 20, 20, 20, 20, 20, 201 20, 20, 20, 20, 20 202 }; 203 204 // FIXME remove duplication relative to decoder 205 if (s->use_variable_block_len) { 206 av_assert0(0); // FIXME not implemented 207 } else { 208 /* fixed block len */ 209 s->next_block_len_bits = s->frame_len_bits; 210 s->prev_block_len_bits = s->frame_len_bits; 211 s->block_len_bits = s->frame_len_bits; 212 } 213 214 s->block_len = 1 << s->block_len_bits; 215// av_assert0((s->block_pos + s->block_len) <= s->frame_len); 216 bsize = s->frame_len_bits - s->block_len_bits; 217 218 // FIXME factor 219 v = s->coefs_end[bsize] - s->coefs_start; 220 for (ch = 0; ch < channels; ch++) 221 nb_coefs[ch] = v; 222 { 223 int n4 = s->block_len / 2; 224 mdct_norm = 1.0 / (float) n4; 225 if (s->version == 1) 226 mdct_norm *= sqrt(n4); 227 } 228 229 if (channels == 2) 230 put_bits(&s->pb, 1, !!s->ms_stereo); 231 232 for (ch = 0; ch < channels; ch++) { 233 // FIXME only set channel_coded when needed, instead of always 234 s->channel_coded[ch] = 1; 235 if (s->channel_coded[ch]) 236 init_exp(s, ch, fixed_exp); 237 } 238 239 for (ch = 0; ch < channels; ch++) { 240 if (s->channel_coded[ch]) { 241 WMACoef *coefs1; 242 float *coefs, *exponents, mult; 243 int i, n; 244 245 coefs1 = s->coefs1[ch]; 246 exponents = s->exponents[ch]; 247 mult = ff_exp10(total_gain * 0.05) / s->max_exponent[ch]; 248 mult *= mdct_norm; 249 coefs = src_coefs[ch]; 250 if (s->use_noise_coding && 0) { 251 av_assert0(0); // FIXME not implemented 252 } else { 253 coefs += s->coefs_start; 254 n = nb_coefs[ch]; 255 for (i = 0; i < n; i++) { 256 double t = *coefs++ / (exponents[i] * mult); 257 if (t < -32768 || t > 32767) 258 return -1; 259 260 coefs1[i] = lrint(t); 261 } 262 } 263 } 264 } 265 266 v = 0; 267 for (ch = 0; ch < channels; ch++) { 268 int a = s->channel_coded[ch]; 269 put_bits(&s->pb, 1, a); 270 v |= a; 271 } 272 273 if (!v) 274 return 1; 275 276 for (v = total_gain - 1; v >= 127; v -= 127) 277 put_bits(&s->pb, 7, 127); 278 put_bits(&s->pb, 7, v); 279 280 coef_nb_bits = ff_wma_total_gain_to_bits(total_gain); 281 282 if (s->use_noise_coding) { 283 for (ch = 0; ch < channels; ch++) { 284 if (s->channel_coded[ch]) { 285 int i, n; 286 n = s->exponent_high_sizes[bsize]; 287 for (i = 0; i < n; i++) { 288 put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0); 289 if (0) 290 nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; 291 } 292 } 293 } 294 } 295 296 parse_exponents = 1; 297 if (s->block_len_bits != s->frame_len_bits) 298 put_bits(&s->pb, 1, parse_exponents); 299 300 if (parse_exponents) { 301 for (ch = 0; ch < channels; ch++) { 302 if (s->channel_coded[ch]) { 303 if (s->use_exp_vlc) { 304 encode_exp_vlc(s, ch, fixed_exp); 305 } else { 306 av_assert0(0); // FIXME not implemented 307// encode_exp_lsp(s, ch); 308 } 309 } 310 } 311 } else 312 av_assert0(0); // FIXME not implemented 313 314 for (ch = 0; ch < channels; ch++) { 315 if (s->channel_coded[ch]) { 316 int run, tindex; 317 WMACoef *ptr, *eptr; 318 tindex = (ch == 1 && s->ms_stereo); 319 ptr = &s->coefs1[ch][0]; 320 eptr = ptr + nb_coefs[ch]; 321 322 run = 0; 323 for (; ptr < eptr; ptr++) { 324 if (*ptr) { 325 int level = *ptr; 326 int abs_level = FFABS(level); 327 int code = 0; 328 if (abs_level <= s->coef_vlcs[tindex]->max_level) 329 if (run < s->coef_vlcs[tindex]->levels[abs_level - 1]) 330 code = run + s->int_table[tindex][abs_level - 1]; 331 332 av_assert2(code < s->coef_vlcs[tindex]->n); 333 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], 334 s->coef_vlcs[tindex]->huffcodes[code]); 335 336 if (code == 0) { 337 if (1 << coef_nb_bits <= abs_level) 338 return -1; 339 340 put_bits(&s->pb, coef_nb_bits, abs_level); 341 put_bits(&s->pb, s->frame_len_bits, run); 342 } 343 // FIXME the sign is flipped somewhere 344 put_bits(&s->pb, 1, level < 0); 345 run = 0; 346 } else 347 run++; 348 } 349 if (run) 350 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], 351 s->coef_vlcs[tindex]->huffcodes[1]); 352 } 353 if (s->version == 1 && channels >= 2) 354 align_put_bits(&s->pb); 355 } 356 return 0; 357} 358 359static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], 360 uint8_t *buf, int buf_size, int total_gain) 361{ 362 init_put_bits(&s->pb, buf, buf_size); 363 364 if (s->use_bit_reservoir) 365 av_assert0(0); // FIXME not implemented 366 else if (encode_block(s, src_coefs, total_gain) < 0) 367 return INT_MAX; 368 369 align_put_bits(&s->pb); 370 371 return put_bits_count(&s->pb) / 8 - s->avctx->block_align; 372} 373 374static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt, 375 const AVFrame *frame, int *got_packet_ptr) 376{ 377 WMACodecContext *s = avctx->priv_data; 378 int i, total_gain, ret, error; 379 380 s->block_len_bits = s->frame_len_bits; // required by non variable block len 381 s->block_len = 1 << s->block_len_bits; 382 383 ret = apply_window_and_mdct(avctx, frame); 384 385 if (ret < 0) 386 return ret; 387 388 if (s->ms_stereo) { 389 float a, b; 390 int i; 391 392 for (i = 0; i < s->block_len; i++) { 393 a = s->coefs[0][i] * 0.5; 394 b = s->coefs[1][i] * 0.5; 395 s->coefs[0][i] = a + b; 396 s->coefs[1][i] = a - b; 397 } 398 } 399 400 if ((ret = ff_alloc_packet(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0) 401 return ret; 402 403 total_gain = 128; 404 for (i = 64; i; i >>= 1) { 405 error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, 406 total_gain - i); 407 if (error <= 0) 408 total_gain -= i; 409 } 410 411 while(total_gain <= 128 && error > 0) 412 error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++); 413 if (error > 0) { 414 av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n"); 415 avpkt->size = 0; 416 return AVERROR(EINVAL); 417 } 418 av_assert0((put_bits_count(&s->pb) & 7) == 0); 419 i = avctx->block_align - put_bytes_count(&s->pb, 0); 420 av_assert0(i>=0); 421 while(i--) 422 put_bits(&s->pb, 8, 'N'); 423 424 flush_put_bits(&s->pb); 425 av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align); 426 427 if (frame->pts != AV_NOPTS_VALUE) 428 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); 429 430 avpkt->size = avctx->block_align; 431 *got_packet_ptr = 1; 432 return 0; 433} 434 435#if CONFIG_WMAV1_ENCODER 436const FFCodec ff_wmav1_encoder = { 437 .p.name = "wmav1", 438 .p.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"), 439 .p.type = AVMEDIA_TYPE_AUDIO, 440 .p.id = AV_CODEC_ID_WMAV1, 441 .priv_data_size = sizeof(WMACodecContext), 442 .init = encode_init, 443 FF_CODEC_ENCODE_CB(encode_superframe), 444 .close = ff_wma_end, 445 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, 446 AV_SAMPLE_FMT_NONE }, 447 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 448}; 449#endif 450#if CONFIG_WMAV2_ENCODER 451const FFCodec ff_wmav2_encoder = { 452 .p.name = "wmav2", 453 .p.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"), 454 .p.type = AVMEDIA_TYPE_AUDIO, 455 .p.id = AV_CODEC_ID_WMAV2, 456 .priv_data_size = sizeof(WMACodecContext), 457 .init = encode_init, 458 FF_CODEC_ENCODE_CB(encode_superframe), 459 .close = ff_wma_end, 460 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, 461 AV_SAMPLE_FMT_NONE }, 462 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 463}; 464#endif 465