1/* 2 * Voxware MetaSound decoder 3 * Copyright (c) 2013 Konstantin Shishkov 4 * based on TwinVQ decoder 5 * Copyright (c) 2009 Vitor Sessak 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include <inttypes.h> 25#include <math.h> 26#include <stdint.h> 27 28#include "libavutil/channel_layout.h" 29#include "libavutil/float_dsp.h" 30 31#define BITSTREAM_READER_LE 32#include "avcodec.h" 33#include "codec_internal.h" 34#include "fft.h" 35#include "get_bits.h" 36#include "lsp.h" 37#include "sinewin.h" 38 39#include "twinvq.h" 40#include "metasound_data.h" 41 42static void add_peak(float period, int width, const float *shape, 43 float ppc_gain, float *speech, int len) 44{ 45 int i, j, center; 46 const float *shape_end = shape + len; 47 48 // First peak centered around zero 49 for (i = 0; i < width / 2; i++) 50 speech[i] += ppc_gain * *shape++; 51 52 for (i = 1; i < ROUNDED_DIV(len, width); i++) { 53 center = (int)(i * period + 0.5); 54 for (j = -width / 2; j < (width + 1) / 2; j++) 55 speech[j + center] += ppc_gain * *shape++; 56 } 57 58 // For the last block, be careful not to go beyond the end of the buffer 59 center = (int)(i * period + 0.5); 60 for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++) 61 speech[j + center] += ppc_gain * *shape++; 62} 63 64static void decode_ppc(TwinVQContext *tctx, int period_coef, int g_coef, 65 const float *shape, float *speech) 66{ 67 const TwinVQModeTab *mtab = tctx->mtab; 68 int channels = tctx->avctx->ch_layout.nb_channels; 69 int isampf = tctx->avctx->sample_rate / 1000; 70 int ibps = tctx->avctx->bit_rate / (1000 * channels); 71 int width; 72 73 float ratio = (float)mtab->size / isampf; 74 float min_period, max_period, period_range, period; 75 float some_mult; 76 77 float pgain_base, pgain_step, ppc_gain; 78 79 if (channels == 1) { 80 min_period = log2(ratio * 0.2); 81 max_period = min_period + log2(6); 82 } else { 83 min_period = (int)(ratio * 0.2 * 400 + 0.5) / 400.0; 84 max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0; 85 } 86 period_range = max_period - min_period; 87 period = min_period + period_coef * period_range / 88 ((1 << mtab->ppc_period_bit) - 1); 89 if (channels == 1) 90 period = powf(2.0, period); 91 else 92 period = (int)(period * 400 + 0.5) / 400.0; 93 94 switch (isampf) { 95 case 8: some_mult = 2.0; break; 96 case 11: some_mult = 3.0; break; 97 case 16: some_mult = 3.0; break; 98 case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break; 99 case 44: some_mult = 8.0; break; 100 default: some_mult = 4.0; 101 } 102 103 width = (int)(some_mult / (mtab->size / period) * mtab->ppc_shape_len); 104 if (isampf == 22 && ibps == 32) 105 width = (int)((2.0 / period + 1) * width + 0.5); 106 107 pgain_base = channels == 2 ? 25000.0 : 20000.0; 108 pgain_step = pgain_base / ((1 << mtab->pgain_bit) - 1); 109 ppc_gain = 1.0 / 8192 * 110 twinvq_mulawinv(pgain_step * g_coef + pgain_step / 2, 111 pgain_base, TWINVQ_PGAIN_MU); 112 113 add_peak(period, width, shape, ppc_gain, speech, mtab->ppc_shape_len); 114} 115 116static void dec_bark_env(TwinVQContext *tctx, const uint8_t *in, int use_hist, 117 int ch, float *out, float gain, 118 enum TwinVQFrameType ftype) 119{ 120 const TwinVQModeTab *mtab = tctx->mtab; 121 int i, j; 122 float *hist = tctx->bark_hist[ftype][ch]; 123 float val = ((const float []) { 0.4, 0.35, 0.28 })[ftype]; 124 int bark_n_coef = mtab->fmode[ftype].bark_n_coef; 125 int fw_cb_len = mtab->fmode[ftype].bark_env_size / bark_n_coef; 126 int idx = 0; 127 int channels = tctx->avctx->ch_layout.nb_channels; 128 129 if (channels == 1) 130 val = 0.5; 131 for (i = 0; i < fw_cb_len; i++) 132 for (j = 0; j < bark_n_coef; j++, idx++) { 133 float tmp2 = mtab->fmode[ftype].bark_cb[fw_cb_len * in[j] + i] * 134 (1.0 / 2048); 135 float st; 136 137 if (channels == 1) 138 st = use_hist ? 139 tmp2 + val * hist[idx] + 1.0 : tmp2 + 1.0; 140 else 141 st = use_hist ? (1.0 - val) * tmp2 + val * hist[idx] + 1.0 142 : tmp2 + 1.0; 143 144 hist[idx] = tmp2; 145 if (st < 0.1) 146 st = 0.1; 147 148 twinvq_memset_float(out, st * gain, 149 mtab->fmode[ftype].bark_tab[idx]); 150 out += mtab->fmode[ftype].bark_tab[idx]; 151 } 152} 153 154static void read_cb_data(TwinVQContext *tctx, GetBitContext *gb, 155 uint8_t *dst, enum TwinVQFrameType ftype) 156{ 157 int i; 158 159 for (i = 0; i < tctx->n_div[ftype]; i++) { 160 int bs_second_part = (i >= tctx->bits_main_spec_change[ftype]); 161 162 *dst++ = get_bits(gb, tctx->bits_main_spec[0][ftype][bs_second_part]); 163 *dst++ = get_bits(gb, tctx->bits_main_spec[1][ftype][bs_second_part]); 164 } 165} 166 167static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx, 168 const uint8_t *buf, int buf_size) 169{ 170 TwinVQFrameData *bits; 171 const TwinVQModeTab *mtab = tctx->mtab; 172 int channels = tctx->avctx->ch_layout.nb_channels; 173 int sub; 174 GetBitContext gb; 175 int i, j, k, ret; 176 177 if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0) 178 return ret; 179 180 for (tctx->cur_frame = 0; tctx->cur_frame < tctx->frames_per_packet; 181 tctx->cur_frame++) { 182 bits = tctx->bits + tctx->cur_frame; 183 184 bits->window_type = get_bits(&gb, TWINVQ_WINDOW_TYPE_BITS); 185 186 if (bits->window_type > 8) { 187 av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n"); 188 return AVERROR_INVALIDDATA; 189 } 190 191 bits->ftype = ff_twinvq_wtype_to_ftype_table[tctx->bits[tctx->cur_frame].window_type]; 192 193 sub = mtab->fmode[bits->ftype].sub; 194 195 if (bits->ftype != TWINVQ_FT_SHORT && !tctx->is_6kbps) 196 get_bits(&gb, 2); 197 198 read_cb_data(tctx, &gb, bits->main_coeffs, bits->ftype); 199 200 for (i = 0; i < channels; i++) 201 for (j = 0; j < sub; j++) 202 for (k = 0; k < mtab->fmode[bits->ftype].bark_n_coef; k++) 203 bits->bark1[i][j][k] = 204 get_bits(&gb, mtab->fmode[bits->ftype].bark_n_bit); 205 206 for (i = 0; i < channels; i++) 207 for (j = 0; j < sub; j++) 208 bits->bark_use_hist[i][j] = get_bits1(&gb); 209 210 if (bits->ftype == TWINVQ_FT_LONG) { 211 for (i = 0; i < channels; i++) 212 bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS); 213 } else { 214 for (i = 0; i < channels; i++) { 215 bits->gain_bits[i] = get_bits(&gb, TWINVQ_GAIN_BITS); 216 for (j = 0; j < sub; j++) 217 bits->sub_gain_bits[i * sub + j] = 218 get_bits(&gb, TWINVQ_SUB_GAIN_BITS); 219 } 220 } 221 222 for (i = 0; i < channels; i++) { 223 bits->lpc_hist_idx[i] = get_bits(&gb, mtab->lsp_bit0); 224 bits->lpc_idx1[i] = get_bits(&gb, mtab->lsp_bit1); 225 226 for (j = 0; j < mtab->lsp_split; j++) 227 bits->lpc_idx2[i][j] = get_bits(&gb, mtab->lsp_bit2); 228 } 229 230 if (bits->ftype == TWINVQ_FT_LONG) { 231 read_cb_data(tctx, &gb, bits->ppc_coeffs, 3); 232 for (i = 0; i < channels; i++) { 233 bits->p_coef[i] = get_bits(&gb, mtab->ppc_period_bit); 234 bits->g_coef[i] = get_bits(&gb, mtab->pgain_bit); 235 } 236 } 237 238 // subframes are aligned to nibbles 239 if (get_bits_count(&gb) & 3) 240 skip_bits(&gb, 4 - (get_bits_count(&gb) & 3)); 241 } 242 243 return (get_bits_count(&gb) + 7) / 8; 244} 245 246typedef struct MetasoundProps { 247 uint32_t tag; 248 int bit_rate; 249 int channels; 250 int sample_rate; 251} MetasoundProps; 252 253static const MetasoundProps codec_props[] = { 254 { MKTAG('V','X','0','3'), 6, 1, 8000 }, 255 { MKTAG('V','X','0','4'), 12, 2, 8000 }, 256 257 { MKTAG('V','O','X','i'), 8, 1, 8000 }, 258 { MKTAG('V','O','X','j'), 10, 1, 11025 }, 259 { MKTAG('V','O','X','k'), 16, 1, 16000 }, 260 { MKTAG('V','O','X','L'), 24, 1, 22050 }, 261 { MKTAG('V','O','X','q'), 32, 1, 44100 }, 262 { MKTAG('V','O','X','r'), 40, 1, 44100 }, 263 { MKTAG('V','O','X','s'), 48, 1, 44100 }, 264 { MKTAG('V','O','X','t'), 16, 2, 8000 }, 265 { MKTAG('V','O','X','u'), 20, 2, 11025 }, 266 { MKTAG('V','O','X','v'), 32, 2, 16000 }, 267 { MKTAG('V','O','X','w'), 48, 2, 22050 }, 268 { MKTAG('V','O','X','x'), 64, 2, 44100 }, 269 { MKTAG('V','O','X','y'), 80, 2, 44100 }, 270 { MKTAG('V','O','X','z'), 96, 2, 44100 }, 271 272 { 0, 0, 0, 0 } 273}; 274 275static av_cold int metasound_decode_init(AVCodecContext *avctx) 276{ 277 int isampf, ibps; 278 TwinVQContext *tctx = avctx->priv_data; 279 uint32_t tag; 280 const MetasoundProps *props = codec_props; 281 int channels; 282 283 if (!avctx->extradata || avctx->extradata_size < 16) { 284 av_log(avctx, AV_LOG_ERROR, "Missing or incomplete extradata\n"); 285 return AVERROR_INVALIDDATA; 286 } 287 288 tag = AV_RL32(avctx->extradata + 12); 289 290 for (;;) { 291 if (!props->tag) { 292 av_log(avctx, AV_LOG_ERROR, "Could not find tag %08"PRIX32"\n", tag); 293 return AVERROR_INVALIDDATA; 294 } 295 if (props->tag == tag) { 296 avctx->sample_rate = props->sample_rate; 297 channels = props->channels; 298 avctx->bit_rate = props->bit_rate * 1000; 299 isampf = avctx->sample_rate / 1000; 300 break; 301 } 302 props++; 303 } 304 305 if (channels <= 0 || channels > TWINVQ_CHANNELS_MAX) { 306 av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n", 307 channels); 308 return AVERROR_INVALIDDATA; 309 } 310 av_channel_layout_uninit(&avctx->ch_layout); 311 av_channel_layout_default(&avctx->ch_layout, channels); 312 313 ibps = avctx->bit_rate / (1000 * channels); 314 315 switch ((channels << 16) + (isampf << 8) + ibps) { 316 case (1 << 16) + ( 8 << 8) + 6: 317 tctx->mtab = &ff_metasound_mode0806; 318 break; 319 case (2 << 16) + ( 8 << 8) + 6: 320 tctx->mtab = &ff_metasound_mode0806s; 321 break; 322 case (1 << 16) + ( 8 << 8) + 8: 323 tctx->mtab = &ff_metasound_mode0808; 324 break; 325 case (2 << 16) + ( 8 << 8) + 8: 326 tctx->mtab = &ff_metasound_mode0808s; 327 break; 328 case (1 << 16) + (11 << 8) + 10: 329 tctx->mtab = &ff_metasound_mode1110; 330 break; 331 case (2 << 16) + (11 << 8) + 10: 332 tctx->mtab = &ff_metasound_mode1110s; 333 break; 334 case (1 << 16) + (16 << 8) + 16: 335 tctx->mtab = &ff_metasound_mode1616; 336 break; 337 case (2 << 16) + (16 << 8) + 16: 338 tctx->mtab = &ff_metasound_mode1616s; 339 break; 340 case (1 << 16) + (22 << 8) + 24: 341 tctx->mtab = &ff_metasound_mode2224; 342 break; 343 case (2 << 16) + (22 << 8) + 24: 344 tctx->mtab = &ff_metasound_mode2224s; 345 break; 346 case (1 << 16) + (44 << 8) + 32: 347 case (2 << 16) + (44 << 8) + 32: 348 tctx->mtab = &ff_metasound_mode4432; 349 break; 350 case (1 << 16) + (44 << 8) + 40: 351 case (2 << 16) + (44 << 8) + 40: 352 tctx->mtab = &ff_metasound_mode4440; 353 break; 354 case (1 << 16) + (44 << 8) + 48: 355 case (2 << 16) + (44 << 8) + 48: 356 tctx->mtab = &ff_metasound_mode4448; 357 break; 358 default: 359 av_log(avctx, AV_LOG_ERROR, 360 "This version does not support %d kHz - %d kbit/s/ch mode.\n", 361 isampf, ibps); 362 return AVERROR(ENOSYS); 363 } 364 365 tctx->codec = TWINVQ_CODEC_METASOUND; 366 tctx->read_bitstream = metasound_read_bitstream; 367 tctx->dec_bark_env = dec_bark_env; 368 tctx->decode_ppc = decode_ppc; 369 tctx->frame_size = avctx->bit_rate * tctx->mtab->size 370 / avctx->sample_rate; 371 tctx->is_6kbps = ibps == 6; 372 373 return ff_twinvq_decode_init(avctx); 374} 375 376const FFCodec ff_metasound_decoder = { 377 .p.name = "metasound", 378 .p.long_name = NULL_IF_CONFIG_SMALL("Voxware MetaSound"), 379 .p.type = AVMEDIA_TYPE_AUDIO, 380 .p.id = AV_CODEC_ID_METASOUND, 381 .priv_data_size = sizeof(TwinVQContext), 382 .init = metasound_decode_init, 383 .close = ff_twinvq_decode_close, 384 FF_CODEC_DECODE_CB(ff_twinvq_decode_frame), 385 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF, 386 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, 387 AV_SAMPLE_FMT_NONE }, 388 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 389}; 390