1/* 2 * Copyright (c) CMU 1993 Computer Science, Speech Group 3 * Chengxiang Lu and Alex Hauptmann 4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org> 5 * Copyright (c) 2009 Kenan Gillet 6 * Copyright (c) 2010 Martin Storsjo 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25/** 26 * @file 27 * G.722 ADPCM audio encoder 28 */ 29 30#include "libavutil/avassert.h" 31#include "libavutil/channel_layout.h" 32#include "avcodec.h" 33#include "codec_internal.h" 34#include "encode.h" 35#include "internal.h" 36#include "g722.h" 37#include "libavutil/common.h" 38 39#define FREEZE_INTERVAL 128 40 41/* This is an arbitrary value. Allowing insanely large values leads to strange 42 problems, so we limit it to a reasonable value */ 43#define MAX_FRAME_SIZE 32768 44 45/* We clip the value of avctx->trellis to prevent data type overflows and 46 undefined behavior. Using larger values is insanely slow anyway. */ 47#define MIN_TRELLIS 0 48#define MAX_TRELLIS 16 49 50static av_cold int g722_encode_close(AVCodecContext *avctx) 51{ 52 G722Context *c = avctx->priv_data; 53 int i; 54 for (i = 0; i < 2; i++) { 55 av_freep(&c->paths[i]); 56 av_freep(&c->node_buf[i]); 57 av_freep(&c->nodep_buf[i]); 58 } 59 return 0; 60} 61 62static av_cold int g722_encode_init(AVCodecContext * avctx) 63{ 64 G722Context *c = avctx->priv_data; 65 66 c->band[0].scale_factor = 8; 67 c->band[1].scale_factor = 2; 68 c->prev_samples_pos = 22; 69 70 if (avctx->frame_size) { 71 /* validate frame size */ 72 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) { 73 int new_frame_size; 74 75 if (avctx->frame_size == 1) 76 new_frame_size = 2; 77 else if (avctx->frame_size > MAX_FRAME_SIZE) 78 new_frame_size = MAX_FRAME_SIZE; 79 else 80 new_frame_size = avctx->frame_size - 1; 81 82 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not " 83 "allowed. Using %d instead of %d\n", new_frame_size, 84 avctx->frame_size); 85 avctx->frame_size = new_frame_size; 86 } 87 } else { 88 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is 89 a common packet size for VoIP applications */ 90 avctx->frame_size = 320; 91 } 92 avctx->initial_padding = 22; 93 94 if (avctx->trellis) { 95 /* validate trellis */ 96 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) { 97 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS); 98 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not " 99 "allowed. Using %d instead of %d\n", new_trellis, 100 avctx->trellis); 101 avctx->trellis = new_trellis; 102 } 103 if (avctx->trellis) { 104 int frontier = 1 << avctx->trellis; 105 int max_paths = frontier * FREEZE_INTERVAL; 106 107 for (int i = 0; i < 2; i++) { 108 c->paths[i] = av_calloc(max_paths, sizeof(**c->paths)); 109 c->node_buf[i] = av_calloc(frontier, 2 * sizeof(**c->node_buf)); 110 c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf)); 111 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) 112 return AVERROR(ENOMEM); 113 } 114 } 115 } 116 117 ff_g722dsp_init(&c->dsp); 118 119 return 0; 120} 121 122static const int16_t low_quant[33] = { 123 35, 72, 110, 150, 190, 233, 276, 323, 124 370, 422, 473, 530, 587, 650, 714, 786, 125 858, 940, 1023, 1121, 1219, 1339, 1458, 1612, 126 1765, 1980, 2195, 2557, 2919 127}; 128 129static inline void filter_samples(G722Context *c, const int16_t *samples, 130 int *xlow, int *xhigh) 131{ 132 int xout[2]; 133 c->prev_samples[c->prev_samples_pos++] = samples[0]; 134 c->prev_samples[c->prev_samples_pos++] = samples[1]; 135 c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout); 136 *xlow = xout[0] + xout[1] >> 14; 137 *xhigh = xout[0] - xout[1] >> 14; 138 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) { 139 memmove(c->prev_samples, 140 c->prev_samples + c->prev_samples_pos - 22, 141 22 * sizeof(c->prev_samples[0])); 142 c->prev_samples_pos = 22; 143 } 144} 145 146static inline int encode_high(const struct G722Band *state, int xhigh) 147{ 148 int diff = av_clip_int16(xhigh - state->s_predictor); 149 int pred = 141 * state->scale_factor >> 8; 150 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */ 151 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0); 152} 153 154static inline int encode_low(const struct G722Band* state, int xlow) 155{ 156 int diff = av_clip_int16(xlow - state->s_predictor); 157 /* = diff >= 0 ? diff : -(diff + 1) */ 158 int limit = diff ^ (diff >> (sizeof(diff)*8-1)); 159 int i = 0; 160 limit = limit + 1 << 10; 161 if (limit > low_quant[8] * state->scale_factor) 162 i = 9; 163 while (i < 29 && limit > low_quant[i] * state->scale_factor) 164 i++; 165 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i; 166} 167 168static void g722_encode_trellis(G722Context *c, int trellis, 169 uint8_t *dst, int nb_samples, 170 const int16_t *samples) 171{ 172 int i, j, k; 173 int frontier = 1 << trellis; 174 struct TrellisNode **nodes[2]; 175 struct TrellisNode **nodes_next[2]; 176 int pathn[2] = {0, 0}, froze = -1; 177 struct TrellisPath *p[2]; 178 179 for (i = 0; i < 2; i++) { 180 nodes[i] = c->nodep_buf[i]; 181 nodes_next[i] = c->nodep_buf[i] + frontier; 182 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i])); 183 nodes[i][0] = c->node_buf[i] + frontier; 184 nodes[i][0]->ssd = 0; 185 nodes[i][0]->path = 0; 186 nodes[i][0]->state = c->band[i]; 187 } 188 189 for (i = 0; i < nb_samples >> 1; i++) { 190 int xlow, xhigh; 191 struct TrellisNode *next[2]; 192 int heap_pos[2] = {0, 0}; 193 194 for (j = 0; j < 2; j++) { 195 next[j] = c->node_buf[j] + frontier*(i & 1); 196 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next)); 197 } 198 199 filter_samples(c, &samples[2*i], &xlow, &xhigh); 200 201 for (j = 0; j < frontier && nodes[0][j]; j++) { 202 /* Only k >> 2 affects the future adaptive state, therefore testing 203 * small steps that don't change k >> 2 is useless, the original 204 * value from encode_low is better than them. Since we step k 205 * in steps of 4, make sure range is a multiple of 4, so that 206 * we don't miss the original value from encode_low. */ 207 int range = j < frontier/2 ? 4 : 0; 208 struct TrellisNode *cur_node = nodes[0][j]; 209 210 int ilow = encode_low(&cur_node->state, xlow); 211 212 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) { 213 int decoded, dec_diff, pos; 214 uint32_t ssd; 215 struct TrellisNode* node; 216 217 if (k < 0) 218 continue; 219 220 decoded = av_clip_intp2((cur_node->state.scale_factor * 221 ff_g722_low_inv_quant6[k] >> 10) 222 + cur_node->state.s_predictor, 14); 223 dec_diff = xlow - decoded; 224 225#define STORE_NODE(index, UPDATE, VALUE)\ 226 ssd = cur_node->ssd + dec_diff*dec_diff;\ 227 /* Check for wraparound. Using 64 bit ssd counters would \ 228 * be simpler, but is slower on x86 32 bit. */\ 229 if (ssd < cur_node->ssd)\ 230 continue;\ 231 if (heap_pos[index] < frontier) {\ 232 pos = heap_pos[index]++;\ 233 av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\ 234 node = nodes_next[index][pos] = next[index]++;\ 235 node->path = pathn[index]++;\ 236 } else {\ 237 /* Try to replace one of the leaf nodes with the new \ 238 * one, but not always testing the same leaf position */\ 239 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\ 240 if (ssd >= nodes_next[index][pos]->ssd)\ 241 continue;\ 242 heap_pos[index]++;\ 243 node = nodes_next[index][pos];\ 244 }\ 245 node->ssd = ssd;\ 246 node->state = cur_node->state;\ 247 UPDATE;\ 248 c->paths[index][node->path].value = VALUE;\ 249 c->paths[index][node->path].prev = cur_node->path;\ 250 /* Sift the newly inserted node up in the heap to restore \ 251 * the heap property */\ 252 while (pos > 0) {\ 253 int parent = (pos - 1) >> 1;\ 254 if (nodes_next[index][parent]->ssd <= ssd)\ 255 break;\ 256 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\ 257 nodes_next[index][pos]);\ 258 pos = parent;\ 259 } 260 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k); 261 } 262 } 263 264 for (j = 0; j < frontier && nodes[1][j]; j++) { 265 int ihigh; 266 struct TrellisNode *cur_node = nodes[1][j]; 267 268 /* We don't try to get any initial guess for ihigh via 269 * encode_high - since there's only 4 possible values, test 270 * them all. Testing all of these gives a much, much larger 271 * gain than testing a larger range around ilow. */ 272 for (ihigh = 0; ihigh < 4; ihigh++) { 273 int dhigh, decoded, dec_diff, pos; 274 uint32_t ssd; 275 struct TrellisNode* node; 276 277 dhigh = cur_node->state.scale_factor * 278 ff_g722_high_inv_quant[ihigh] >> 10; 279 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14); 280 dec_diff = xhigh - decoded; 281 282 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh); 283 } 284 } 285 286 for (j = 0; j < 2; j++) { 287 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]); 288 289 if (nodes[j][0]->ssd > (1 << 16)) { 290 for (k = 1; k < frontier && nodes[j][k]; k++) 291 nodes[j][k]->ssd -= nodes[j][0]->ssd; 292 nodes[j][0]->ssd = 0; 293 } 294 } 295 296 if (i == froze + FREEZE_INTERVAL) { 297 p[0] = &c->paths[0][nodes[0][0]->path]; 298 p[1] = &c->paths[1][nodes[1][0]->path]; 299 for (j = i; j > froze; j--) { 300 dst[j] = p[1]->value << 6 | p[0]->value; 301 p[0] = &c->paths[0][p[0]->prev]; 302 p[1] = &c->paths[1][p[1]->prev]; 303 } 304 froze = i; 305 pathn[0] = pathn[1] = 0; 306 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes)); 307 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes)); 308 } 309 } 310 311 p[0] = &c->paths[0][nodes[0][0]->path]; 312 p[1] = &c->paths[1][nodes[1][0]->path]; 313 for (j = i; j > froze; j--) { 314 dst[j] = p[1]->value << 6 | p[0]->value; 315 p[0] = &c->paths[0][p[0]->prev]; 316 p[1] = &c->paths[1][p[1]->prev]; 317 } 318 c->band[0] = nodes[0][0]->state; 319 c->band[1] = nodes[1][0]->state; 320} 321 322static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, 323 const int16_t *samples) 324{ 325 int xlow, xhigh, ilow, ihigh; 326 filter_samples(c, samples, &xlow, &xhigh); 327 ihigh = encode_high(&c->band[1], xhigh); 328 ilow = encode_low (&c->band[0], xlow); 329 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor * 330 ff_g722_high_inv_quant[ihigh] >> 10, ihigh); 331 ff_g722_update_low_predictor(&c->band[0], ilow >> 2); 332 *dst = ihigh << 6 | ilow; 333} 334 335static void g722_encode_no_trellis(G722Context *c, 336 uint8_t *dst, int nb_samples, 337 const int16_t *samples) 338{ 339 int i; 340 for (i = 0; i < nb_samples; i += 2) 341 encode_byte(c, dst++, &samples[i]); 342} 343 344static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 345 const AVFrame *frame, int *got_packet_ptr) 346{ 347 G722Context *c = avctx->priv_data; 348 const int16_t *samples = (const int16_t *)frame->data[0]; 349 int nb_samples, out_size, ret; 350 351 out_size = (frame->nb_samples + 1) / 2; 352 if ((ret = ff_get_encode_buffer(avctx, avpkt, out_size, 0)) < 0) 353 return ret; 354 355 nb_samples = frame->nb_samples - (frame->nb_samples & 1); 356 357 if (avctx->trellis) 358 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples); 359 else 360 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples); 361 362 /* handle last frame with odd frame_size */ 363 if (nb_samples < frame->nb_samples) { 364 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] }; 365 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples); 366 } 367 368 if (frame->pts != AV_NOPTS_VALUE) 369 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); 370 *got_packet_ptr = 1; 371 return 0; 372} 373 374const FFCodec ff_adpcm_g722_encoder = { 375 .p.name = "g722", 376 .p.long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"), 377 .p.type = AVMEDIA_TYPE_AUDIO, 378 .p.id = AV_CODEC_ID_ADPCM_G722, 379 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME, 380 .priv_data_size = sizeof(G722Context), 381 .init = g722_encode_init, 382 .close = g722_encode_close, 383 FF_CODEC_ENCODE_CB(g722_encode_frame), 384 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, 385#if FF_API_OLD_CHANNEL_LAYOUT 386 .p.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 }, 387#endif 388 .p.ch_layouts = (const AVChannelLayout[]){ 389 AV_CHANNEL_LAYOUT_MONO, { 0 } 390 }, 391 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 392}; 393