xref: /third_party/ffmpeg/libavcodec/g722enc.c (revision cabdff1a)
1/*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 *                        Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25/**
26 * @file
27 * G.722 ADPCM audio encoder
28 */
29
30#include "libavutil/avassert.h"
31#include "libavutil/channel_layout.h"
32#include "avcodec.h"
33#include "codec_internal.h"
34#include "encode.h"
35#include "internal.h"
36#include "g722.h"
37#include "libavutil/common.h"
38
/* Number of trellis steps between two flushes of the best path to the
 * output. The per-band path storage is sized as frontier * FREEZE_INTERVAL. */
#define FREEZE_INTERVAL 128

/* Upper bound for the accepted frame size. The value itself is arbitrary;
 * it only exists because insanely large frame sizes lead to strange
 * problems. */
#define MAX_FRAME_SIZE 32768

/* avctx->trellis is clipped to this range to prevent data type overflows
 * and undefined behavior. Larger values would be insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
49
50static av_cold int g722_encode_close(AVCodecContext *avctx)
51{
52    G722Context *c = avctx->priv_data;
53    int i;
54    for (i = 0; i < 2; i++) {
55        av_freep(&c->paths[i]);
56        av_freep(&c->node_buf[i]);
57        av_freep(&c->nodep_buf[i]);
58    }
59    return 0;
60}
61
62static av_cold int g722_encode_init(AVCodecContext * avctx)
63{
64    G722Context *c = avctx->priv_data;
65
66    c->band[0].scale_factor = 8;
67    c->band[1].scale_factor = 2;
68    c->prev_samples_pos = 22;
69
70    if (avctx->frame_size) {
71        /* validate frame size */
72        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
73            int new_frame_size;
74
75            if (avctx->frame_size == 1)
76                new_frame_size = 2;
77            else if (avctx->frame_size > MAX_FRAME_SIZE)
78                new_frame_size = MAX_FRAME_SIZE;
79            else
80                new_frame_size = avctx->frame_size - 1;
81
82            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
83                   "allowed. Using %d instead of %d\n", new_frame_size,
84                   avctx->frame_size);
85            avctx->frame_size = new_frame_size;
86        }
87    } else {
88        /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
89           a common packet size for VoIP applications */
90        avctx->frame_size = 320;
91    }
92    avctx->initial_padding = 22;
93
94    if (avctx->trellis) {
95        /* validate trellis */
96        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
97            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
98            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
99                   "allowed. Using %d instead of %d\n", new_trellis,
100                   avctx->trellis);
101            avctx->trellis = new_trellis;
102        }
103        if (avctx->trellis) {
104            int frontier = 1 << avctx->trellis;
105            int max_paths = frontier * FREEZE_INTERVAL;
106
107            for (int i = 0; i < 2; i++) {
108                c->paths[i]     = av_calloc(max_paths, sizeof(**c->paths));
109                c->node_buf[i]  = av_calloc(frontier, 2 * sizeof(**c->node_buf));
110                c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
111                if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
112                    return AVERROR(ENOMEM);
113            }
114        }
115    }
116
117    ff_g722dsp_init(&c->dsp);
118
119    return 0;
120}
121
/* Decision thresholds used by encode_low(): each entry is multiplied by the
 * band scale factor and compared against the scaled magnitude of the
 * prediction difference. Only the first 29 of the 33 slots are listed; the
 * remaining ones are zero-initialized and encode_low() never indexes past
 * entry 28. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,
     276,  323,  370,  422,  473,  530,
     587,  650,  714,  786,  858,  940,
    1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
128
129static inline void filter_samples(G722Context *c, const int16_t *samples,
130                                  int *xlow, int *xhigh)
131{
132    int xout[2];
133    c->prev_samples[c->prev_samples_pos++] = samples[0];
134    c->prev_samples[c->prev_samples_pos++] = samples[1];
135    c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
136    *xlow  = xout[0] + xout[1] >> 14;
137    *xhigh = xout[0] - xout[1] >> 14;
138    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
139        memmove(c->prev_samples,
140                c->prev_samples + c->prev_samples_pos - 22,
141                22 * sizeof(c->prev_samples[0]));
142        c->prev_samples_pos = 22;
143    }
144}
145
146static inline int encode_high(const struct G722Band *state, int xhigh)
147{
148    int diff = av_clip_int16(xhigh - state->s_predictor);
149    int pred = 141 * state->scale_factor >> 8;
150           /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
151    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
152}
153
154static inline int encode_low(const struct G722Band* state, int xlow)
155{
156    int diff  = av_clip_int16(xlow - state->s_predictor);
157           /* = diff >= 0 ? diff : -(diff + 1) */
158    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
159    int i = 0;
160    limit = limit + 1 << 10;
161    if (limit > low_quant[8] * state->scale_factor)
162        i = 9;
163    while (i < 29 && limit > low_quant[i] * state->scale_factor)
164        i++;
165    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
166}
167
168static void g722_encode_trellis(G722Context *c, int trellis,
169                                uint8_t *dst, int nb_samples,
170                                const int16_t *samples)
171{
172    int i, j, k;
173    int frontier = 1 << trellis;
174    struct TrellisNode **nodes[2];
175    struct TrellisNode **nodes_next[2];
176    int pathn[2] = {0, 0}, froze = -1;
177    struct TrellisPath *p[2];
178
179    for (i = 0; i < 2; i++) {
180        nodes[i] = c->nodep_buf[i];
181        nodes_next[i] = c->nodep_buf[i] + frontier;
182        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
183        nodes[i][0] = c->node_buf[i] + frontier;
184        nodes[i][0]->ssd = 0;
185        nodes[i][0]->path = 0;
186        nodes[i][0]->state = c->band[i];
187    }
188
189    for (i = 0; i < nb_samples >> 1; i++) {
190        int xlow, xhigh;
191        struct TrellisNode *next[2];
192        int heap_pos[2] = {0, 0};
193
194        for (j = 0; j < 2; j++) {
195            next[j] = c->node_buf[j] + frontier*(i & 1);
196            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
197        }
198
199        filter_samples(c, &samples[2*i], &xlow, &xhigh);
200
201        for (j = 0; j < frontier && nodes[0][j]; j++) {
202            /* Only k >> 2 affects the future adaptive state, therefore testing
203             * small steps that don't change k >> 2 is useless, the original
204             * value from encode_low is better than them. Since we step k
205             * in steps of 4, make sure range is a multiple of 4, so that
206             * we don't miss the original value from encode_low. */
207            int range = j < frontier/2 ? 4 : 0;
208            struct TrellisNode *cur_node = nodes[0][j];
209
210            int ilow = encode_low(&cur_node->state, xlow);
211
212            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
213                int decoded, dec_diff, pos;
214                uint32_t ssd;
215                struct TrellisNode* node;
216
217                if (k < 0)
218                    continue;
219
220                decoded = av_clip_intp2((cur_node->state.scale_factor *
221                                  ff_g722_low_inv_quant6[k] >> 10)
222                                + cur_node->state.s_predictor, 14);
223                dec_diff = xlow - decoded;
224
225#define STORE_NODE(index, UPDATE, VALUE)\
226                ssd = cur_node->ssd + dec_diff*dec_diff;\
227                /* Check for wraparound. Using 64 bit ssd counters would \
228                 * be simpler, but is slower on x86 32 bit. */\
229                if (ssd < cur_node->ssd)\
230                    continue;\
231                if (heap_pos[index] < frontier) {\
232                    pos = heap_pos[index]++;\
233                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
234                    node = nodes_next[index][pos] = next[index]++;\
235                    node->path = pathn[index]++;\
236                } else {\
237                    /* Try to replace one of the leaf nodes with the new \
238                     * one, but not always testing the same leaf position */\
239                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
240                    if (ssd >= nodes_next[index][pos]->ssd)\
241                        continue;\
242                    heap_pos[index]++;\
243                    node = nodes_next[index][pos];\
244                }\
245                node->ssd = ssd;\
246                node->state = cur_node->state;\
247                UPDATE;\
248                c->paths[index][node->path].value = VALUE;\
249                c->paths[index][node->path].prev = cur_node->path;\
250                /* Sift the newly inserted node up in the heap to restore \
251                 * the heap property */\
252                while (pos > 0) {\
253                    int parent = (pos - 1) >> 1;\
254                    if (nodes_next[index][parent]->ssd <= ssd)\
255                        break;\
256                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
257                                                nodes_next[index][pos]);\
258                    pos = parent;\
259                }
260                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
261            }
262        }
263
264        for (j = 0; j < frontier && nodes[1][j]; j++) {
265            int ihigh;
266            struct TrellisNode *cur_node = nodes[1][j];
267
268            /* We don't try to get any initial guess for ihigh via
269             * encode_high - since there's only 4 possible values, test
270             * them all. Testing all of these gives a much, much larger
271             * gain than testing a larger range around ilow. */
272            for (ihigh = 0; ihigh < 4; ihigh++) {
273                int dhigh, decoded, dec_diff, pos;
274                uint32_t ssd;
275                struct TrellisNode* node;
276
277                dhigh = cur_node->state.scale_factor *
278                        ff_g722_high_inv_quant[ihigh] >> 10;
279                decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
280                dec_diff = xhigh - decoded;
281
282                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
283            }
284        }
285
286        for (j = 0; j < 2; j++) {
287            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
288
289            if (nodes[j][0]->ssd > (1 << 16)) {
290                for (k = 1; k < frontier && nodes[j][k]; k++)
291                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
292                nodes[j][0]->ssd = 0;
293            }
294        }
295
296        if (i == froze + FREEZE_INTERVAL) {
297            p[0] = &c->paths[0][nodes[0][0]->path];
298            p[1] = &c->paths[1][nodes[1][0]->path];
299            for (j = i; j > froze; j--) {
300                dst[j] = p[1]->value << 6 | p[0]->value;
301                p[0] = &c->paths[0][p[0]->prev];
302                p[1] = &c->paths[1][p[1]->prev];
303            }
304            froze = i;
305            pathn[0] = pathn[1] = 0;
306            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
307            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
308        }
309    }
310
311    p[0] = &c->paths[0][nodes[0][0]->path];
312    p[1] = &c->paths[1][nodes[1][0]->path];
313    for (j = i; j > froze; j--) {
314        dst[j] = p[1]->value << 6 | p[0]->value;
315        p[0] = &c->paths[0][p[0]->prev];
316        p[1] = &c->paths[1][p[1]->prev];
317    }
318    c->band[0] = nodes[0][0]->state;
319    c->band[1] = nodes[1][0]->state;
320}
321
322static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
323                                         const int16_t *samples)
324{
325    int xlow, xhigh, ilow, ihigh;
326    filter_samples(c, samples, &xlow, &xhigh);
327    ihigh = encode_high(&c->band[1], xhigh);
328    ilow  = encode_low (&c->band[0], xlow);
329    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
330                                ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
331    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
332    *dst = ihigh << 6 | ilow;
333}
334
335static void g722_encode_no_trellis(G722Context *c,
336                                   uint8_t *dst, int nb_samples,
337                                   const int16_t *samples)
338{
339    int i;
340    for (i = 0; i < nb_samples; i += 2)
341        encode_byte(c, dst++, &samples[i]);
342}
343
344static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
345                             const AVFrame *frame, int *got_packet_ptr)
346{
347    G722Context *c = avctx->priv_data;
348    const int16_t *samples = (const int16_t *)frame->data[0];
349    int nb_samples, out_size, ret;
350
351    out_size = (frame->nb_samples + 1) / 2;
352    if ((ret = ff_get_encode_buffer(avctx, avpkt, out_size, 0)) < 0)
353        return ret;
354
355    nb_samples = frame->nb_samples - (frame->nb_samples & 1);
356
357    if (avctx->trellis)
358        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
359    else
360        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
361
362    /* handle last frame with odd frame_size */
363    if (nb_samples < frame->nb_samples) {
364        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
365        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
366    }
367
368    if (frame->pts != AV_NOPTS_VALUE)
369        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
370    *got_packet_ptr = 1;
371    return 0;
372}
373
374const FFCodec ff_adpcm_g722_encoder = {
375    .p.name          = "g722",
376    .p.long_name     = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
377    .p.type          = AVMEDIA_TYPE_AUDIO,
378    .p.id            = AV_CODEC_ID_ADPCM_G722,
379    .p.capabilities  = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME,
380    .priv_data_size  = sizeof(G722Context),
381    .init            = g722_encode_init,
382    .close           = g722_encode_close,
383    FF_CODEC_ENCODE_CB(g722_encode_frame),
384    .p.sample_fmts   = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
385#if FF_API_OLD_CHANNEL_LAYOUT
386    .p.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
387#endif
388    .p.ch_layouts   = (const AVChannelLayout[]){
389        AV_CHANNEL_LAYOUT_MONO, { 0 }
390    },
391    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
392};
393