1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * audio encoder psychoacoustic model 3cabdff1aSopenharmony_ci * Copyright (C) 2008 Konstantin Shishkov 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#ifndef AVCODEC_PSYMODEL_H 23cabdff1aSopenharmony_ci#define AVCODEC_PSYMODEL_H 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "avcodec.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci/** maximum possible number of bands */ 28cabdff1aSopenharmony_ci#define PSY_MAX_BANDS 128 29cabdff1aSopenharmony_ci/** maximum number of channels */ 30cabdff1aSopenharmony_ci#define PSY_MAX_CHANS 20 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci/* cutoff for VBR is purposely increased, since LP filtering actually 33cabdff1aSopenharmony_ci * hinders VBR performance rather than the opposite 34cabdff1aSopenharmony_ci */ 35cabdff1aSopenharmony_ci#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) (bit_rate ? FFMIN3(FFMIN3( \ 36cabdff1aSopenharmony_ci FFMAX(bit_rate/channels/5, bit_rate/channels*15/32 - 5500), \ 37cabdff1aSopenharmony_ci 3000 + bit_rate/channels/4, \ 38cabdff1aSopenharmony_ci 12000 + bit_rate/channels/16), \ 39cabdff1aSopenharmony_ci 22000, \ 40cabdff1aSopenharmony_ci sample_rate / 2): (sample_rate / 2)) 41cabdff1aSopenharmony_ci#define AAC_CUTOFF(s) ( \ 42cabdff1aSopenharmony_ci (s->flags & AV_CODEC_FLAG_QSCALE) \ 43cabdff1aSopenharmony_ci ? s->sample_rate / 2 \ 44cabdff1aSopenharmony_ci : AAC_CUTOFF_FROM_BITRATE(s->bit_rate, s->ch_layout.nb_channels, s->sample_rate) \ 45cabdff1aSopenharmony_ci) 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_ci/** 48cabdff1aSopenharmony_ci * single band psychoacoustic information 49cabdff1aSopenharmony_ci */ 50cabdff1aSopenharmony_citypedef struct FFPsyBand { 51cabdff1aSopenharmony_ci int bits; 52cabdff1aSopenharmony_ci float energy; 53cabdff1aSopenharmony_ci float threshold; 54cabdff1aSopenharmony_ci float spread; /* Energy spread over the band */ 55cabdff1aSopenharmony_ci} FFPsyBand; 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_ci/** 58cabdff1aSopenharmony_ci * single channel psychoacoustic information 59cabdff1aSopenharmony_ci */ 60cabdff1aSopenharmony_citypedef struct FFPsyChannel { 61cabdff1aSopenharmony_ci FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information 62cabdff1aSopenharmony_ci float entropy; ///< total PE for this channel 63cabdff1aSopenharmony_ci} FFPsyChannel; 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci/** 66cabdff1aSopenharmony_ci * psychoacoustic information for an arbitrary group of channels 67cabdff1aSopenharmony_ci */ 68cabdff1aSopenharmony_citypedef struct FFPsyChannelGroup { 69cabdff1aSopenharmony_ci FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group 70cabdff1aSopenharmony_ci uint8_t num_ch; ///< number of channels in this group 71cabdff1aSopenharmony_ci uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group 72cabdff1aSopenharmony_ci} FFPsyChannelGroup; 73cabdff1aSopenharmony_ci 74cabdff1aSopenharmony_ci/** 75cabdff1aSopenharmony_ci * windowing related information 76cabdff1aSopenharmony_ci */ 77cabdff1aSopenharmony_citypedef struct FFPsyWindowInfo { 78cabdff1aSopenharmony_ci int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next 79cabdff1aSopenharmony_ci int window_shape; ///< window shape (sine/KBD/whatever) 80cabdff1aSopenharmony_ci int num_windows; ///< number of windows in a frame 81cabdff1aSopenharmony_ci int grouping[8]; ///< window grouping (for e.g. AAC) 82cabdff1aSopenharmony_ci float clipping[8]; ///< maximum absolute normalized intensity in the given window for clip avoidance 83cabdff1aSopenharmony_ci int *window_sizes; ///< sequence of window sizes inside one frame (for eg. WMA) 84cabdff1aSopenharmony_ci} FFPsyWindowInfo; 85cabdff1aSopenharmony_ci 86cabdff1aSopenharmony_ci/** 87cabdff1aSopenharmony_ci * context used by psychoacoustic model 88cabdff1aSopenharmony_ci */ 89cabdff1aSopenharmony_citypedef struct FFPsyContext { 90cabdff1aSopenharmony_ci AVCodecContext *avctx; ///< encoder context 91cabdff1aSopenharmony_ci const struct FFPsyModel *model; ///< encoder-specific model functions 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_ci FFPsyChannel *ch; ///< single channel information 94cabdff1aSopenharmony_ci FFPsyChannelGroup *group; ///< channel group information 95cabdff1aSopenharmony_ci int num_groups; ///< number of channel groups 96cabdff1aSopenharmony_ci int cutoff; ///< lowpass frequency cutoff for analysis 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci uint8_t **bands; ///< scalefactor band sizes for possible frame sizes 99cabdff1aSopenharmony_ci int *num_bands; ///< number of scalefactor bands for possible frame sizes 100cabdff1aSopenharmony_ci int num_lens; ///< number of scalefactor band sets 101cabdff1aSopenharmony_ci 102cabdff1aSopenharmony_ci struct { 103cabdff1aSopenharmony_ci int size; ///< size of the bitresevoir in bits 104cabdff1aSopenharmony_ci int bits; ///< number of bits used in the bitresevoir 105cabdff1aSopenharmony_ci int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done 106cabdff1aSopenharmony_ci } bitres; 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_ci void* model_priv_data; ///< psychoacoustic model implementation private data 109cabdff1aSopenharmony_ci} FFPsyContext; 110cabdff1aSopenharmony_ci 111cabdff1aSopenharmony_ci/** 112cabdff1aSopenharmony_ci * codec-specific psychoacoustic model implementation 113cabdff1aSopenharmony_ci */ 114cabdff1aSopenharmony_citypedef struct FFPsyModel { 115cabdff1aSopenharmony_ci const char *name; 116cabdff1aSopenharmony_ci int (*init) (FFPsyContext *apc); 117cabdff1aSopenharmony_ci 118cabdff1aSopenharmony_ci /** 119cabdff1aSopenharmony_ci * Suggest window sequence for channel. 120cabdff1aSopenharmony_ci * 121cabdff1aSopenharmony_ci * @param ctx model context 122cabdff1aSopenharmony_ci * @param audio samples for the current frame 123cabdff1aSopenharmony_ci * @param la lookahead samples (NULL when unavailable) 124cabdff1aSopenharmony_ci * @param channel number of channel element to analyze 125cabdff1aSopenharmony_ci * @param prev_type previous window type 126cabdff1aSopenharmony_ci * 127cabdff1aSopenharmony_ci * @return suggested window information in a structure 128cabdff1aSopenharmony_ci */ 129cabdff1aSopenharmony_ci FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci /** 132cabdff1aSopenharmony_ci * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. 133cabdff1aSopenharmony_ci * 134cabdff1aSopenharmony_ci * @param ctx model context 135cabdff1aSopenharmony_ci * @param channel channel number of the first channel in the group to perform analysis on 136cabdff1aSopenharmony_ci * @param coeffs array of pointers to the transformed coefficients 137cabdff1aSopenharmony_ci * @param wi window information for the channels in the group 138cabdff1aSopenharmony_ci */ 139cabdff1aSopenharmony_ci void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); 140cabdff1aSopenharmony_ci 141cabdff1aSopenharmony_ci void (*end) (FFPsyContext *apc); 142cabdff1aSopenharmony_ci} FFPsyModel; 143cabdff1aSopenharmony_ci 144cabdff1aSopenharmony_ci/** 145cabdff1aSopenharmony_ci * Initialize psychoacoustic model. 146cabdff1aSopenharmony_ci * 147cabdff1aSopenharmony_ci * @param ctx model context 148cabdff1aSopenharmony_ci * @param avctx codec context 149cabdff1aSopenharmony_ci * @param num_lens number of possible frame lengths 150cabdff1aSopenharmony_ci * @param bands scalefactor band lengths for all frame lengths 151cabdff1aSopenharmony_ci * @param num_bands number of scalefactor bands for all frame lengths 152cabdff1aSopenharmony_ci * @param num_groups number of channel groups 153cabdff1aSopenharmony_ci * @param group_map array with # of channels in group - 1, for each group 154cabdff1aSopenharmony_ci * 155cabdff1aSopenharmony_ci * @return zero if successful, a negative value if not 156cabdff1aSopenharmony_ci */ 157cabdff1aSopenharmony_ciint ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, 158cabdff1aSopenharmony_ci const uint8_t **bands, const int *num_bands, 159cabdff1aSopenharmony_ci int num_groups, const uint8_t *group_map); 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci/** 162cabdff1aSopenharmony_ci * Determine what group a channel belongs to. 163cabdff1aSopenharmony_ci * 164cabdff1aSopenharmony_ci * @param ctx psymodel context 165cabdff1aSopenharmony_ci * @param channel channel to locate the group for 166cabdff1aSopenharmony_ci * 167cabdff1aSopenharmony_ci * @return pointer to the FFPsyChannelGroup this channel belongs to 168cabdff1aSopenharmony_ci */ 169cabdff1aSopenharmony_ciFFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); 170cabdff1aSopenharmony_ci 171cabdff1aSopenharmony_ci/** 172cabdff1aSopenharmony_ci * Cleanup model context at the end. 173cabdff1aSopenharmony_ci * 174cabdff1aSopenharmony_ci * @param ctx model context 175cabdff1aSopenharmony_ci */ 176cabdff1aSopenharmony_civoid ff_psy_end(FFPsyContext *ctx); 177cabdff1aSopenharmony_ci 178cabdff1aSopenharmony_ci 179cabdff1aSopenharmony_ci/************************************************************************** 180cabdff1aSopenharmony_ci * Audio preprocessing stuff. * 181cabdff1aSopenharmony_ci * This should be moved into some audio filter eventually. * 182cabdff1aSopenharmony_ci **************************************************************************/ 183cabdff1aSopenharmony_cistruct FFPsyPreprocessContext; 184cabdff1aSopenharmony_ci 185cabdff1aSopenharmony_ci/** 186cabdff1aSopenharmony_ci * psychoacoustic model audio preprocessing initialization 187cabdff1aSopenharmony_ci */ 188cabdff1aSopenharmony_cistruct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); 189cabdff1aSopenharmony_ci 190cabdff1aSopenharmony_ci/** 191cabdff1aSopenharmony_ci * Preprocess several channel in audio frame in order to compress it better. 192cabdff1aSopenharmony_ci * 193cabdff1aSopenharmony_ci * @param ctx preprocessing context 194cabdff1aSopenharmony_ci * @param audio samples to be filtered (in place) 195cabdff1aSopenharmony_ci * @param channels number of channel to preprocess 196cabdff1aSopenharmony_ci */ 197cabdff1aSopenharmony_civoid ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels); 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ci/** 200cabdff1aSopenharmony_ci * Cleanup audio preprocessing module. 201cabdff1aSopenharmony_ci */ 202cabdff1aSopenharmony_civoid ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); 203cabdff1aSopenharmony_ci 204cabdff1aSopenharmony_ci#endif /* AVCODEC_PSYMODEL_H */ 205