1/* 2 * AAC encoder main-type prediction 3 * Copyright (C) 2015 Rostislav Pehlivanov 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * AAC encoder main-type prediction 25 * @author Rostislav Pehlivanov ( atomnuker gmail com ) 26 */ 27 28#include "aactab.h" 29#include "aacenc_pred.h" 30#include "aacenc_utils.h" 31#include "aacenc_is.h" /* <- Needed for common window distortions */ 32#include "aacenc_quantization.h" 33 34#define RESTORE_PRED(sce, sfb) \ 35 if (sce->ics.prediction_used[sfb]) {\ 36 sce->ics.prediction_used[sfb] = 0;\ 37 sce->band_type[sfb] = sce->band_alt[sfb];\ 38 } 39 40static inline float flt16_round(float pf) 41{ 42 union av_intfloat32 tmp; 43 tmp.f = pf; 44 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U; 45 return tmp.f; 46} 47 48static inline float flt16_even(float pf) 49{ 50 union av_intfloat32 tmp; 51 tmp.f = pf; 52 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U; 53 return tmp.f; 54} 55 56static inline float flt16_trunc(float pf) 57{ 58 union av_intfloat32 pun; 59 pun.f = pf; 60 pun.i &= 0xFFFF0000U; 61 return pun.f; 62} 63 64static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set) 65{ 66 float k2; 67 const float a = 0.953125; // 61.0 / 64 68 const float alpha = 0.90625; // 29.0 / 32 69 const float k1 = ps->k1; 70 const float r0 = ps->r0, r1 = ps->r1; 71 const float cor0 = ps->cor0, cor1 = ps->cor1; 72 const float var0 = ps->var0, var1 = ps->var1; 73 const float e0 = *coef - ps->x_est; 74 const float e1 = e0 - k1 * r0; 75 76 if (set) 77 *coef = e0; 78 79 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1); 80 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1)); 81 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0); 82 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0)); 83 ps->r1 = flt16_trunc(a * (r0 - k1 * e0)); 84 ps->r0 = flt16_trunc(a * e0); 85 86 /* Prediction for next frame */ 87 ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0; 88 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0; 89 *rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1); 90} 91 92static inline void reset_predict_state(PredictorState *ps) 93{ 94 ps->r0 = 0.0f; 95 ps->r1 = 0.0f; 96 ps->k1 = 0.0f; 97 ps->cor0 = 0.0f; 98 ps->cor1 = 0.0f; 99 ps->var0 = 1.0f; 100 ps->var1 = 1.0f; 101 ps->x_est = 0.0f; 102} 103 104static inline void reset_all_predictors(PredictorState *ps) 105{ 106 int i; 107 for (i = 0; i < MAX_PREDICTORS; i++) 108 reset_predict_state(&ps[i]); 109} 110 111static inline void reset_predictor_group(SingleChannelElement *sce, int group_num) 112{ 113 int i; 114 PredictorState *ps = sce->predictor_state; 115 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30) 116 reset_predict_state(&ps[i]); 117} 118 119void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce) 120{ 121 int sfb, k; 122 const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); 123 124 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 125 for (sfb = 0; sfb < pmax; sfb++) { 126 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) { 127 predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], 128 sce->ics.predictor_present && sce->ics.prediction_used[sfb]); 129 } 130 } 131 if (sce->ics.predictor_reset_group) { 132 reset_predictor_group(sce, sce->ics.predictor_reset_group); 133 } 134 } else { 135 reset_all_predictors(sce->predictor_state); 136 } 137} 138 139/* If inc = 0 you can check if this returns 0 to see if you can reset freely */ 140static inline int update_counters(IndividualChannelStream *ics, int inc) 141{ 142 int i; 143 for (i = 1; i < 31; i++) { 144 ics->predictor_reset_count[i] += inc; 145 if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN) 146 return i; /* Reset this immediately */ 147 } 148 return 0; 149} 150 151void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe) 152{ 153 int start, w, w2, g, i, count = 0; 154 SingleChannelElement *sce0 = &cpe->ch[0]; 155 SingleChannelElement *sce1 = &cpe->ch[1]; 156 const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); 157 const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); 158 const int pmax = FFMIN(pmax0, pmax1); 159 160 if (!cpe->common_window || 161 sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE || 162 sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) 163 return; 164 165 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { 166 start = 0; 167 for (g = 0; g < sce0->ics.num_swb; g++) { 168 int sfb = w*16+g; 169 int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb]; 170 float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; 171 struct AACISError ph_err1, ph_err2, *erf; 172 if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) { 173 RESTORE_PRED(sce0, sfb); 174 RESTORE_PRED(sce1, sfb); 175 start += sce0->ics.swb_sizes[g]; 176 continue; 177 } 178 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 179 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { 180 float coef0 = sce0->pcoeffs[start+(w+w2)*128+i]; 181 float coef1 = sce1->pcoeffs[start+(w+w2)*128+i]; 182 ener0 += coef0*coef0; 183 ener1 += coef1*coef1; 184 ener01 += (coef0 + coef1)*(coef0 + coef1); 185 } 186 } 187 ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g, 188 ener0, ener1, ener01, 1, -1); 189 ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g, 190 ener0, ener1, ener01, 1, +1); 191 erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2; 192 if (erf->pass) { 193 sce0->ics.prediction_used[sfb] = 1; 194 sce1->ics.prediction_used[sfb] = 1; 195 count++; 196 } else { 197 RESTORE_PRED(sce0, sfb); 198 RESTORE_PRED(sce1, sfb); 199 } 200 start += sce0->ics.swb_sizes[g]; 201 } 202 } 203 204 sce1->ics.predictor_present = sce0->ics.predictor_present = !!count; 205} 206 207static void update_pred_resets(SingleChannelElement *sce) 208{ 209 int i, max_group_id_c, max_frame = 0; 210 float avg_frame = 0.0f; 211 IndividualChannelStream *ics = &sce->ics; 212 213 /* Update the counters and immediately update any frame behind schedule */ 214 if ((ics->predictor_reset_group = update_counters(&sce->ics, 1))) 215 return; 216 217 for (i = 1; i < 31; i++) { 218 /* Count-based */ 219 if (ics->predictor_reset_count[i] > max_frame) { 220 max_group_id_c = i; 221 max_frame = ics->predictor_reset_count[i]; 222 } 223 avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2; 224 } 225 226 if (max_frame > PRED_RESET_MIN) { 227 ics->predictor_reset_group = max_group_id_c; 228 } else { 229 ics->predictor_reset_group = 0; 230 } 231} 232 233void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) 234{ 235 int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0; 236 const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); 237 float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1]; 238 float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3]; 239 float *QERR = &s->scoefs[128*4]; 240 241 if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 242 sce->ics.predictor_present = 0; 243 return; 244 } 245 246 if (!sce->ics.predictor_initialized) { 247 reset_all_predictors(sce->predictor_state); 248 sce->ics.predictor_initialized = 1; 249 memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float)); 250 for (i = 1; i < 31; i++) 251 sce->ics.predictor_reset_count[i] = i; 252 } 253 254 update_pred_resets(sce); 255 memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); 256 257 for (sfb = PRED_SFB_START; sfb < pmax; sfb++) { 258 int cost1, cost2, cb_p; 259 float dist1, dist2, dist_spec_err = 0.0f; 260 const int cb_n = sce->zeroes[sfb] ? 0 : sce->band_type[sfb]; 261 const int cb_min = sce->zeroes[sfb] ? 0 : 1; 262 const int cb_max = sce->zeroes[sfb] ? 0 : RESERVED_BT; 263 const int start_coef = sce->ics.swb_offset[sfb]; 264 const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef; 265 const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb]; 266 267 if (start_coef + num_coeffs > MAX_PREDICTORS || 268 (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2) || 269 sce->band_type[sfb] == NOISE_BT) 270 continue; 271 272 /* Normal coefficients */ 273 s->abs_pow34(O34, &sce->coeffs[start_coef], num_coeffs); 274 dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL, 275 O34, num_coeffs, sce->sf_idx[sfb], 276 cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0); 277 cost_coeffs += cost1; 278 279 /* Encoded coefficients - needed for #bits, band type and quant. error */ 280 for (i = 0; i < num_coeffs; i++) 281 SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i]; 282 s->abs_pow34(S34, SENT, num_coeffs); 283 if (cb_n < RESERVED_BT) 284 cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]), cb_min, cb_max); 285 else 286 cb_p = cb_n; 287 quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs, 288 sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY, 289 &cost2, NULL, 0); 290 291 /* Reconstructed coefficients - needed for distortion measurements */ 292 for (i = 0; i < num_coeffs; i++) 293 sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f; 294 s->abs_pow34(P34, &sce->prcoeffs[start_coef], num_coeffs); 295 if (cb_n < RESERVED_BT) 296 cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]), cb_min, cb_max); 297 else 298 cb_p = cb_n; 299 dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL, 300 P34, num_coeffs, sce->sf_idx[sfb], 301 cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0); 302 for (i = 0; i < num_coeffs; i++) 303 dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]); 304 dist_spec_err *= s->lambda / band->threshold; 305 dist2 += dist_spec_err; 306 307 if (dist2 <= dist1 && cb_p <= cb_n) { 308 cost_pred += cost2; 309 sce->ics.prediction_used[sfb] = 1; 310 sce->band_alt[sfb] = cb_n; 311 sce->band_type[sfb] = cb_p; 312 count++; 313 } else { 314 cost_pred += cost1; 315 sce->band_alt[sfb] = cb_p; 316 } 317 } 318 319 if (count && cost_coeffs < cost_pred) { 320 count = 0; 321 for (sfb = PRED_SFB_START; sfb < pmax; sfb++) 322 RESTORE_PRED(sce, sfb); 323 memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used)); 324 } 325 326 sce->ics.predictor_present = !!count; 327} 328 329/** 330 * Encoder predictors data. 331 */ 332void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce) 333{ 334 int sfb; 335 IndividualChannelStream *ics = &sce->ics; 336 const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); 337 338 if (s->profile != FF_PROFILE_AAC_MAIN || 339 !ics->predictor_present) 340 return; 341 342 put_bits(&s->pb, 1, !!ics->predictor_reset_group); 343 if (ics->predictor_reset_group) 344 put_bits(&s->pb, 5, ics->predictor_reset_group); 345 for (sfb = 0; sfb < pmax; sfb++) 346 put_bits(&s->pb, 1, ics->prediction_used[sfb]); 347} 348