1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/crc.h"
20 #include "libavutil/float_dsp.h"
21 #include "libavutil/intreadwrite.h"
22 #include "libavutil/mem_internal.h"
23 #include "libavutil/tx.h"
24 
25 #include "avcodec.h"
26 #include "bytestream.h"
27 #include "codec_internal.h"
28 #include "get_bits.h"
29 #include "internal.h"
30 #include "hca_data.h"
31 
32 typedef struct ChannelContext {
33     float    base[128];
34     DECLARE_ALIGNED(32, float, imdct_in)[128];
35     DECLARE_ALIGNED(32, float, imdct_out)[128];
36     DECLARE_ALIGNED(32, float, imdct_prev)[128];
37     int8_t   scale_factors[128];
38     uint8_t  scale[128];
39     int8_t   intensity[8];
40     int8_t  *hfr_scale;
41     unsigned count;
42     int      chan_type;
43 } ChannelContext;
44 
45 typedef struct HCAContext {
46     const AVCRC *crc_table;
47 
48     ChannelContext ch[16];
49 
50     uint8_t ath[128];
51 
52     int     ath_type;
53     unsigned hfr_group_count;
54     uint8_t track_count;
55     uint8_t channel_config;
56     uint8_t total_band_count;
57     uint8_t base_band_count;
58     uint8_t stereo_band_count;
59     uint8_t bands_per_hfr_group;
60 
61     av_tx_fn           tx_fn;
62     AVTXContext       *tx_ctx;
63     AVFloatDSPContext *fdsp;
64 } HCAContext;
65 
ath_init1(uint8_t *ath, int sample_rate)66 static void ath_init1(uint8_t *ath, int sample_rate)
67 {
68     unsigned int index;
69     unsigned int acc = 0;
70 
71     for (int i = 0; i < 128; i++) {
72         acc += sample_rate;
73         index = acc >> 13;
74 
75         if (index >= 654) {
76             memset(ath+i, 0xFF, (128 - i));
77             break;
78         }
79 
80         ath[i] = ath_base_curve[index];
81     }
82 }
83 
ath_init(uint8_t *ath, int type, int sample_rate)84 static int ath_init(uint8_t *ath, int type, int sample_rate)
85 {
86     switch (type) {
87     case 0:
88         /* nothing to do */
89         break;
90     case 1:
91         ath_init1(ath, sample_rate);
92         break;
93     default:
94         return AVERROR_INVALIDDATA;
95     }
96 
97     return 0;
98 }
99 
/**
 * Divide a by b, rounding up.
 *
 * @return ceil(a / b), or 0 when b is 0
 */
static inline unsigned ceil2(unsigned a, unsigned b)
{
    unsigned quotient;

    if (b == 0)
        return 0;

    quotient = a / b;
    if (a % b != 0)
        quotient++;

    return quotient;
}
104 
decode_init(AVCodecContext *avctx)105 static av_cold int decode_init(AVCodecContext *avctx)
106 {
107     HCAContext *c = avctx->priv_data;
108     GetByteContext gb0, *const gb = &gb0;
109     int8_t r[16] = { 0 };
110     float scale = 1.f / 8.f;
111     unsigned b, chunk;
112     int version, ret;
113 
114     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
115     c->crc_table = av_crc_get_table(AV_CRC_16_ANSI);
116 
117     if (avctx->ch_layout.nb_channels <= 0 || avctx->ch_layout.nb_channels > 16)
118         return AVERROR(EINVAL);
119 
120     if (avctx->extradata_size < 36)
121         return AVERROR_INVALIDDATA;
122     bytestream2_init(gb, avctx->extradata, avctx->extradata_size);
123 
124     bytestream2_skipu(gb, 4);
125     version = bytestream2_get_be16(gb);
126     bytestream2_skipu(gb, 2);
127 
128     c->ath_type = version >= 0x200 ? 0 : 1;
129 
130     if (bytestream2_get_be32u(gb) != MKBETAG('f', 'm', 't', 0))
131         return AVERROR_INVALIDDATA;
132     bytestream2_skipu(gb, 4);
133     bytestream2_skipu(gb, 4);
134     bytestream2_skipu(gb, 4);
135 
136     chunk = bytestream2_get_be32u(gb);
137     if (chunk == MKBETAG('c', 'o', 'm', 'p')) {
138         bytestream2_skipu(gb, 2);
139         bytestream2_skipu(gb, 1);
140         bytestream2_skipu(gb, 1);
141         c->track_count         = bytestream2_get_byteu(gb);
142         c->channel_config      = bytestream2_get_byteu(gb);
143         c->total_band_count    = bytestream2_get_byteu(gb);
144         c->base_band_count     = bytestream2_get_byteu(gb);
145         c->stereo_band_count   = bytestream2_get_byte (gb);
146         c->bands_per_hfr_group = bytestream2_get_byte (gb);
147     } else if (chunk == MKBETAG('d', 'e', 'c', 0)) {
148         bytestream2_skipu(gb, 2);
149         bytestream2_skipu(gb, 1);
150         bytestream2_skipu(gb, 1);
151         c->total_band_count = bytestream2_get_byteu(gb) + 1;
152         c->base_band_count  = bytestream2_get_byteu(gb) + 1;
153         c->track_count      = bytestream2_peek_byteu(gb) >> 4;
154         c->channel_config   = bytestream2_get_byteu(gb) & 0xF;
155         if (!bytestream2_get_byteu(gb))
156             c->base_band_count = c->total_band_count;
157         c->stereo_band_count = c->total_band_count - c->base_band_count;
158         c->bands_per_hfr_group = 0;
159     } else
160         return AVERROR_INVALIDDATA;
161 
162     if (c->total_band_count > FF_ARRAY_ELEMS(c->ch->imdct_in))
163         return AVERROR_INVALIDDATA;
164 
165 
166     while (bytestream2_get_bytes_left(gb) >= 4) {
167         chunk = bytestream2_get_be32u(gb);
168         if (chunk == MKBETAG('v', 'b', 'r', 0)) {
169             bytestream2_skip(gb, 2 + 2);
170         } else if (chunk == MKBETAG('a', 't', 'h', 0)) {
171             c->ath_type = bytestream2_get_be16(gb);
172         } else if (chunk == MKBETAG('r', 'v', 'a', 0)) {
173             bytestream2_skip(gb, 4);
174         } else if (chunk == MKBETAG('c', 'o', 'm', 'm')) {
175             bytestream2_skip(gb, bytestream2_get_byte(gb) * 8);
176         } else if (chunk == MKBETAG('c', 'i', 'p', 'h')) {
177             bytestream2_skip(gb, 2);
178         } else if (chunk == MKBETAG('l', 'o', 'o', 'p')) {
179             bytestream2_skip(gb, 4 + 4 + 2 + 2);
180         } else if (chunk == MKBETAG('p', 'a', 'd', 0)) {
181             break;
182         } else {
183             break;
184         }
185     }
186 
187     ret = ath_init(c->ath, c->ath_type, avctx->sample_rate);
188     if (ret < 0)
189         return ret;
190 
191     if (!c->track_count)
192         c->track_count = 1;
193 
194     b = avctx->ch_layout.nb_channels / c->track_count;
195     if (c->stereo_band_count && b > 1) {
196         int8_t *x = r;
197 
198         for (int i = 0; i < c->track_count; i++, x+=b) {
199             switch (b) {
200             case 2:
201             case 3:
202                 x[0] = 1;
203                 x[1] = 2;
204                 break;
205             case 4:
206                 x[0]=1; x[1] = 2;
207                 if (c->channel_config == 0) {
208                     x[2]=1;
209                     x[3]=2;
210                 }
211                 break;
212             case 5:
213                 x[0]=1; x[1] = 2;
214                 if (c->channel_config <= 2) {
215                     x[3]=1;
216                     x[4]=2;
217                 }
218                 break;
219             case 6:
220             case 7:
221                 x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2;
222                 break;
223             case 8:
224                 x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2; x[6] = 1; x[7] = 2;
225                 break;
226             }
227         }
228     }
229 
230     if (c->total_band_count < c->base_band_count)
231         return AVERROR_INVALIDDATA;
232 
233     c->hfr_group_count = ceil2(c->total_band_count - (c->base_band_count + c->stereo_band_count),
234                                c->bands_per_hfr_group);
235 
236     if (c->base_band_count + c->stereo_band_count + (unsigned long)c->hfr_group_count > 128ULL)
237         return AVERROR_INVALIDDATA;
238 
239     for (int i = 0; i < avctx->ch_layout.nb_channels; i++) {
240         c->ch[i].chan_type = r[i];
241         c->ch[i].count     = c->base_band_count + ((r[i] != 2) ? c->stereo_band_count : 0);
242         c->ch[i].hfr_scale = &c->ch[i].scale_factors[c->base_band_count + c->stereo_band_count];
243         if (c->ch[i].count > 128)
244             return AVERROR_INVALIDDATA;
245     }
246 
247     c->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
248     if (!c->fdsp)
249         return AVERROR(ENOMEM);
250 
251     return av_tx_init(&c->tx_ctx, &c->tx_fn, AV_TX_FLOAT_MDCT, 1, 128, &scale, 0);
252 }
253 
/**
 * Transform one sub-frame of a channel and write 128 output samples.
 *
 * Runs the configured transform from imdct_in to imdct_out, combines the
 * second half of the previous output with the new output through
 * vector_fmul_window(), then remembers the new output for the next call.
 *
 * @param c     decoder context (transform and DSP functions)
 * @param ch    channel whose imdct_in is transformed
 * @param index sub-frame number; not used by this function
 * @param out   destination for the windowed samples
 */
static void run_imdct(HCAContext *c, ChannelContext *ch, int index, float *out)
{
    const int half = 128 >> 1;

    c->tx_fn(c->tx_ctx, ch->imdct_out, ch->imdct_in, sizeof(float));

    c->fdsp->vector_fmul_window(out, ch->imdct_prev + half,
                                ch->imdct_out, window, half);

    /* Keep this block around: the next sub-frame overlaps with it. */
    memcpy(ch->imdct_prev, ch->imdct_out, sizeof(ch->imdct_prev));
}
263 
/**
 * Derive the second channel of a pair from the first for the bands above
 * base_band_count.
 *
 * Does nothing unless ch1 has chan_type 1 and stereo bands are present.
 * Otherwise, for each of band_count bands starting at base_band_count, ch2
 * receives ch1's coefficient times (ratio - 2) and ch1's coefficient is
 * scaled by ratio, where ratio comes from intensity_ratio_table indexed by
 * ch2's intensity value for this sub-frame.
 *
 * @param s                 decoder context (unused)
 * @param ch1               first channel of the pair (source, scaled in place)
 * @param ch2               second channel of the pair (destination)
 * @param index             sub-frame number, selects ch2->intensity[index]
 * @param band_count        number of bands to process
 * @param base_band_count   first band to process
 * @param stereo_band_count number of stereo bands; 0 disables the function
 */
static void apply_intensity_stereo(HCAContext *s, ChannelContext *ch1, ChannelContext *ch2,
                                   int index, unsigned band_count, unsigned base_band_count,
                                   unsigned stereo_band_count)
{
    float ratio_l, ratio_r;

    if (!stereo_band_count || ch1->chan_type != 1)
        return;

    ratio_l = intensity_ratio_table[ch2->intensity[index]];
    ratio_r = ratio_l - 2.0f;

    for (unsigned n = 0; n < band_count; n++) {
        const unsigned band = base_band_count + n;

        /* Write ch2 from the unscaled ch1 value before scaling ch1. */
        ch2->imdct_in[band]  = ch1->imdct_in[band] * ratio_r;
        ch1->imdct_in[band] *= ratio_l;
    }
}
281 
/**
 * Synthesize the bands above start_band from the bands just below it.
 *
 * Bands are filled upwards from start_band while the source walks
 * downwards from start_band - 1, so the low bands are mirrored. Each
 * destination gets the source coefficient multiplied by a
 * scale_conversion_table entry chosen from the difference between the
 * group's HFR scale and the source band's scale factor (clipped to a signed
 * 6-bit range). The last coefficient (index 127) is then forced to zero.
 *
 * Channels of type 2 and streams with bands_per_hfr_group == 0 are left
 * completely untouched.
 *
 * @param s                    decoder context (unused)
 * @param ch                   channel to process
 * @param hfr_group_count      number of HFR groups
 * @param bands_per_hfr_group  bands synthesized per group
 * @param start_band           first band to synthesize
 * @param total_band_count     exclusive upper bound for synthesized bands
 */
static void reconstruct_hfr(HCAContext *s, ChannelContext *ch,
                            unsigned hfr_group_count,
                            unsigned bands_per_hfr_group,
                            unsigned start_band, unsigned total_band_count)
{
    int dst, src;

    if (!bands_per_hfr_group || ch->chan_type == 2)
        return;

    dst = start_band;       /* next band to write, moves up   */
    src = start_band - 1;   /* band to copy from, moves down  */

    for (unsigned group = 0; group < hfr_group_count; group++) {
        for (unsigned n = 0; n < bands_per_hfr_group; n++) {
            int diff;

            /* Out of destination or source bands: this group is done. */
            if (dst >= total_band_count || src < 0)
                break;

            diff = av_clip_intp2(ch->hfr_scale[group] - ch->scale_factors[src], 6);
            ch->imdct_in[dst] = scale_conversion_table[scale_conv_bias + diff] *
                                ch->imdct_in[src];
            dst++;
            src--;
        }
    }

    ch->imdct_in[127] = 0;
}
299 
/**
 * Read and dequantize one sub-frame of coefficients for a channel.
 *
 * For each coded band, max_bits_table[scale] bits are read. With a scale of
 * 7 or less the bits (together with the scale) index the quant_spectrum
 * tables, and the reader position is corrected by the difference between the
 * table's code length and the bits already consumed. With a larger scale the
 * bits are a magnitude with the sign in the lowest bit. The result is
 * multiplied by the band's base value; bands beyond ch->count are zeroed.
 *
 * @param c   decoder context (unused)
 * @param ch  channel; reads scale[], base[] and count, writes imdct_in[]
 * @param gb  bit reader positioned at this channel's coefficients
 */
static void dequantize_coefficients(HCAContext *c, ChannelContext *ch,
                                    GetBitContext *gb)
{
    const size_t coded_bytes = ch->count * sizeof(ch->imdct_in[0]);

    for (unsigned band = 0; band < ch->count; band++) {
        const unsigned resolution = ch->scale[band];
        const int width = max_bits_table[resolution];
        const int raw   = get_bitsz(gb, width);
        float coeff;

        if (resolution <= 7) {
            /* Table-coded value: look up the value and its true length. */
            const int entry = raw + (resolution << 4);

            skip_bits_long(gb, quant_spectrum_bits[entry] - width);
            coeff = quant_spectrum_value[entry];
        } else {
            /* Sign in bit 0, magnitude in the remaining bits. */
            const int magnitude = raw >> 1;
            const int level     = (raw & 1) ? -magnitude : magnitude;

            /* A zero level steps the reader back by one bit. */
            if (level == 0)
                skip_bits_long(gb, -1);
            coeff = level;
        }

        ch->imdct_in[band] = coeff * ch->base[band];
    }

    /* Clear every band that carries no coded coefficient. */
    memset(ch->imdct_in + ch->count, 0, sizeof(ch->imdct_in) - coded_bytes);
}
324 
/**
 * Read the per-frame side information of one channel.
 *
 * In bitstream order this reads:
 *  - a 3-bit mode selecting how scale factors are coded: 0 = all zero,
 *    1..5 = delta coded with that many bits per delta, 6..7 = raw 6-bit values;
 *  - for chan_type 2 channels, a 4-bit intensity value followed by seven
 *    more unless the first one is 15; for all other channels one 6-bit HFR
 *    scale per HFR group.
 * It then derives scale[] and base[] for every coded band and zeroes scale[]
 * for the remaining bands.
 *
 * @param c                  decoder context; c->ath is used for the ATH values
 * @param ch                 channel to fill
 * @param gb                 bit reader positioned at this channel's data
 * @param hfr_group_count    number of HFR scales to read
 * @param packed_noise_level frame noise level in 8.8-style packing, as
 *                           computed by decode_frame()
 * @param ath                unused; the table is taken from c->ath
 */
static void unpack(HCAContext *c, ChannelContext *ch,
                   GetBitContext *gb,
                   unsigned hfr_group_count,
                   int packed_noise_level,
                   const uint8_t *ath)
{
    const int delta_bits = get_bits(gb, 3);

    if (delta_bits == 0) {
        memset(ch->scale_factors, 0, 128);
    } else if (delta_bits <= 5) {
        const int escape = (1 << delta_bits) - 1;   /* all-ones delta code  */
        const int bias   = escape >> 1;             /* centre of the deltas */
        int current = get_bits(gb, 6);

        ch->scale_factors[0] = current;
        for (unsigned band = 1; band < ch->count; band++) {
            const int code = get_bits(gb, delta_bits);

            /* The escape code is followed by a fresh absolute value. */
            if (code == escape)
                current = get_bits(gb, 6);
            else
                current += code - bias;
            current = av_clip_uintp2(current, 6);

            ch->scale_factors[band] = current;
        }
    } else {
        for (unsigned band = 0; band < ch->count; band++)
            ch->scale_factors[band] = get_bits(gb, 6);
    }

    if (ch->chan_type != 2) {
        for (unsigned group = 0; group < hfr_group_count; group++)
            ch->hfr_scale[group] = get_bits(gb, 6);
    } else {
        ch->intensity[0] = get_bits(gb, 4);
        if (ch->intensity[0] < 15) {
            for (int sub = 1; sub < 8; sub++)
                ch->intensity[sub] = get_bits(gb, 4);
        }
    }

    /* The index stays signed: packed_noise_level may be negative and the
     * shift below must then be applied to a signed sum. */
    for (int i = 0; i < ch->count; i++) {
        const int factor = ch->scale_factors[i];
        int resolution = 0;

        if (factor) {
            resolution = c->ath[i] + ((packed_noise_level + i) >> 8) - ((factor * 5) >> 1) + 2;
            resolution = scale_table[av_clip(resolution, 0, 58)];
        }
        ch->scale[i] = resolution;

        ch->base[i] = dequantizer_scaling_table[ch->scale_factors[i]] *
                      quant_step_size[ch->scale[i]];
    }

    /* Bands without coded coefficients get scale 0. */
    memset(ch->scale + ch->count, 0, sizeof(ch->scale) - ch->count);
}
384 
/**
 * Decode one HCA packet into 1024 planar float samples per channel.
 *
 * A packet starts with the 16-bit sync word 0xFFFF, followed by a 9-bit and
 * a 7-bit field forming the packed noise level, the side information of all
 * channels, and then eight sub-frames of 128 coefficients per channel.
 *
 * @param avctx         codec context
 * @param frame         output frame; its buffers are allocated here
 * @param got_frame_ptr set to 1 when a frame was produced
 * @param avpkt         input packet
 * @return the packet size on success; AVERROR_INVALIDDATA on a CRC mismatch
 *         (only checked with AV_EF_CRCCHECK) or a missing sync word; other
 *         negative error codes from bit reader or buffer allocation
 */
static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
                        int *got_frame_ptr, AVPacket *avpkt)
{
    HCAContext *c = avctx->priv_data;
    int ch, ret, packed_noise_level;
    unsigned noise_hi, noise_lo;
    GetBitContext gb0, *const gb = &gb0;
    float **samples;

    if (avctx->err_recognition & AV_EF_CRCCHECK) {
        /* The CRC over the whole packet must come out as zero. */
        if (av_crc(c->crc_table, 0, avpkt->data, avpkt->size))
            return AVERROR_INVALIDDATA;
    }

    if ((ret = init_get_bits8(gb, avpkt->data, avpkt->size)) < 0)
        return ret;

    if (get_bits(gb, 16) != 0xFFFF)
        return AVERROR_INVALIDDATA;

    frame->nb_samples = 1024;
    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;
    samples = (float **)frame->extended_data;

    /* Read the two fields in separate statements: both calls advance the
     * same bit reader, and C leaves the evaluation order of the operands of
     * '-' unspecified, so a single expression could read them swapped. */
    noise_hi = get_bits(gb, 9);
    noise_lo = get_bits(gb, 7);
    packed_noise_level = (noise_hi << 8) - noise_lo;

    for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++)
        unpack(c, &c->ch[ch], gb, c->hfr_group_count, packed_noise_level, c->ath);

    /* Eight sub-frames of 128 samples make up the 1024-sample frame. */
    for (int i = 0; i < 8; i++) {
        for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++)
            dequantize_coefficients(c, &c->ch[ch], gb);
        for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++)
            reconstruct_hfr(c, &c->ch[ch], c->hfr_group_count, c->bands_per_hfr_group,
                            c->stereo_band_count + c->base_band_count, c->total_band_count);
        /* Each channel is offered as the first of a pair with its successor;
         * apply_intensity_stereo() ignores it unless its chan_type is 1. */
        for (ch = 0; ch < avctx->ch_layout.nb_channels - 1; ch++)
            apply_intensity_stereo(c, &c->ch[ch], &c->ch[ch+1], i,
                                   c->total_band_count - c->base_band_count,
                                   c->base_band_count, c->stereo_band_count);
        for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++)
            run_imdct(c, &c->ch[ch], i, samples[ch] + i * 128);
    }

    *got_frame_ptr = 1;

    return avpkt->size;
}
432 
decode_close(AVCodecContext *avctx)433 static av_cold int decode_close(AVCodecContext *avctx)
434 {
435     HCAContext *c = avctx->priv_data;
436 
437     av_freep(&c->fdsp);
438     av_tx_uninit(&c->tx_ctx);
439 
440     return 0;
441 }
442 
443 const FFCodec ff_hca_decoder = {
444     .p.name         = "hca",
445     .p.long_name    = NULL_IF_CONFIG_SMALL("CRI HCA"),
446     .p.type         = AVMEDIA_TYPE_AUDIO,
447     .p.id           = AV_CODEC_ID_HCA,
448     .priv_data_size = sizeof(HCAContext),
449     .init           = decode_init,
450     FF_CODEC_DECODE_CB(decode_frame),
451     .close          = decode_close,
452     .p.capabilities = AV_CODEC_CAP_DR1,
453     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
454     .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
455                                                       AV_SAMPLE_FMT_NONE },
456 };
457