1/*
2 * Apple HTTP Live Streaming Sample Encryption/Decryption
3 *
4 * Copyright (c) 2021 Nachiket Tarate
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23/**
24 * @file
25 * Apple HTTP Live Streaming Sample Encryption
26 * https://developer.apple.com/library/ios/documentation/AudioVideo/Conceptual/HLS_Sample_Encryption
27 */
28
29#include "libavutil/aes.h"
30#include "libavutil/channel_layout.h"
31
32#include "hls_sample_encryption.h"
33
34#include "libavcodec/adts_header.h"
35#include "libavcodec/adts_parser.h"
36#include "libavcodec/ac3tab.h"
37#include "libavcodec/ac3_parser_internal.h"
38
39
/**
 * One H.264 NAL unit located inside a packet buffer.
 */
typedef struct NALUnit {
    uint8_t *data;              /* first byte after the start code */
    int      type;              /* low five bits of the first payload byte */
    int      length;            /* payload size in bytes, start code excluded */
    int      start_code_length; /* size of the preceding start code: 3 or 4 */
} NALUnit;
46
/**
 * One audio sync frame (ADTS, AC-3 or E-AC-3) located inside a packet buffer.
 */
typedef struct AudioFrame {
    uint8_t *data;          /* position of the sync word */
    int      length;        /* frame size in bytes as reported by the parsed header */
    int      header_length; /* ADTS header size for AAC, 0 for AC-3 / E-AC-3 */
} AudioFrame;
52
/**
 * Read cursor over a packet buffer while it is scanned for NAL units or
 * audio sync frames.
 */
typedef struct CodecParserContext {
    const uint8_t *buf_ptr; /* current scan position */
    const uint8_t *buf_end; /* one past the last byte of the buffer */
} CodecParserContext;
57
/* E-AC-3 sample rates in Hz, indexed by the 2-bit fscod value; the last entry is 0. */
static const int eac3_sample_rate_tab[4] = {
    48000,
    44100,
    32000,
    0,
};
59
60void ff_hls_senc_read_audio_setup_info(HLSAudioSetupInfo *info, const uint8_t *buf, size_t size)
61{
62    if (size < 8)
63        return;
64
65    info->codec_tag = AV_RL32(buf);
66
67    if (info->codec_tag == MKTAG('z','a','a','c'))
68        info->codec_id = AV_CODEC_ID_AAC;
69    else if (info->codec_tag == MKTAG('z','a','c','3'))
70        info->codec_id = AV_CODEC_ID_AC3;
71    else if (info->codec_tag == MKTAG('z','e','c','3'))
72        info->codec_id = AV_CODEC_ID_EAC3;
73    else
74        info->codec_id = AV_CODEC_ID_NONE;
75
76    buf += 4;
77    info->priming               = AV_RL16(buf);
78    buf += 2;
79    info->version               = *buf++;
80    info->setup_data_length     = *buf++;
81
82    if (info->setup_data_length > size - 8)
83        info->setup_data_length = size - 8;
84
85    if (info->setup_data_length > HLS_MAX_AUDIO_SETUP_DATA_LEN)
86        return;
87
88    memcpy(info->setup_data, buf, info->setup_data_length);
89}
90
91int ff_hls_senc_parse_audio_setup_info(AVStream *st, HLSAudioSetupInfo *info)
92{
93    int ret = 0;
94
95    st->codecpar->codec_tag = info->codec_tag;
96
97    if (st->codecpar->codec_id == AV_CODEC_ID_AAC)
98        return 0;
99
100    if (st->codecpar->codec_id != AV_CODEC_ID_AC3 && st->codecpar->codec_id != AV_CODEC_ID_EAC3)
101        return AVERROR_INVALIDDATA;
102
103    if (st->codecpar->codec_id == AV_CODEC_ID_AC3) {
104        AC3HeaderInfo *ac3hdr = NULL;
105
106        ret = avpriv_ac3_parse_header(&ac3hdr, info->setup_data, info->setup_data_length);
107        if (ret < 0) {
108            if (ret != AVERROR(ENOMEM))
109                av_free(ac3hdr);
110            return ret;
111        }
112
113        st->codecpar->sample_rate       = ac3hdr->sample_rate;
114        av_channel_layout_uninit(&st->codecpar->ch_layout);
115        av_channel_layout_from_mask(&st->codecpar->ch_layout, ac3hdr->channel_layout);
116        st->codecpar->bit_rate          = ac3hdr->bit_rate;
117
118        av_free(ac3hdr);
119    } else {  /*  Parse 'dec3' EC3SpecificBox */
120        GetBitContext gb;
121        uint64_t mask;
122        int data_rate, fscod, acmod, lfeon;
123
124        ret = init_get_bits8(&gb, info->setup_data, info->setup_data_length);
125        if (ret < 0)
126            return AVERROR_INVALIDDATA;
127
128        data_rate = get_bits(&gb, 13);
129        skip_bits(&gb, 3);
130        fscod = get_bits(&gb, 2);
131        skip_bits(&gb, 10);
132        acmod = get_bits(&gb, 3);
133        lfeon = get_bits(&gb, 1);
134
135        st->codecpar->sample_rate = eac3_sample_rate_tab[fscod];
136
137        mask = ff_ac3_channel_layout_tab[acmod];
138        if (lfeon)
139            mask |= AV_CH_LOW_FREQUENCY;
140
141        av_channel_layout_uninit(&st->codecpar->ch_layout);
142        av_channel_layout_from_mask(&st->codecpar->ch_layout, mask);
143
144        st->codecpar->bit_rate = data_rate*1000;
145    }
146
147    return 0;
148}
149
150/*
151 * Remove start code emulation prevention 0x03 bytes
152 */
153static void remove_scep_3_bytes(NALUnit *nalu)
154{
155    int i = 0;
156    int j = 0;
157
158    uint8_t *data = nalu->data;
159
160    while (i < nalu->length) {
161        if (nalu->length - i > 3 && AV_RB24(&data[i]) == 0x000003) {
162            data[j++] = data[i++];
163            data[j++] = data[i++];
164            i++;
165        } else {
166            data[j++] = data[i++];
167        }
168    }
169
170    nalu->length = j;
171}
172
173static int get_next_nal_unit(CodecParserContext *ctx, NALUnit *nalu)
174{
175    const uint8_t *nalu_start = ctx->buf_ptr;
176
177    if (ctx->buf_end - ctx->buf_ptr >= 4 && AV_RB32(ctx->buf_ptr) == 0x00000001)
178        nalu->start_code_length = 4;
179    else if (ctx->buf_end - ctx->buf_ptr >= 3 && AV_RB24(ctx->buf_ptr) == 0x000001)
180        nalu->start_code_length = 3;
181    else /* No start code at the beginning of the NAL unit */
182        return -1;
183
184    ctx->buf_ptr += nalu->start_code_length;
185
186    while (ctx->buf_ptr < ctx->buf_end) {
187        if (ctx->buf_end - ctx->buf_ptr >= 4 && AV_RB32(ctx->buf_ptr) == 0x00000001)
188            break;
189        else if (ctx->buf_end - ctx->buf_ptr >= 3 && AV_RB24(ctx->buf_ptr) == 0x000001)
190            break;
191        ctx->buf_ptr++;
192    }
193
194    nalu->data   = (uint8_t *)nalu_start + nalu->start_code_length;
195    nalu->length = ctx->buf_ptr - nalu->data;
196    nalu->type   = *nalu->data & 0x1F;
197
198    return 0;
199}
200
201static int decrypt_nal_unit(HLSCryptoContext *crypto_ctx, NALUnit *nalu)
202{
203    int ret = 0;
204    int rem_bytes;
205    uint8_t *data;
206    uint8_t iv[16];
207
208    ret = av_aes_init(crypto_ctx->aes_ctx, crypto_ctx->key, 16 * 8, 1);
209    if (ret < 0)
210        return ret;
211
212    /* Remove start code emulation prevention 0x03 bytes */
213    remove_scep_3_bytes(nalu);
214
215    data = nalu->data + 32;
216    rem_bytes = nalu->length - 32;
217
218    memcpy(iv, crypto_ctx->iv, 16);
219
220    while (rem_bytes > 0) {
221        if (rem_bytes > 16) {
222            av_aes_crypt(crypto_ctx->aes_ctx, data, data, 1, iv, 1);
223            data += 16;
224            rem_bytes -= 16;
225        }
226        data += FFMIN(144, rem_bytes);
227        rem_bytes -= FFMIN(144, rem_bytes);
228    }
229
230    return 0;
231}
232
233static int decrypt_video_frame(HLSCryptoContext *crypto_ctx, AVPacket *pkt)
234{
235    int ret = 0;
236    CodecParserContext  ctx;
237    NALUnit nalu;
238    uint8_t *data_ptr;
239    int move_nalu = 0;
240
241    memset(&ctx, 0, sizeof(ctx));
242    ctx.buf_ptr  = pkt->data;
243    ctx.buf_end = pkt->data + pkt->size;
244
245    data_ptr = pkt->data;
246
247    while (ctx.buf_ptr < ctx.buf_end) {
248        memset(&nalu, 0, sizeof(nalu));
249        ret = get_next_nal_unit(&ctx, &nalu);
250        if (ret < 0)
251            return ret;
252        if ((nalu.type == 0x01 || nalu.type == 0x05) && nalu.length > 48) {
253            int encrypted_nalu_length = nalu.length;
254            ret = decrypt_nal_unit(crypto_ctx, &nalu);
255            if (ret < 0)
256                return ret;
257            move_nalu = nalu.length != encrypted_nalu_length;
258        }
259        if (move_nalu)
260            memmove(data_ptr, nalu.data - nalu.start_code_length, nalu.start_code_length + nalu.length);
261        data_ptr += nalu.start_code_length + nalu.length;
262    }
263
264    av_shrink_packet(pkt, data_ptr - pkt->data);
265
266    return 0;
267}
268
269static int get_next_adts_frame(CodecParserContext *ctx, AudioFrame *frame)
270{
271    int ret = 0;
272
273    AACADTSHeaderInfo *adts_hdr = NULL;
274
275    /* Find next sync word 0xFFF */
276    while (ctx->buf_ptr < ctx->buf_end - 1) {
277        if (*ctx->buf_ptr == 0xFF && (*(ctx->buf_ptr + 1) & 0xF0) == 0xF0)
278            break;
279        ctx->buf_ptr++;
280    }
281
282    if (ctx->buf_ptr >= ctx->buf_end - 1)
283        return -1;
284
285    frame->data = (uint8_t*)ctx->buf_ptr;
286
287    ret = avpriv_adts_header_parse (&adts_hdr, frame->data, ctx->buf_end - frame->data);
288    if (ret < 0)
289        return ret;
290
291    frame->header_length = adts_hdr->crc_absent ? AV_AAC_ADTS_HEADER_SIZE : AV_AAC_ADTS_HEADER_SIZE + 2;
292    frame->length = adts_hdr->frame_length;
293
294    av_free(adts_hdr);
295
296    return 0;
297}
298
299static int get_next_ac3_eac3_sync_frame(CodecParserContext *ctx, AudioFrame *frame)
300{
301    int ret = 0;
302
303    AC3HeaderInfo *hdr = NULL;
304
305    /* Find next sync word 0x0B77 */
306    while (ctx->buf_ptr < ctx->buf_end - 1) {
307        if (*ctx->buf_ptr == 0x0B && *(ctx->buf_ptr + 1) == 0x77)
308            break;
309        ctx->buf_ptr++;
310    }
311
312    if (ctx->buf_ptr >= ctx->buf_end - 1)
313        return -1;
314
315    frame->data = (uint8_t*)ctx->buf_ptr;
316    frame->header_length = 0;
317
318    ret = avpriv_ac3_parse_header(&hdr, frame->data, ctx->buf_end - frame->data);
319    if (ret < 0) {
320        if (ret != AVERROR(ENOMEM))
321            av_free(hdr);
322        return ret;
323    }
324
325    frame->length = hdr->frame_size;
326
327    av_free(hdr);
328
329    return 0;
330}
331
332static int get_next_sync_frame(enum AVCodecID codec_id, CodecParserContext *ctx, AudioFrame *frame)
333{
334    if (codec_id == AV_CODEC_ID_AAC)
335        return get_next_adts_frame(ctx, frame);
336    else if (codec_id == AV_CODEC_ID_AC3 || codec_id == AV_CODEC_ID_EAC3)
337        return get_next_ac3_eac3_sync_frame(ctx, frame);
338    else
339        return AVERROR_INVALIDDATA;
340}
341
342static int decrypt_sync_frame(enum AVCodecID codec_id, HLSCryptoContext *crypto_ctx, AudioFrame *frame)
343{
344    int ret = 0;
345    uint8_t *data;
346    int num_of_encrypted_blocks;
347
348    ret = av_aes_init(crypto_ctx->aes_ctx, crypto_ctx->key, 16 * 8, 1);
349    if (ret < 0)
350        return ret;
351
352    data = frame->data + frame->header_length + 16;
353
354    num_of_encrypted_blocks = (frame->length - frame->header_length - 16)/16;
355
356    av_aes_crypt(crypto_ctx->aes_ctx, data, data, num_of_encrypted_blocks, crypto_ctx->iv, 1);
357
358    return 0;
359}
360
361static int decrypt_audio_frame(enum AVCodecID codec_id, HLSCryptoContext *crypto_ctx, AVPacket *pkt)
362{
363    int ret = 0;
364    CodecParserContext  ctx;
365    AudioFrame frame;
366
367    memset(&ctx, 0, sizeof(ctx));
368    ctx.buf_ptr = pkt->data;
369    ctx.buf_end = pkt->data + pkt->size;
370
371    while (ctx.buf_ptr < ctx.buf_end) {
372        memset(&frame, 0, sizeof(frame));
373        ret = get_next_sync_frame(codec_id, &ctx, &frame);
374        if (ret < 0)
375            return ret;
376        if (frame.length - frame.header_length > 31) {
377            ret = decrypt_sync_frame(codec_id, crypto_ctx, &frame);
378            if (ret < 0)
379                return ret;
380        }
381        ctx.buf_ptr += frame.length;
382    }
383
384    return 0;
385}
386
387int ff_hls_senc_decrypt_frame(enum AVCodecID codec_id, HLSCryptoContext *crypto_ctx, AVPacket *pkt)
388{
389    if (codec_id == AV_CODEC_ID_H264)
390        return decrypt_video_frame(crypto_ctx, pkt);
391    else if (codec_id == AV_CODEC_ID_AAC || codec_id == AV_CODEC_ID_AC3 || codec_id == AV_CODEC_ID_EAC3)
392        return decrypt_audio_frame(codec_id, crypto_ctx, pkt);
393
394    return AVERROR_INVALIDDATA;
395}
396