1 /*
2  * Copyright (c) 2023-2024 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "audio_aac_codec.h"
17 #include <cstdint>
18 #include <libswresample/swresample.h>
19 #include <memory>
20 #include <securec.h>
21 #include "common_macro.h"
22 #include "const_def.h"
23 #include "sharing_log.h"
24 
25 namespace OHOS {
26 namespace Sharing {
// Number of bytes AddAdtsHeader() writes in front of every encoded AAC packet.
constexpr uint32_t ADTS_HEADER_SIZE{7};

// Fixed byte values stored at header index 0, 6 and 1 respectively.
constexpr uint32_t ADTS_HEADER_BEGIN{0xFF};
constexpr uint32_t ADTS_HEADER_END{0xFC};
constexpr uint32_t ADTS_HEADER_MPEG4_AACLC{0xF1};

// Bit positions and masks for the profile, sampling-frequency index and
// channel configuration fields packed into header bytes 2 and 3.
constexpr uint32_t ADTS_HEADER_PROFILE_SHIFT{6};
constexpr uint32_t ADTS_HEADER_SAMPLE_MASK{0x0F};
constexpr uint32_t ADTS_HEADER_SAMPLE_SHIFT{2};
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT{2};
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK{0x01};
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT1{6};
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK1{0x03};

// Frame length = payload size + ADTS_HEADER_DATA_SZIE_OFFSET, spread over header bytes 3..5.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_OFFSET{7};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT{11};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT1{3};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT2{5};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK{0xFF};
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK1{0x1F};

// Named indices into the 7-byte header array (0 and 1 are used literally).
constexpr uint32_t ADTS_HEADER_INDEX_2{2};
constexpr uint32_t ADTS_HEADER_INDEX_3{3};
constexpr uint32_t ADTS_HEADER_INDEX_4{4};
constexpr uint32_t ADTS_HEADER_INDEX_5{5};
constexpr uint32_t ADTS_HEADER_INDEX_6{6};
49 
// Wall-clock time captured once during static initialisation of this file.
static auto start = std::chrono::system_clock::now();
// Milliseconds elapsed since `start`, latched by AudioAACEncoder::OnFrame() while it is still 0
// and added to every encoded packet's pts. Shared by all encoder instances in the process.
static uint64_t duration{0};
52 
/**
 * @brief Construct the decoder. Only emits a trace log; the FFmpeg codec context,
 *        packet and frame are allocated later by Init().
 */
AudioAACDecoder::AudioAACDecoder()
{
    SHARING_LOGD("trace.");
}
57 
~AudioAACDecoder()58 AudioAACDecoder::~AudioAACDecoder()
59 {
60     SHARING_LOGD("trace.");
61     if (avFrame_) {
62         av_frame_free(&avFrame_);
63     }
64 
65     if (avPacket_) {
66         av_packet_free(&avPacket_);
67     }
68 
69     if (swrContext_) {
70         swr_free(&swrContext_);
71     }
72 
73     if (swrOutBuffer_) {
74         av_freep(&swrOutBuffer_);
75     }
76 }
77 
Init()78 int32_t AudioAACDecoder::Init()
79 {
80     SHARING_LOGD("trace.");
81     const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_AAC);
82     if (!dec) {
83         SHARING_LOGE("Failed to find codec.");
84         return -1;
85     }
86 
87     codecCtx_ = avcodec_alloc_context3(dec);
88     if (!codecCtx_) {
89         SHARING_LOGE("Failed to allocate the codec context.");
90         return -1;
91     }
92 
93     if (avcodec_open2(codecCtx_, dec, nullptr) < 0) {
94         SHARING_LOGE("Failed to open codec.");
95         return -1;
96     }
97 
98     avPacket_ = av_packet_alloc();
99     if (avPacket_ == nullptr) {
100         SHARING_LOGE("Failed to alloc packet.");
101         return -1;
102     }
103 
104     avFrame_ = av_frame_alloc();
105     if (avFrame_ == nullptr) {
106         SHARING_LOGE("Failed to alloc frame.");
107         return -1;
108     }
109     return 0;
110 }
111 
OnFrame(const Frame::Ptr &frame)112 void AudioAACDecoder::OnFrame(const Frame::Ptr &frame)
113 {
114     if (frame == nullptr) {
115         SHARING_LOGE("frame is nullptr!");
116         return;
117     }
118 
119     if (avPacket_ == nullptr || avFrame_ == nullptr) {
120         return;
121     }
122 
123     av_packet_unref(avPacket_);
124     av_frame_unref(avFrame_);
125 
126     avPacket_->data = frame->Data();
127     avPacket_->size = frame->Size();
128 
129     avcodec_send_packet(codecCtx_, avPacket_);
130     avcodec_receive_frame(codecCtx_, avFrame_);
131 
132     if (swrContext_ == nullptr) {
133         swrContext_ = swr_alloc_set_opts(nullptr, (int64_t)avFrame_->channel_layout, // out_ch_layout
134                                          AV_SAMPLE_FMT_S16,                          // out_sample_fmt
135                                          avFrame_->sample_rate,                      // out_sample_rate
136                                          (int64_t)avFrame_->channel_layout,          // in_ch_layout
137                                          (AVSampleFormat)avFrame_->format,           // AV_SAMPLE_FMT_FLTP
138                                          avFrame_->sample_rate,                      // out_sample_rate
139                                          0, nullptr);
140         if (swrContext_ == nullptr) {
141             SHARING_LOGE("swrContext_ alloc failed!");
142             return;
143         }
144 
145         swr_init(swrContext_);
146 
147         swrOutBufferSize_ =
148             av_samples_get_buffer_size(nullptr, avFrame_->channels, avFrame_->nb_samples, AV_SAMPLE_FMT_S16, 0);
149         swrOutBuffer_ = (uint8_t *)av_malloc(swrOutBufferSize_);
150         if (swrOutBuffer_ == nullptr) {
151             SHARING_LOGE("swrOutBuffer_ av_malloc failed!");
152             return;
153         }
154     }
155 
156     int nbSamples = swr_convert(swrContext_, &swrOutBuffer_, avFrame_->nb_samples, (const uint8_t **)avFrame_->data,
157                                 avFrame_->nb_samples);
158     if (nbSamples != avFrame_->nb_samples) {
159         SHARING_LOGE("swr_convert failed!");
160         return;
161     }
162 
163     auto pcmFrame = FrameImpl::Create();
164     pcmFrame->codecId_ = CODEC_PCM;
165     pcmFrame->Assign((char *)swrOutBuffer_, swrOutBufferSize_);
166     DeliverFrame(pcmFrame);
167 }
168 
/**
 * @brief Construct the encoder. Only emits a trace log; the codec context, frame,
 *        packet, FIFO and output buffer are allocated later by Init().
 */
AudioAACEncoder::AudioAACEncoder()
{
    SHARING_LOGD("trace.");
}
173 
~AudioAACEncoder()174 AudioAACEncoder::~AudioAACEncoder()
175 {
176     SHARING_LOGD("trace.");
177     if (encFrame_) {
178         av_frame_free(&encFrame_);
179     }
180 
181     if (encPacket_) {
182         av_packet_free(&encPacket_);
183     }
184 
185     if (swr_) {
186         swr_free(&swr_);
187     }
188 
189     if (swrData_) {
190         av_freep(&swrData_);
191     }
192 }
193 
InitSwr()194 int AudioAACEncoder::InitSwr()
195 {
196     int64_t in_ch_layout = AV_CH_LAYOUT_STEREO;
197     if (inChannels_ == 1) {
198         in_ch_layout = AV_CH_LAYOUT_MONO;
199     }
200     AVSampleFormat in_sample_fmt = AV_SAMPLE_FMT_S16;
201     if (inSampleBit_ == AUDIO_SAMPLE_BIT_U8) {
202         in_sample_fmt = AV_SAMPLE_FMT_U8;
203     }
204     int in_sample_rate = inSampleRate_;
205     swr_ = swr_alloc_set_opts(NULL, enc_->channel_layout, enc_->sample_fmt, enc_->sample_rate, in_ch_layout,
206                               in_sample_fmt, in_sample_rate, 0, NULL);
207     if (!swr_) {
208         SHARING_LOGE("alloc swr failed.");
209     }
210 
211     int error;
212     char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
213     if ((error = swr_init(swr_)) < 0) {
214         SHARING_LOGE("open swr(%{public}d:%{public}s)", error,
215                      av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
216     }
217 
218     if (!(swrData_ = (uint8_t **)calloc(enc_->channels, sizeof(*swrData_)))) {
219         SHARING_LOGE("alloc swr buffer failed!");
220     }
221 
222     if ((error = av_samples_alloc(swrData_, NULL, enc_->channels, enc_->frame_size, enc_->sample_fmt, 0)) < 0) {
223         SHARING_LOGE("alloc swr buffer(%{public}d:%{public}s)\n", error,
224                      av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
225     }
226 
227     return 0;
228 }
229 
InitEncoderCtx(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)230 void AudioAACEncoder::InitEncoderCtx(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
231 {
232     enc_->sample_rate = sampleRate; // dst_samplerate;
233     enc_->channels = channels;      // dst_channels;
234     enc_->channel_layout = av_get_default_channel_layout(channels);
235     enc_->bit_rate = AUDIO_BIT_RATE_12800;
236     enc_->time_base.num = 1;
237     enc_->time_base.den = sampleRate;
238     enc_->compression_level = 1;
239     enc_->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
240 }
241 
InitEncPacket()242 void AudioAACEncoder::InitEncPacket()
243 {
244     av_init_packet(encPacket_);
245     encPacket_->data = NULL;
246     encPacket_->size = 0;
247 }
248 
Init(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)249 int32_t AudioAACEncoder::Init(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
250 {
251     SHARING_LOGD("trace.");
252     inChannels_ = channels;
253     inSampleBit_ = sampleBit;
254     inSampleRate_ = sampleRate;
255     const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
256     if (!codec) {
257         SHARING_LOGE("Codec not found failed!");
258         return 1;
259     }
260 
261     enc_ = avcodec_alloc_context3(codec);
262     if (!enc_) {
263         SHARING_LOGE("Could not allocate audio codec context ");
264         return 1;
265     }
266     enc_->sample_fmt = codec->sample_fmts[0]; // only supports AV_SAMPLE_FMT_FLTP
267     InitEncoderCtx(channels, sampleBit, sampleRate);
268 
269     if (avcodec_open2(enc_, codec, NULL) < 0) {
270         SHARING_LOGE("Could not open codec");
271     }
272 
273     encFrame_ = av_frame_alloc();
274     if (!encFrame_) {
275         SHARING_LOGE("Could not allocate audio encode in frame");
276         return 1;
277     }
278     encFrame_->format = enc_->sample_fmt;
279     encFrame_->nb_samples = enc_->frame_size;
280     encFrame_->channel_layout = enc_->channel_layout;
281 
282     if (av_frame_get_buffer(encFrame_, 0) < 0) {
283         SHARING_LOGE("Could not get audio frame buffer");
284         return 1;
285     }
286     encPacket_ = av_packet_alloc();
287     if (!encPacket_) {
288         SHARING_LOGE("Could not allocate audio encode out packet");
289         return 1;
290     }
291     if (!(fifo_ = av_audio_fifo_alloc(enc_->sample_fmt, enc_->channels, enc_->frame_size))) {
292         SHARING_LOGE("Could not allocate FIFO");
293         return 1;
294     }
295     auto bufferSize = av_samples_get_buffer_size(nullptr, encFrame_->channels, encFrame_->nb_samples,
296                                                  AVSampleFormat(encFrame_->format), 0);
297     outBuffer_ = (uint8_t *)av_malloc(bufferSize);
298     if (outBuffer_ == nullptr) {
299         SHARING_LOGE("outBuffer_ av_malloc failed!");
300         return 1;
301     }
302 
303     return 0;
304 }
305 
AddSamplesToFifo(uint8_t **samples, int frame_size)306 int AudioAACEncoder::AddSamplesToFifo(uint8_t **samples, int frame_size)
307 {
308     char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
309     int error;
310 
311     if ((error = av_audio_fifo_realloc(fifo_, av_audio_fifo_size(fifo_) + frame_size)) < 0) {
312         SHARING_LOGE("Could not reallocate FIFO(%{public}d:%{public}s)", error,
313                      av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
314     }
315 
316     if ((error = av_audio_fifo_write(fifo_, (void **)samples, frame_size)) < frame_size) {
317         SHARING_LOGE("Could not write data to FIFO(%{public}d:%{public}s)", error,
318                      av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
319     }
320 
321     return 0;
322 }
323 
AddAdtsHeader(uint8_t *data, int dataSize)324 void AddAdtsHeader(uint8_t *data, int dataSize)
325 {
326     // ADTS header format (7 or 9 bytes):
327     // 12 bits syncword (0xFFF)
328     // 1 bit MPEG version (0 for MPEG-4, 1 for MPEG-2)
329     // 2 bits layer (always 0 for MPEG-4)
330     // 1 bit protection absent
331     // 2 bits profile (audio object type)
332     // 4 bits sampling frequency index
333     // 1 bit private bit
334     // 3 bits channel configuration
335     // 1 bit original/copy
336     // 1 bit home
337     // variable bits variable header length
338     // 16 bits frame length
339     // 16 bits buffer fullness
340     // 1 bit number of raw data blocks in frame (set to 0)
341 
342     uint8_t adtsHeader[ADTS_HEADER_SIZE];
343     int profile = 2;                // 2: AAC LC
344     int samplingFrequencyIndex = 3; // 3: 48Khz, 4: 44.1kHz
345     int channelConfiguration = 2;   // 2: Stereo
346 
347     adtsHeader[0] = ADTS_HEADER_BEGIN;
348     adtsHeader[1] = ADTS_HEADER_MPEG4_AACLC;
349     adtsHeader[ADTS_HEADER_INDEX_2] =
350         ((profile - 1) << ADTS_HEADER_PROFILE_SHIFT) |
351         ((samplingFrequencyIndex & ADTS_HEADER_SAMPLE_MASK) << ADTS_HEADER_SAMPLE_SHIFT) |
352         ((channelConfiguration >> ADTS_HEADER_CHANNEL_SHIFT) & ADTS_HEADER_CHANNEL_MASK);
353     adtsHeader[ADTS_HEADER_INDEX_3] =
354         ((channelConfiguration & ADTS_HEADER_CHANNEL_MASK1) << ADTS_HEADER_CHANNEL_SHIFT1) |
355         ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT);
356     adtsHeader[ADTS_HEADER_INDEX_4] =
357         ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT1) & ADTS_HEADER_DATA_SZIE_MASK;
358     adtsHeader[ADTS_HEADER_INDEX_5] =
359         ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) << ADTS_HEADER_DATA_SZIE_SHIFT2) | ADTS_HEADER_DATA_SZIE_MASK1;
360     adtsHeader[ADTS_HEADER_INDEX_6] = ADTS_HEADER_END;
361 
362     if (memcpy_s(data, sizeof(adtsHeader), adtsHeader, sizeof(adtsHeader)) != EOK) {
363         SHARING_LOGE("copy adtsHeader failed!");
364     }
365 }
366 
DoSwr(const Frame::Ptr &frame)367 void AudioAACEncoder::DoSwr(const Frame::Ptr &frame)
368 {
369     int err = 0;
370     int error = 0;
371     int in_samples = frame->Size();
372     uint8_t *in_sample[1];
373     in_sample[0] = frame->Data();
374     char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
375 
376     do {
377         int sample_size = inChannels_ * inSampleBit_ / 8;
378         in_samples = in_samples / sample_size;
379 
380         int frame_size = swr_convert(swr_, swrData_, enc_->frame_size, (const uint8_t **)in_sample, in_samples);
381         if ((error = frame_size) < 0) {
382             SHARING_LOGE("Could not convert input samples(%{public}d:%{public}s)", error,
383                          av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
384         }
385 
386         in_sample[0] = NULL;
387         in_samples = 0;
388         if ((err = AddSamplesToFifo(swrData_, frame_size)) != 0) {
389             SHARING_LOGE("write samples failed");
390         }
391     } while (swr_get_out_samples(swr_, in_samples) >= enc_->frame_size);
392 }
393 
OnFrame(const Frame::Ptr &frame)394 void AudioAACEncoder::OnFrame(const Frame::Ptr &frame)
395 {
396     RETURN_IF_NULL(frame);
397     if (duration == 0) {
398         std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
399         duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - start).count();
400     }
401 
402     int error = 0;
403     if (!swr_ && (error = InitSwr()) != 0) {
404         SHARING_LOGE("resample init failed!");
405         return;
406     }
407     DoSwr(frame);
408 
409     char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
410     encFrame_->format = AV_SAMPLE_FMT_FLTP;
411     while (av_audio_fifo_size(fifo_) >= enc_->frame_size) {
412         if (av_frame_make_writable(encFrame_) < 0) {
413             SHARING_LOGE("Could not make writable frame");
414         }
415         if (av_audio_fifo_read(fifo_, (void **)encFrame_->data, enc_->frame_size) < enc_->frame_size) {
416             SHARING_LOGE("Could not read data from FIFO");
417         }
418         encFrame_->pts = nextOutPts_;
419         nextOutPts_ += enc_->frame_size;
420         error = avcodec_send_frame(enc_, encFrame_);
421         if (error < 0) {
422             SHARING_LOGE("send failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
423         }
424 
425         InitEncPacket();
426         while (error >= 0) {
427             error = avcodec_receive_packet(enc_, encPacket_);
428             if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
429                 break;
430             } else if (error < 0) {
431                 SHARING_LOGE("recv failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
432             }
433 
434             encPacket_->dts = av_rescale(encPacket_->dts, 1000, enc_->time_base.den); // rescale time base 1000.
435             encPacket_->pts = av_rescale(encPacket_->pts, 1000, enc_->time_base.den); // rescale time base 1000.
436             if (memcpy_s(outBuffer_ + ADTS_HEADER_SIZE, encPacket_->size, encPacket_->data, encPacket_->size) != EOK) {
437                 SHARING_LOGE("copy data failed!");
438                 break;
439             }
440             AddAdtsHeader((uint8_t *)outBuffer_, encPacket_->size);
441             auto aacFrame = FrameImpl::Create();
442             aacFrame->codecId_ = CODEC_AAC;
443             aacFrame->pts_ = duration + encPacket_->pts;
444             aacFrame->Assign((char *)outBuffer_, encPacket_->size + 7); // 7: size offset
445             DeliverFrame(aacFrame);
446         }
447     }
448 }
449 } // namespace Sharing
450 } // namespace OHOS
451