1 /*
2 * Copyright (c) 2023-2024 Shenzhen Kaihong Digital Industry Development Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "audio_aac_codec.h"
17 #include <cstdint>
18 #include <libswresample/swresample.h>
19 #include <memory>
20 #include <securec.h>
21 #include "common_macro.h"
22 #include "const_def.h"
23 #include "sharing_log.h"
24
25 namespace OHOS {
26 namespace Sharing {
// Constants used by AddAdtsHeader() to assemble the ADTS header that is
// prepended to every encoded AAC packet.

// Number of bytes in the header array; also the offset of the AAC payload in the output buffer.
constexpr uint32_t ADTS_HEADER_SIZE = 7;
// Value written to header byte 0.
constexpr uint32_t ADTS_HEADER_BEGIN = 0xFF;
// Value written to header byte 6.
constexpr uint32_t ADTS_HEADER_END = 0xFC;
// Value written to header byte 1.
constexpr uint32_t ADTS_HEADER_MPEG4_AACLC = 0xF1;
// Left shift applied to (profile - 1) inside header byte 2.
constexpr uint32_t ADTS_HEADER_PROFILE_SHIFT = 6;
// Mask and left shift applied to the sampling frequency index inside header byte 2.
constexpr uint32_t ADTS_HEADER_SAMPLE_MASK = 0x0F;
constexpr uint32_t ADTS_HEADER_SAMPLE_SHIFT = 2;
// Right shift and mask that extract the top bit of the channel configuration for header byte 2.
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT = 2;
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK = 0x01;
// Left shift and mask that place the low two channel configuration bits into header byte 3.
constexpr uint32_t ADTS_HEADER_CHANNEL_SHIFT1 = 6;
constexpr uint32_t ADTS_HEADER_CHANNEL_MASK1 = 0x03;
// Added to the payload size to obtain the frame length stored in the header.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_OFFSET = 7;
// Right shift of the frame length for header byte 3.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT = 11;
// Right shift of the frame length for header byte 4.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT1 = 3;
// Left shift of the frame length for header byte 5.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_SHIFT2 = 5;
// Mask applied to the shifted frame length in header byte 4.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK = 0xFF;
// Bits OR-ed into header byte 5 after the shifted frame length.
constexpr uint32_t ADTS_HEADER_DATA_SZIE_MASK1 = 0x1F;
// Indices of header bytes 2..6.
constexpr uint32_t ADTS_HEADER_INDEX_2 = 2;
constexpr uint32_t ADTS_HEADER_INDEX_3 = 3;
constexpr uint32_t ADTS_HEADER_INDEX_4 = 4;
constexpr uint32_t ADTS_HEADER_INDEX_5 = 5;
constexpr uint32_t ADTS_HEADER_INDEX_6 = 6;

// Time point captured when this translation unit's statics are initialised.
static std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
// Milliseconds between `start` and the first AudioAACEncoder::OnFrame() call that sees it as 0;
// added to the pts_ of every AAC frame delivered by the encoder. File-scope, so shared by all encoders.
static uint64_t duration = 0;
52
AudioAACDecoder()53 AudioAACDecoder::AudioAACDecoder()
54 {
55 SHARING_LOGD("trace.");
56 }
57
~AudioAACDecoder()58 AudioAACDecoder::~AudioAACDecoder()
59 {
60 SHARING_LOGD("trace.");
61 if (avFrame_) {
62 av_frame_free(&avFrame_);
63 }
64
65 if (avPacket_) {
66 av_packet_free(&avPacket_);
67 }
68
69 if (swrContext_) {
70 swr_free(&swrContext_);
71 }
72
73 if (swrOutBuffer_) {
74 av_freep(&swrOutBuffer_);
75 }
76 }
77
Init()78 int32_t AudioAACDecoder::Init()
79 {
80 SHARING_LOGD("trace.");
81 const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_AAC);
82 if (!dec) {
83 SHARING_LOGE("Failed to find codec.");
84 return -1;
85 }
86
87 codecCtx_ = avcodec_alloc_context3(dec);
88 if (!codecCtx_) {
89 SHARING_LOGE("Failed to allocate the codec context.");
90 return -1;
91 }
92
93 if (avcodec_open2(codecCtx_, dec, nullptr) < 0) {
94 SHARING_LOGE("Failed to open codec.");
95 return -1;
96 }
97
98 avPacket_ = av_packet_alloc();
99 if (avPacket_ == nullptr) {
100 SHARING_LOGE("Failed to alloc packet.");
101 return -1;
102 }
103
104 avFrame_ = av_frame_alloc();
105 if (avFrame_ == nullptr) {
106 SHARING_LOGE("Failed to alloc frame.");
107 return -1;
108 }
109 return 0;
110 }
111
OnFrame(const Frame::Ptr &frame)112 void AudioAACDecoder::OnFrame(const Frame::Ptr &frame)
113 {
114 if (frame == nullptr) {
115 SHARING_LOGE("frame is nullptr!");
116 return;
117 }
118
119 if (avPacket_ == nullptr || avFrame_ == nullptr) {
120 return;
121 }
122
123 av_packet_unref(avPacket_);
124 av_frame_unref(avFrame_);
125
126 avPacket_->data = frame->Data();
127 avPacket_->size = frame->Size();
128
129 avcodec_send_packet(codecCtx_, avPacket_);
130 avcodec_receive_frame(codecCtx_, avFrame_);
131
132 if (swrContext_ == nullptr) {
133 swrContext_ = swr_alloc_set_opts(nullptr, (int64_t)avFrame_->channel_layout, // out_ch_layout
134 AV_SAMPLE_FMT_S16, // out_sample_fmt
135 avFrame_->sample_rate, // out_sample_rate
136 (int64_t)avFrame_->channel_layout, // in_ch_layout
137 (AVSampleFormat)avFrame_->format, // AV_SAMPLE_FMT_FLTP
138 avFrame_->sample_rate, // out_sample_rate
139 0, nullptr);
140 if (swrContext_ == nullptr) {
141 SHARING_LOGE("swrContext_ alloc failed!");
142 return;
143 }
144
145 swr_init(swrContext_);
146
147 swrOutBufferSize_ =
148 av_samples_get_buffer_size(nullptr, avFrame_->channels, avFrame_->nb_samples, AV_SAMPLE_FMT_S16, 0);
149 swrOutBuffer_ = (uint8_t *)av_malloc(swrOutBufferSize_);
150 if (swrOutBuffer_ == nullptr) {
151 SHARING_LOGE("swrOutBuffer_ av_malloc failed!");
152 return;
153 }
154 }
155
156 int nbSamples = swr_convert(swrContext_, &swrOutBuffer_, avFrame_->nb_samples, (const uint8_t **)avFrame_->data,
157 avFrame_->nb_samples);
158 if (nbSamples != avFrame_->nb_samples) {
159 SHARING_LOGE("swr_convert failed!");
160 return;
161 }
162
163 auto pcmFrame = FrameImpl::Create();
164 pcmFrame->codecId_ = CODEC_PCM;
165 pcmFrame->Assign((char *)swrOutBuffer_, swrOutBufferSize_);
166 DeliverFrame(pcmFrame);
167 }
168
AudioAACEncoder()169 AudioAACEncoder::AudioAACEncoder()
170 {
171 SHARING_LOGD("trace.");
172 }
173
~AudioAACEncoder()174 AudioAACEncoder::~AudioAACEncoder()
175 {
176 SHARING_LOGD("trace.");
177 if (encFrame_) {
178 av_frame_free(&encFrame_);
179 }
180
181 if (encPacket_) {
182 av_packet_free(&encPacket_);
183 }
184
185 if (swr_) {
186 swr_free(&swr_);
187 }
188
189 if (swrData_) {
190 av_freep(&swrData_);
191 }
192 }
193
InitSwr()194 int AudioAACEncoder::InitSwr()
195 {
196 int64_t in_ch_layout = AV_CH_LAYOUT_STEREO;
197 if (inChannels_ == 1) {
198 in_ch_layout = AV_CH_LAYOUT_MONO;
199 }
200 AVSampleFormat in_sample_fmt = AV_SAMPLE_FMT_S16;
201 if (inSampleBit_ == AUDIO_SAMPLE_BIT_U8) {
202 in_sample_fmt = AV_SAMPLE_FMT_U8;
203 }
204 int in_sample_rate = inSampleRate_;
205 swr_ = swr_alloc_set_opts(NULL, enc_->channel_layout, enc_->sample_fmt, enc_->sample_rate, in_ch_layout,
206 in_sample_fmt, in_sample_rate, 0, NULL);
207 if (!swr_) {
208 SHARING_LOGE("alloc swr failed.");
209 }
210
211 int error;
212 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
213 if ((error = swr_init(swr_)) < 0) {
214 SHARING_LOGE("open swr(%{public}d:%{public}s)", error,
215 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
216 }
217
218 if (!(swrData_ = (uint8_t **)calloc(enc_->channels, sizeof(*swrData_)))) {
219 SHARING_LOGE("alloc swr buffer failed!");
220 }
221
222 if ((error = av_samples_alloc(swrData_, NULL, enc_->channels, enc_->frame_size, enc_->sample_fmt, 0)) < 0) {
223 SHARING_LOGE("alloc swr buffer(%{public}d:%{public}s)\n", error,
224 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
225 }
226
227 return 0;
228 }
229
InitEncoderCtx(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)230 void AudioAACEncoder::InitEncoderCtx(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
231 {
232 enc_->sample_rate = sampleRate; // dst_samplerate;
233 enc_->channels = channels; // dst_channels;
234 enc_->channel_layout = av_get_default_channel_layout(channels);
235 enc_->bit_rate = AUDIO_BIT_RATE_12800;
236 enc_->time_base.num = 1;
237 enc_->time_base.den = sampleRate;
238 enc_->compression_level = 1;
239 enc_->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
240 }
241
InitEncPacket()242 void AudioAACEncoder::InitEncPacket()
243 {
244 av_init_packet(encPacket_);
245 encPacket_->data = NULL;
246 encPacket_->size = 0;
247 }
248
Init(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)249 int32_t AudioAACEncoder::Init(uint32_t channels, uint32_t sampleBit, uint32_t sampleRate)
250 {
251 SHARING_LOGD("trace.");
252 inChannels_ = channels;
253 inSampleBit_ = sampleBit;
254 inSampleRate_ = sampleRate;
255 const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
256 if (!codec) {
257 SHARING_LOGE("Codec not found failed!");
258 return 1;
259 }
260
261 enc_ = avcodec_alloc_context3(codec);
262 if (!enc_) {
263 SHARING_LOGE("Could not allocate audio codec context ");
264 return 1;
265 }
266 enc_->sample_fmt = codec->sample_fmts[0]; // only supports AV_SAMPLE_FMT_FLTP
267 InitEncoderCtx(channels, sampleBit, sampleRate);
268
269 if (avcodec_open2(enc_, codec, NULL) < 0) {
270 SHARING_LOGE("Could not open codec");
271 }
272
273 encFrame_ = av_frame_alloc();
274 if (!encFrame_) {
275 SHARING_LOGE("Could not allocate audio encode in frame");
276 return 1;
277 }
278 encFrame_->format = enc_->sample_fmt;
279 encFrame_->nb_samples = enc_->frame_size;
280 encFrame_->channel_layout = enc_->channel_layout;
281
282 if (av_frame_get_buffer(encFrame_, 0) < 0) {
283 SHARING_LOGE("Could not get audio frame buffer");
284 return 1;
285 }
286 encPacket_ = av_packet_alloc();
287 if (!encPacket_) {
288 SHARING_LOGE("Could not allocate audio encode out packet");
289 return 1;
290 }
291 if (!(fifo_ = av_audio_fifo_alloc(enc_->sample_fmt, enc_->channels, enc_->frame_size))) {
292 SHARING_LOGE("Could not allocate FIFO");
293 return 1;
294 }
295 auto bufferSize = av_samples_get_buffer_size(nullptr, encFrame_->channels, encFrame_->nb_samples,
296 AVSampleFormat(encFrame_->format), 0);
297 outBuffer_ = (uint8_t *)av_malloc(bufferSize);
298 if (outBuffer_ == nullptr) {
299 SHARING_LOGE("outBuffer_ av_malloc failed!");
300 return 1;
301 }
302
303 return 0;
304 }
305
AddSamplesToFifo(uint8_t **samples, int frame_size)306 int AudioAACEncoder::AddSamplesToFifo(uint8_t **samples, int frame_size)
307 {
308 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
309 int error;
310
311 if ((error = av_audio_fifo_realloc(fifo_, av_audio_fifo_size(fifo_) + frame_size)) < 0) {
312 SHARING_LOGE("Could not reallocate FIFO(%{public}d:%{public}s)", error,
313 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
314 }
315
316 if ((error = av_audio_fifo_write(fifo_, (void **)samples, frame_size)) < frame_size) {
317 SHARING_LOGE("Could not write data to FIFO(%{public}d:%{public}s)", error,
318 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
319 }
320
321 return 0;
322 }
323
AddAdtsHeader(uint8_t *data, int dataSize)324 void AddAdtsHeader(uint8_t *data, int dataSize)
325 {
326 // ADTS header format (7 or 9 bytes):
327 // 12 bits syncword (0xFFF)
328 // 1 bit MPEG version (0 for MPEG-4, 1 for MPEG-2)
329 // 2 bits layer (always 0 for MPEG-4)
330 // 1 bit protection absent
331 // 2 bits profile (audio object type)
332 // 4 bits sampling frequency index
333 // 1 bit private bit
334 // 3 bits channel configuration
335 // 1 bit original/copy
336 // 1 bit home
337 // variable bits variable header length
338 // 16 bits frame length
339 // 16 bits buffer fullness
340 // 1 bit number of raw data blocks in frame (set to 0)
341
342 uint8_t adtsHeader[ADTS_HEADER_SIZE];
343 int profile = 2; // 2: AAC LC
344 int samplingFrequencyIndex = 3; // 3: 48Khz, 4: 44.1kHz
345 int channelConfiguration = 2; // 2: Stereo
346
347 adtsHeader[0] = ADTS_HEADER_BEGIN;
348 adtsHeader[1] = ADTS_HEADER_MPEG4_AACLC;
349 adtsHeader[ADTS_HEADER_INDEX_2] =
350 ((profile - 1) << ADTS_HEADER_PROFILE_SHIFT) |
351 ((samplingFrequencyIndex & ADTS_HEADER_SAMPLE_MASK) << ADTS_HEADER_SAMPLE_SHIFT) |
352 ((channelConfiguration >> ADTS_HEADER_CHANNEL_SHIFT) & ADTS_HEADER_CHANNEL_MASK);
353 adtsHeader[ADTS_HEADER_INDEX_3] =
354 ((channelConfiguration & ADTS_HEADER_CHANNEL_MASK1) << ADTS_HEADER_CHANNEL_SHIFT1) |
355 ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT);
356 adtsHeader[ADTS_HEADER_INDEX_4] =
357 ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) >> ADTS_HEADER_DATA_SZIE_SHIFT1) & ADTS_HEADER_DATA_SZIE_MASK;
358 adtsHeader[ADTS_HEADER_INDEX_5] =
359 ((dataSize + ADTS_HEADER_DATA_SZIE_OFFSET) << ADTS_HEADER_DATA_SZIE_SHIFT2) | ADTS_HEADER_DATA_SZIE_MASK1;
360 adtsHeader[ADTS_HEADER_INDEX_6] = ADTS_HEADER_END;
361
362 if (memcpy_s(data, sizeof(adtsHeader), adtsHeader, sizeof(adtsHeader)) != EOK) {
363 SHARING_LOGE("copy adtsHeader failed!");
364 }
365 }
366
DoSwr(const Frame::Ptr &frame)367 void AudioAACEncoder::DoSwr(const Frame::Ptr &frame)
368 {
369 int err = 0;
370 int error = 0;
371 int in_samples = frame->Size();
372 uint8_t *in_sample[1];
373 in_sample[0] = frame->Data();
374 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
375
376 do {
377 int sample_size = inChannels_ * inSampleBit_ / 8;
378 in_samples = in_samples / sample_size;
379
380 int frame_size = swr_convert(swr_, swrData_, enc_->frame_size, (const uint8_t **)in_sample, in_samples);
381 if ((error = frame_size) < 0) {
382 SHARING_LOGE("Could not convert input samples(%{public}d:%{public}s)", error,
383 av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
384 }
385
386 in_sample[0] = NULL;
387 in_samples = 0;
388 if ((err = AddSamplesToFifo(swrData_, frame_size)) != 0) {
389 SHARING_LOGE("write samples failed");
390 }
391 } while (swr_get_out_samples(swr_, in_samples) >= enc_->frame_size);
392 }
393
OnFrame(const Frame::Ptr &frame)394 void AudioAACEncoder::OnFrame(const Frame::Ptr &frame)
395 {
396 RETURN_IF_NULL(frame);
397 if (duration == 0) {
398 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
399 duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - start).count();
400 }
401
402 int error = 0;
403 if (!swr_ && (error = InitSwr()) != 0) {
404 SHARING_LOGE("resample init failed!");
405 return;
406 }
407 DoSwr(frame);
408
409 char errBuf[AV_ERROR_MAX_STRING_SIZE] = {0};
410 encFrame_->format = AV_SAMPLE_FMT_FLTP;
411 while (av_audio_fifo_size(fifo_) >= enc_->frame_size) {
412 if (av_frame_make_writable(encFrame_) < 0) {
413 SHARING_LOGE("Could not make writable frame");
414 }
415 if (av_audio_fifo_read(fifo_, (void **)encFrame_->data, enc_->frame_size) < enc_->frame_size) {
416 SHARING_LOGE("Could not read data from FIFO");
417 }
418 encFrame_->pts = nextOutPts_;
419 nextOutPts_ += enc_->frame_size;
420 error = avcodec_send_frame(enc_, encFrame_);
421 if (error < 0) {
422 SHARING_LOGE("send failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
423 }
424
425 InitEncPacket();
426 while (error >= 0) {
427 error = avcodec_receive_packet(enc_, encPacket_);
428 if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
429 break;
430 } else if (error < 0) {
431 SHARING_LOGE("recv failed:%{public}s", av_make_error_string(errBuf, AV_ERROR_MAX_STRING_SIZE, error));
432 }
433
434 encPacket_->dts = av_rescale(encPacket_->dts, 1000, enc_->time_base.den); // rescale time base 1000.
435 encPacket_->pts = av_rescale(encPacket_->pts, 1000, enc_->time_base.den); // rescale time base 1000.
436 if (memcpy_s(outBuffer_ + ADTS_HEADER_SIZE, encPacket_->size, encPacket_->data, encPacket_->size) != EOK) {
437 SHARING_LOGE("copy data failed!");
438 break;
439 }
440 AddAdtsHeader((uint8_t *)outBuffer_, encPacket_->size);
441 auto aacFrame = FrameImpl::Create();
442 aacFrame->codecId_ = CODEC_AAC;
443 aacFrame->pts_ = duration + encPacket_->pts;
444 aacFrame->Assign((char *)outBuffer_, encPacket_->size + 7); // 7: size offset
445 DeliverFrame(aacFrame);
446 }
447 }
448 }
449 } // namespace Sharing
450 } // namespace OHOS
451