xref: /third_party/ffmpeg/libavcodec/mfenc.c (revision cabdff1a)
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#define COBJMACROS
20#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
21#undef _WIN32_WINNT
22#define _WIN32_WINNT 0x0602
23#endif
24
25#include "encode.h"
26#include "mf_utils.h"
27#include "libavutil/imgutils.h"
28#include "libavutil/opt.h"
29#include "libavutil/time.h"
30#include "codec_internal.h"
31#include "internal.h"
32#include "compat/w32dlfcn.h"
33
typedef struct MFContext {
    AVClass *av_class;
    HMODULE library;                    // handle of the loaded MF DLL backing 'functions'
    MFFunctions functions;              // resolved Media Foundation entry points
    AVFrame *frame;                     // input frame buffered across EAGAIN rounds
    int is_video, is_audio;
    GUID main_subtype;                  // MF subtype GUID of the target codec
    IMFTransform *mft;                  // the encoder transform
    IMFMediaEventGenerator *async_events; // non-NULL only for async (hw) MFTs
    DWORD in_stream_id, out_stream_id;
    MFT_INPUT_STREAM_INFO in_info;
    MFT_OUTPUT_STREAM_INFO out_info;
    int out_stream_provides_samples;    // MFT allocates its own output samples
    int draining, draining_done;        // drain requested / drain finished
    int sample_sent;                    // at least one input sample was submitted
    int async_need_input, async_have_output, async_marker; // async event flags
    int64_t reorder_delay;              // pts-dts shift seen on first packet with a dts
    ICodecAPI *codec_api;               // optional per-codec tuning interface
    // set by AVOption
    int opt_enc_rc;
    int opt_enc_quality;
    int opt_enc_scenario;
    int opt_enc_hw;
} MFContext;
58
59static int mf_choose_output_type(AVCodecContext *avctx);
60static int mf_setup_context(AVCodecContext *avctx);
61
62#define MF_TIMEBASE (AVRational){1, 10000000}
63// Sentinel value only used by us.
64#define MF_INVALID_TIME AV_NOPTS_VALUE
65
// Block until the async MFT signals at least one actionable event, and
// translate the events into the async_*/draining_done flags in MFContext.
// No-op for synchronous MFTs (no event generator).
// Returns 0 on success, AVERROR_EXTERNAL if fetching an event fails.
static int mf_wait_events(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;

    if (!c->async_events)
        return 0;

    // Loop until any flag that lets the caller make progress is set.
    while (!(c->async_need_input || c->async_have_output || c->draining_done || c->async_marker)) {
        IMFMediaEvent *ev = NULL;
        MediaEventType ev_id = 0;
        HRESULT hr = IMFMediaEventGenerator_GetEvent(c->async_events, 0, &ev);
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
                   ff_hr_str(hr));
            return AVERROR_EXTERNAL;
        }
        IMFMediaEvent_GetType(ev, &ev_id);
        switch (ev_id) {
        case ff_METransformNeedInput:
            // Input requests are ignored once draining has started.
            if (!c->draining)
                c->async_need_input = 1;
            break;
        case ff_METransformHaveOutput:
            c->async_have_output = 1;
            break;
        case ff_METransformDrainComplete:
            c->draining_done = 1;
            break;
        case ff_METransformMarker:
            c->async_marker = 1;
            break;
        default: ;
        }
        IMFMediaEvent_Release(ev);
    }

    return 0;
}
104
105static AVRational mf_get_tb(AVCodecContext *avctx)
106{
107    if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
108        return avctx->time_base;
109    return MF_TIMEBASE;
110}
111
112static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
113{
114    if (av_pts == AV_NOPTS_VALUE)
115        return MF_INVALID_TIME;
116    return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
117}
118
119static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
120{
121    LONGLONG stime = mf_to_mf_time(avctx, av_pts);
122    if (stime != MF_INVALID_TIME)
123        IMFSample_SetSampleTime(sample, stime);
124}
125
126static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
127{
128    return av_rescale_q(stime, MF_TIMEBASE, mf_get_tb(avctx));
129}
130
131static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
132{
133    LONGLONG pts;
134    HRESULT hr = IMFSample_GetSampleTime(sample, &pts);
135    if (FAILED(hr))
136        return AV_NOPTS_VALUE;
137    return mf_from_mf_time(avctx, pts);
138}
139
// Extract codec extradata and the output buffer size from the negotiated
// audio output type: MF_MT_USER_DATA becomes avctx->extradata (with the
// leading 12 HEAACWAVEINFO bytes stripped for AAC), and a fallback
// out_info.cbSize is derived from the average byte rate if the MFT
// reported 0. Returns 0 on success, AVERROR(ENOMEM)/AVERROR_EXTERNAL on
// failure.
static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    UINT32 sz;

    // MP3 and AC3 carry no out-of-band extradata.
    if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
        hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
        if (!FAILED(hr) && sz > 0) {
            avctx->extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!avctx->extradata)
                return AVERROR(ENOMEM);
            avctx->extradata_size = sz;
            hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, avctx->extradata, sz, NULL);
            if (FAILED(hr))
                return AVERROR_EXTERNAL;

            if (avctx->codec_id == AV_CODEC_ID_AAC && avctx->extradata_size >= 12) {
                // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
                avctx->extradata_size = avctx->extradata_size - 12;
                memmove(avctx->extradata, avctx->extradata + 12, avctx->extradata_size);
            }
        }
    }

    // I don't know where it's documented that we need this. It happens with the
    // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
    // (Certainly any lossy codec will have frames much smaller than 1 second.)
    if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
        hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
        if (!FAILED(hr)) {
            av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
                   "assuming %d bytes instead.\n", (int)sz);
            c->out_info.cbSize = sz;
        }
    }

    return 0;
}
179
180static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
181{
182    HRESULT hr;
183    UINT32 sz;
184
185    hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &sz);
186    if (!FAILED(hr) && sz > 0) {
187        uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
188        if (!extradata)
189            return AVERROR(ENOMEM);
190        hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, extradata, sz, NULL);
191        if (FAILED(hr)) {
192            av_free(extradata);
193            return AVERROR_EXTERNAL;
194        }
195        av_freep(&avctx->extradata);
196        avctx->extradata = extradata;
197        avctx->extradata_size = sz;
198    }
199
200    return 0;
201}
202
203static int mf_output_type_get(AVCodecContext *avctx)
204{
205    MFContext *c = avctx->priv_data;
206    HRESULT hr;
207    IMFMediaType *type;
208    int ret;
209
210    hr = IMFTransform_GetOutputCurrentType(c->mft, c->out_stream_id, &type);
211    if (FAILED(hr)) {
212        av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
213        return AVERROR_EXTERNAL;
214    }
215
216    av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
217    ff_media_type_dump(avctx, type);
218
219    ret = 0;
220    if (c->is_video) {
221        ret = mf_encv_output_type_get(avctx, type);
222    } else if (c->is_audio) {
223        ret = mf_enca_output_type_get(avctx, type);
224    }
225
226    if (ret < 0)
227        av_log(avctx, AV_LOG_ERROR, "output type not supported\n");
228
229    IMFMediaType_Release(type);
230    return ret;
231}
232
233static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
234{
235    MFContext *c = avctx->priv_data;
236    HRESULT hr;
237    int ret;
238    DWORD len;
239    IMFMediaBuffer *buffer;
240    BYTE *data;
241    UINT64 t;
242    UINT32 t32;
243
244    hr = IMFSample_GetTotalLength(sample, &len);
245    if (FAILED(hr))
246        return AVERROR_EXTERNAL;
247
248    if ((ret = ff_get_encode_buffer(avctx, avpkt, len, 0)) < 0)
249        return ret;
250
251    IMFSample_ConvertToContiguousBuffer(sample, &buffer);
252    if (FAILED(hr))
253        return AVERROR_EXTERNAL;
254
255    hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
256    if (FAILED(hr)) {
257        IMFMediaBuffer_Release(buffer);
258        return AVERROR_EXTERNAL;
259    }
260
261    memcpy(avpkt->data, data, len);
262
263    IMFMediaBuffer_Unlock(buffer);
264    IMFMediaBuffer_Release(buffer);
265
266    avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);
267
268    hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
269    if (c->is_audio || (!FAILED(hr) && t32 != 0))
270        avpkt->flags |= AV_PKT_FLAG_KEY;
271
272    hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
273    if (!FAILED(hr)) {
274        avpkt->dts = mf_from_mf_time(avctx, t);
275        // At least on Qualcomm's HEVC encoder on SD 835, the output dts
276        // starts from the input pts of the first frame, while the output pts
277        // is shifted forward. Therefore, shift the output values back so that
278        // the output pts matches the input.
279        if (c->reorder_delay == AV_NOPTS_VALUE)
280            c->reorder_delay = avpkt->pts - avpkt->dts;
281        avpkt->dts -= c->reorder_delay;
282        avpkt->pts -= c->reorder_delay;
283    }
284
285    return 0;
286}
287
288static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
289{
290    MFContext *c = avctx->priv_data;
291    size_t len;
292    size_t bps;
293    IMFSample *sample;
294
295    bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->ch_layout.nb_channels;
296    len = frame->nb_samples * bps;
297
298    sample = ff_create_memory_sample(&c->functions, frame->data[0], len,
299                                     c->in_info.cbAlignment);
300    if (sample)
301        IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
302    return sample;
303}
304
// Copy a video AVFrame into a newly allocated IMFSample: create a sample
// backed by a buffer sized for the configured pixel format/dimensions,
// copy the frame planes into it packed, and set the sample duration from
// the frame duration. Returns NULL on any allocation/copy failure.
static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
{
    MFContext *c = avctx->priv_data;
    IMFSample *sample;
    IMFMediaBuffer *buffer;
    BYTE *data;
    HRESULT hr;
    int ret;
    int size;

    // Packed (align=1) buffer size for the full picture.
    size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
    if (size < 0)
        return NULL;

    sample = ff_create_memory_sample(&c->functions, NULL, size,
                                     c->in_info.cbAlignment);
    if (!sample)
        return NULL;

    hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
    if (FAILED(hr)) {
        IMFSample_Release(sample);
        return NULL;
    }

    hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
    if (FAILED(hr)) {
        IMFMediaBuffer_Release(buffer);
        IMFSample_Release(sample);
        return NULL;
    }

    // Flatten the (possibly strided) frame planes into the MF buffer.
    ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
                                  avctx->pix_fmt, avctx->width, avctx->height, 1);
    IMFMediaBuffer_SetCurrentLength(buffer, size);
    // Unlock before releasing; the sample keeps its own reference to the buffer.
    IMFMediaBuffer_Unlock(buffer);
    IMFMediaBuffer_Release(buffer);
    if (ret < 0) {
        IMFSample_Release(sample);
        return NULL;
    }

    IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->pkt_duration));

    return sample;
}
351
352static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
353{
354    MFContext *c = avctx->priv_data;
355    IMFSample *sample;
356
357    if (c->is_audio) {
358        sample = mf_a_avframe_to_sample(avctx, frame);
359    } else {
360        sample = mf_v_avframe_to_sample(avctx, frame);
361    }
362
363    if (sample)
364        mf_sample_set_pts(avctx, sample, frame->pts);
365
366    return sample;
367}
368
// Submit one input sample to the MFT, or start draining when sample==NULL.
// For async MFTs a METransformNeedInput event is awaited/consumed first.
// Returns 0 on success, AVERROR(EAGAIN) when the MFT cannot accept input
// right now, AVERROR_EOF when called with NULL while already draining,
// AVERROR_EXTERNAL on failure.
static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;

    if (sample) {
        if (c->async_events) {
            if ((ret = mf_wait_events(avctx)) < 0)
                return ret;
            if (!c->async_need_input)
                return AVERROR(EAGAIN);
        }
        // Mark the very first sample as a discontinuity (stream start).
        if (!c->sample_sent)
            IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
        c->sample_sent = 1;
        hr = IMFTransform_ProcessInput(c->mft, c->in_stream_id, sample, 0);
        if (hr == MF_E_NOTACCEPTING) {
            return AVERROR(EAGAIN);
        } else if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
            return AVERROR_EXTERNAL;
        }
        c->async_need_input = 0;
    } else if (!c->draining) {
        hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
        if (FAILED(hr))
            av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
        // Some MFTs (AC3) will send a frame after each drain command (???), so
        // this is required to make draining actually terminate.
        c->draining = 1;
        c->async_need_input = 0;
    } else {
        return AVERROR_EOF;
    }
    return 0;
}
406
// Pull one output sample from the MFT.
// Returns 0 with *out_sample set on success; AVERROR(EAGAIN) when no output
// is available yet; AVERROR_EOF once draining has completed; <0 on errors.
// A MF_E_TRANSFORM_STREAM_CHANGE is handled in-place by renegotiating the
// output type and retrying.
static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    DWORD st;
    MFT_OUTPUT_DATA_BUFFER out_buffers;
    IMFSample *sample;
    int ret = 0;

    while (1) {
        *out_sample = NULL;
        sample = NULL;

        if (c->async_events) {
            if ((ret = mf_wait_events(avctx)) < 0)
                return ret;
            // No output pending (or drain finished): report EAGAIN/EOF below.
            if (!c->async_have_output || c->draining_done) {
                ret = 0;
                break;
            }
        }

        // Unless the MFT allocates output samples itself, provide one.
        if (!c->out_stream_provides_samples) {
            sample = ff_create_memory_sample(&c->functions, NULL,
                                             c->out_info.cbSize,
                                             c->out_info.cbAlignment);
            if (!sample)
                return AVERROR(ENOMEM);
        }

        out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
            .dwStreamID = c->out_stream_id,
            .pSample = sample,
        };

        st = 0;
        hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);

        // ProcessOutput may return an event collection; we don't use it.
        if (out_buffers.pEvents)
            IMFCollection_Release(out_buffers.pEvents);

        if (!FAILED(hr)) {
            *out_sample = out_buffers.pSample;
            ret = 0;
            break;
        }

        // On failure the sample we passed in (or the MFT set) must be freed.
        if (out_buffers.pSample)
            IMFSample_Release(out_buffers.pSample);

        if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
            if (c->draining)
                c->draining_done = 1;
            ret = 0;
        } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
            av_log(avctx, AV_LOG_WARNING, "stream format change\n");
            ret = mf_choose_output_type(avctx);
            if (ret == 0) // we don't expect renegotiating the input type
                ret = AVERROR_EXTERNAL;
            if (ret > 0) {
                ret = mf_setup_context(avctx);
                if (ret >= 0) {
                    c->async_have_output = 0;
                    continue;
                }
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
        }

        break;
    }

    c->async_have_output = 0;

    // Success path but no sample produced: distinguish drain-complete vs
    // "try again later".
    if (ret >= 0 && !*out_sample)
        ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);

    return ret;
}
488
// receive_packet callback: feed a buffered or freshly fetched frame into
// the MFT (or start draining at EOF), then try to fetch one output sample
// and convert it to an AVPacket.
static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    MFContext *c = avctx->priv_data;
    IMFSample *sample = NULL;
    int ret;

    // Fetch a new input frame unless one is still buffered from a previous
    // EAGAIN round.
    if (!c->frame->buf[0]) {
        ret = ff_encode_get_frame(avctx, c->frame);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;  // AVERROR_EOF falls through: sample stays NULL -> drain
    }

    if (c->frame->buf[0]) {
        sample = mf_avframe_to_sample(avctx, c->frame);
        if (!sample) {
            av_frame_unref(c->frame);
            return AVERROR(ENOMEM);
        }
        // Force a keyframe for explicit I-frame requests and the first sample.
        if (c->is_video && c->codec_api) {
            if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
                ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
        }
    }

    ret = mf_send_sample(avctx, sample);
    if (sample)
        IMFSample_Release(sample);
    // On EAGAIN keep the frame buffered for the next call; otherwise drop it.
    if (ret != AVERROR(EAGAIN))
        av_frame_unref(c->frame);
    if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
        return ret;

    ret = mf_receive_sample(avctx, &sample);
    if (ret < 0)
        return ret;

    ret = mf_sample_to_avpacket(avctx, sample, avpkt);
    IMFSample_Release(sample);

    return ret;
}
530
531// Most encoders seem to enumerate supported audio formats on the output types,
532// at least as far as channel configuration and sample rate is concerned. Pick
533// the one which seems to match best.
534static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
535{
536    MFContext *c = avctx->priv_data;
537    HRESULT hr;
538    UINT32 t;
539    GUID tg;
540    int64_t score = 0;
541
542    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
543    if (!FAILED(hr) && t == avctx->sample_rate)
544        score |= 1LL << 32;
545
546    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
547    if (!FAILED(hr) && t == avctx->ch_layout.nb_channels)
548        score |= 2LL << 32;
549
550    hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
551    if (!FAILED(hr)) {
552        if (IsEqualGUID(&c->main_subtype, &tg))
553            score |= 4LL << 32;
554    }
555
556    // Select the bitrate (lowest priority).
557    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
558    if (!FAILED(hr)) {
559        int diff = (int)t - avctx->bit_rate / 8;
560        if (diff >= 0) {
561            score |= (1LL << 31) - diff; // prefer lower bitrate
562        } else {
563            score |= (1LL << 30) + diff; // prefer higher bitrate
564        }
565    }
566
567    hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
568    if (!FAILED(hr) && t != 0)
569        return -1;
570
571    return score;
572}
573
574static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
575{
576    // (some decoders allow adjusting this freely, but it can also cause failure
577    //  to set the output type - so it's commented for being too fragile)
578    //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
579    //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
580
581    return 0;
582}
583
584static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
585{
586    HRESULT hr;
587    UINT32 t;
588    int64_t score = 0;
589
590    enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
591    if (sformat == AV_SAMPLE_FMT_NONE)
592        return -1; // can not use
593
594    if (sformat == avctx->sample_fmt)
595        score |= 1;
596
597    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
598    if (!FAILED(hr) && t == avctx->sample_rate)
599        score |= 2;
600
601    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
602    if (!FAILED(hr) && t == avctx->ch_layout.nb_channels)
603        score |= 4;
604
605    return score;
606}
607
608static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
609{
610    HRESULT hr;
611    UINT32 t;
612
613    enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
614    if (sformat != avctx->sample_fmt) {
615        av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
616        return AVERROR(EINVAL);
617    }
618
619    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
620    if (FAILED(hr) || t != avctx->sample_rate) {
621        av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
622        return AVERROR(EINVAL);
623    }
624
625    hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
626    if (FAILED(hr) || t != avctx->ch_layout.nb_channels) {
627        av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
628        return AVERROR(EINVAL);
629    }
630
631    return 0;
632}
633
634static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
635{
636    MFContext *c = avctx->priv_data;
637    GUID tg;
638    HRESULT hr;
639    int score = -1;
640
641    hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
642    if (!FAILED(hr)) {
643        if (IsEqualGUID(&c->main_subtype, &tg))
644            score = 1;
645    }
646
647    return score;
648}
649
// Configure the video output type from the codec context: frame size,
// interlacing, frame rate, profile and bitrate attributes, plus a set of
// ICodecAPI tuning values. Some of the ICodecAPI calls must happen before
// SetOutputType, which is why they live here.
static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
{
    MFContext *c = avctx->priv_data;
    AVRational framerate;

    ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
    IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);

    // Prefer the explicit encoder frame rate; otherwise derive it from the
    // time base (scaled by ticks_per_frame).
    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
        framerate = avctx->framerate;
    } else {
        framerate = av_inv_q(avctx->time_base);
        framerate.den *= avctx->ticks_per_frame;
    }

    ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, framerate.num, framerate.den);

    // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
    if (avctx->codec_id == AV_CODEC_ID_H264) {
        UINT32 profile = ff_eAVEncH264VProfile_Base;
        switch (avctx->profile) {
        case FF_PROFILE_H264_MAIN:
            profile = ff_eAVEncH264VProfile_Main;
            break;
        case FF_PROFILE_H264_HIGH:
            profile = ff_eAVEncH264VProfile_High;
            break;
        }
        IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
    }

    IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);

    // Note that some of the ICodecAPI options must be set before SetOutputType.
    if (c->codec_api) {
        if (avctx->bit_rate)
            ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));

        if (c->opt_enc_rc >= 0)
            ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));

        if (c->opt_enc_quality >= 0)
            ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));

        // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
        // defaults this to 1, and that setting is buggy with many of the
        // rate control modes. (0 or 2 b-frames works fine with most rate
        // control modes, but 2 seems buggy with the u_vbr mode.) Setting
        // "scenario" to "camera_record" sets it in CFR mode (where the default
        // is VFR), which makes the encoder avoid dropping frames.
        ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
        avctx->has_b_frames = avctx->max_b_frames > 0;

        ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));

        if (c->opt_enc_scenario >= 0)
            ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
    }

    return 0;
}
711
712static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
713{
714    enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
715    if (pix_fmt != avctx->pix_fmt)
716        return -1; // can not use
717
718    return 0;
719}
720
721static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
722{
723    enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
724    if (pix_fmt != avctx->pix_fmt) {
725        av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
726        return AVERROR(EINVAL);
727    }
728
729    //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
730
731    return 0;
732}
733
// Enumerate the MFT's available output types, pick the best-scoring one
// (or create an empty media type if none are enumerated), adjust it to the
// codec parameters and set it on the transform.
// Returns 1 if an output type was set, 0 if the input type must be set
// first, <0 on error.
static int mf_choose_output_type(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;
    IMFMediaType *out_type = NULL;
    int64_t out_type_score = -1;
    int out_type_index = -1;
    int n;

    av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
    for (n = 0; ; n++) {
        IMFMediaType *type;
        int64_t score = -1;

        hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
        if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
            break;
        if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
            ret = 0;
            goto done;
        }
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
            goto done;
        }

        av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
        ff_media_type_dump(avctx, type);

        if (c->is_video) {
            score = mf_encv_output_score(avctx, type);
        } else if (c->is_audio) {
            score = mf_enca_output_score(avctx, type);
        }

        // Keep the best-scoring candidate; take an extra reference since the
        // enumeration reference is released at the end of the iteration.
        if (score > out_type_score) {
            if (out_type)
                IMFMediaType_Release(out_type);
            out_type = type;
            out_type_score = score;
            out_type_index = n;
            IMFMediaType_AddRef(out_type);
        }

        IMFMediaType_Release(type);
    }

    if (out_type) {
        av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
    } else {
        // No enumerated type: build one from scratch (adjusted below).
        hr = c->functions.MFCreateMediaType(&out_type);
        if (FAILED(hr)) {
            ret = AVERROR(ENOMEM);
            goto done;
        }
    }

    ret = 0;
    if (c->is_video) {
        ret = mf_encv_output_adjust(avctx, out_type);
    } else if (c->is_audio) {
        ret = mf_enca_output_adjust(avctx, out_type);
    }

    if (ret >= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
        ff_media_type_dump(avctx, out_type);

        hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
        if (!FAILED(hr)) {
            ret = 1;
        } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
            ret = 0;
        } else {
            av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
        }
    }

done:
    if (out_type)
        IMFMediaType_Release(out_type);
    return ret;
}
822
// Enumerate the MFT's available input types, pick the best-scoring one,
// validate it against the codec parameters and set it on the transform.
// Returns 1 if an input type was set, 0 if the output type must be set
// first, <0 on error.
static int mf_choose_input_type(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;
    IMFMediaType *in_type = NULL;
    int64_t in_type_score = -1;
    int in_type_index = -1;
    int n;

    av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
    for (n = 0; ; n++) {
        IMFMediaType *type = NULL;
        int64_t score = -1;

        hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
        if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
            break;
        if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
            ret = 0;
            goto done;
        }
        if (FAILED(hr)) {
            av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
            goto done;
        }

        av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
        ff_media_type_dump(avctx, type);

        if (c->is_video) {
            score = mf_encv_input_score(avctx, type);
        } else if (c->is_audio) {
            score = mf_enca_input_score(avctx, type);
        }

        // Keep the best-scoring candidate; take an extra reference since the
        // enumeration reference is released at the end of the iteration.
        if (score > in_type_score) {
            if (in_type)
                IMFMediaType_Release(in_type);
            in_type = type;
            in_type_score = score;
            in_type_index = n;
            IMFMediaType_AddRef(in_type);
        }

        IMFMediaType_Release(type);
    }

    if (in_type) {
        av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
    } else {
        // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
        av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
        ret = 0;
        goto done;
    }

    ret = 0;
    if (c->is_video) {
        ret = mf_encv_input_adjust(avctx, in_type);
    } else if (c->is_audio) {
        ret = mf_enca_input_adjust(avctx, in_type);
    }

    if (ret >= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
        ff_media_type_dump(avctx, in_type);

        hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
        if (!FAILED(hr)) {
            ret = 1;
        } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
            av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
            ret = 0;
        } else {
            av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
            ret = AVERROR_EXTERNAL;
        }
    }

done:
    if (in_type)
        IMFMediaType_Release(in_type);
    return ret;
}
910
911static int mf_negotiate_types(AVCodecContext *avctx)
912{
913    // This follows steps 1-5 on:
914    //  https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
915    // If every MFT implementer does this correctly, this loop should at worst
916    // be repeated once.
917    int need_input = 1, need_output = 1;
918    int n;
919    for (n = 0; n < 2 && (need_input || need_output); n++) {
920        int ret;
921        ret = mf_choose_input_type(avctx);
922        if (ret < 0)
923            return ret;
924        need_input = ret < 1;
925        ret = mf_choose_output_type(avctx);
926        if (ret < 0)
927            return ret;
928        need_output = ret < 1;
929    }
930    if (need_input || need_output) {
931        av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
932               need_input, need_output);
933        return AVERROR_EXTERNAL;
934    }
935    return 0;
936}
937
938static int mf_setup_context(AVCodecContext *avctx)
939{
940    MFContext *c = avctx->priv_data;
941    HRESULT hr;
942    int ret;
943
944    hr = IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info);
945    if (FAILED(hr))
946        return AVERROR_EXTERNAL;
947    av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
948           (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);
949
950    hr = IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info);
951    if (FAILED(hr))
952        return AVERROR_EXTERNAL;
953    c->out_stream_provides_samples =
954        (c->out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
955        (c->out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
956    av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
957           (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
958           c->out_stream_provides_samples ? " (provides samples)" : "");
959
960    if ((ret = mf_output_type_get(avctx)) < 0)
961        return ret;
962
963    return 0;
964}
965
966static int mf_unlock_async(AVCodecContext *avctx)
967{
968    MFContext *c = avctx->priv_data;
969    HRESULT hr;
970    IMFAttributes *attrs;
971    UINT32 v;
972    int res = AVERROR_EXTERNAL;
973
974    // For hw encoding we unfortunately need to use async mode, otherwise
975    // play it safe and avoid it.
976    if (!(c->is_video && c->opt_enc_hw))
977        return 0;
978
979    hr = IMFTransform_GetAttributes(c->mft, &attrs);
980    if (FAILED(hr)) {
981        av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
982        goto err;
983    }
984
985    hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
986    if (FAILED(hr)) {
987        av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
988        goto err;
989    }
990
991    if (!v) {
992        av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
993        goto err;
994    }
995
996    hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
997    if (FAILED(hr)) {
998        av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
999        goto err;
1000    }
1001
1002    hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
1003    if (FAILED(hr)) {
1004        av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
1005        goto err;
1006    }
1007
1008    res = 0;
1009
1010err:
1011    IMFAttributes_Release(attrs);
1012    return res;
1013}
1014
1015static int mf_create(void *log, MFFunctions *f, IMFTransform **mft,
1016                     const AVCodec *codec, int use_hw)
1017{
1018    int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
1019    const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
1020    MFT_REGISTER_TYPE_INFO reg = {0};
1021    GUID category;
1022    int ret;
1023
1024    *mft = NULL;
1025
1026    if (!subtype)
1027        return AVERROR(ENOSYS);
1028
1029    reg.guidSubtype = *subtype;
1030
1031    if (is_audio) {
1032        reg.guidMajorType = MFMediaType_Audio;
1033        category = MFT_CATEGORY_AUDIO_ENCODER;
1034    } else {
1035        reg.guidMajorType = MFMediaType_Video;
1036        category = MFT_CATEGORY_VIDEO_ENCODER;
1037    }
1038
1039    if ((ret = ff_instantiate_mf(log, f, category, NULL, &reg, use_hw, mft)) < 0)
1040        return ret;
1041
1042    return 0;
1043}
1044
// Create and configure the MFT encoder: instantiate the transform,
// unlock async mode for hw encoders, negotiate input/output types, cache
// stream info, and signal the MFT to start streaming. Called once from
// mf_init(); on any error, mf_init() runs mf_close() for cleanup.
static int mf_init_encoder(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;
    HRESULT hr;
    int ret;
    const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
    int use_hw = 0;

    c->frame = av_frame_alloc();
    if (!c->frame)
        return AVERROR(ENOMEM);

    c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
    c->is_video = !c->is_audio;
    c->reorder_delay = AV_NOPTS_VALUE;

    // Hardware encoding is only attempted for video, and only when the user
    // opted in via the hw_encoding AVOption (opt_enc_hw).
    if (c->is_video && c->opt_enc_hw)
        use_hw = 1;

    if (!subtype)
        return AVERROR(ENOSYS);

    c->main_subtype = *subtype;

    if ((ret = mf_create(avctx, &c->functions, &c->mft, avctx->codec, use_hw)) < 0)
        return ret;

    if ((ret = mf_unlock_async(avctx)) < 0)
        return ret;

    // ICodecAPI is optional; a failed query is deliberately ignored and
    // c->codec_api simply stays NULL.
    hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
    if (!FAILED(hr))
        av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");


    // E_NOTIMPL means the MFT has a fixed number of streams, addressed by
    // the default IDs 0/0.
    hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
    if (hr == E_NOTIMPL) {
        c->in_stream_id = c->out_stream_id = 0;
    } else if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    if ((ret = mf_negotiate_types(avctx)) < 0)
        return ret;

    if ((ret = mf_setup_context(avctx)) < 0)
        return ret;

    // The two ProcessMessage notifications below must be sent in this
    // order before feeding any input.
    hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
        return AVERROR_EXTERNAL;
    }

    // Async (hw) video encoders may publish extradata shortly after init
    // with no event to wait on; poll with exponential backoff when the
    // caller requires global headers and none are available yet.
    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
        c->is_video && !avctx->extradata) {
        int sleep = 10000, total = 0;
        av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
        while (total < 70*1000) {
            // The Qualcomm H264 encoder on SD835 doesn't provide extradata
            // immediately, but it becomes available soon after init (without
            // any waitable event). In practice, it's available after less
            // than 10 ms, but wait for up to 70 ms before giving up.
            // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
            // of the QSV H264 encoder at least) don't provide extradata this
            // way at all, not even after encoding a frame - it's only
            // available prepended to frames.
            av_usleep(sleep);
            total += sleep;
            mf_output_type_get(avctx);
            if (avctx->extradata)
                break;
            sleep *= 2;
        }
        av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
               avctx->extradata ? "Got" : "Didn't get", total / 1000);
    }

    return 0;
}
1132
#if !HAVE_UWP
// Resolve one mfplat.dll entry point at runtime via dlsym() (the
// compat/w32dlfcn.h wrapper around GetProcAddress) and store it in
// context->functions; fail hard if the symbol is missing.
#define LOAD_MF_FUNCTION(context, func_name) \
    context->functions.func_name = (void *)dlsym(context->library, #func_name); \
    if (!context->functions.func_name) { \
        av_log(context, AV_LOG_ERROR, "DLL mfplat.dll failed to find function "\
           #func_name "\n"); \
        return AVERROR_UNKNOWN; \
    }
#else
// In UWP (which lacks LoadLibrary), just link directly against
// the functions - this requires building with new/complete enough
// import libraries.
#define LOAD_MF_FUNCTION(context, func_name) \
    context->functions.func_name = func_name; \
    if (!context->functions.func_name) { \
        av_log(context, AV_LOG_ERROR, "Failed to find function " #func_name \
               "\n"); \
        return AVERROR_UNKNOWN; \
    }
#endif
1153
// Windows N editions does not provide MediaFoundation by default.
// So to avoid DLL loading error, MediaFoundation is dynamically loaded except
// on UWP build since LoadLibrary is not available on it.
//
// Populates c->functions with the mfplat.dll entry points used by this
// file; returns 0 on success or AVERROR_UNKNOWN if the DLL or any symbol
// is unavailable (LOAD_MF_FUNCTION returns from this function on failure).
static int mf_load_library(AVCodecContext *avctx)
{
    MFContext *c = avctx->priv_data;

#if !HAVE_UWP
    c->library = dlopen("mfplat.dll", 0);

    if (!c->library) {
        av_log(c, AV_LOG_ERROR, "DLL mfplat.dll failed to open\n");
        return AVERROR_UNKNOWN;
    }
#endif

    LOAD_MF_FUNCTION(c, MFStartup);
    LOAD_MF_FUNCTION(c, MFShutdown);
    LOAD_MF_FUNCTION(c, MFCreateAlignedMemoryBuffer);
    LOAD_MF_FUNCTION(c, MFCreateSample);
    LOAD_MF_FUNCTION(c, MFCreateMediaType);
    // MFTEnumEx is missing in Windows Vista's mfplat.dll.
    LOAD_MF_FUNCTION(c, MFTEnumEx);

    return 0;
}
1180
1181static int mf_close(AVCodecContext *avctx)
1182{
1183    MFContext *c = avctx->priv_data;
1184
1185    if (c->codec_api)
1186        ICodecAPI_Release(c->codec_api);
1187
1188    if (c->async_events)
1189        IMFMediaEventGenerator_Release(c->async_events);
1190
1191#if !HAVE_UWP
1192    if (c->library)
1193        ff_free_mf(&c->functions, &c->mft);
1194
1195    dlclose(c->library);
1196    c->library = NULL;
1197#else
1198    ff_free_mf(&c->functions, &c->mft);
1199#endif
1200
1201    av_frame_free(&c->frame);
1202
1203    av_freep(&avctx->extradata);
1204    avctx->extradata_size = 0;
1205
1206    return 0;
1207}
1208
1209static int mf_init(AVCodecContext *avctx)
1210{
1211    int ret;
1212    if ((ret = mf_load_library(avctx)) == 0) {
1213           if ((ret = mf_init_encoder(avctx)) == 0) {
1214                return 0;
1215        }
1216    }
1217    mf_close(avctx);
1218    return ret;
1219}
1220
// Byte offset of an AVOption-backed field within MFContext.
#define OFFSET(x) offsetof(MFContext, x)

// Declare the AVClass and FFCodec for one MediaFoundation encoder named
// "<NAME>_mf". OPTS is the AVOption table (or NULL), FMTS and CAPS paste
// in the supported-format and capability initializers.
#define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, FMTS, CAPS) \
    static const AVClass ff_ ## NAME ## _mf_encoder_class = {                  \
        .class_name = #NAME "_mf",                                             \
        .item_name  = av_default_item_name,                                    \
        .option     = OPTS,                                                    \
        .version    = LIBAVUTIL_VERSION_INT,                                   \
    };                                                                         \
    const FFCodec ff_ ## NAME ## _mf_encoder = {                               \
        .p.priv_class   = &ff_ ## NAME ## _mf_encoder_class,                   \
        .p.name         = #NAME "_mf",                                         \
        .p.long_name    = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"),    \
        .p.type         = AVMEDIA_TYPE_ ## MEDIATYPE,                          \
        .p.id           = AV_CODEC_ID_ ## ID,                                  \
        .priv_data_size = sizeof(MFContext),                                   \
        .init           = mf_init,                                             \
        .close          = mf_close,                                            \
        FF_CODEC_RECEIVE_PACKET_CB(mf_receive_packet),                         \
        FMTS                                                                   \
        CAPS                                                                   \
        .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |                       \
                          FF_CODEC_CAP_INIT_CLEANUP,                           \
    };
1245
// Audio encoders accept only packed signed 16-bit PCM input.
#define AFMTS \
        .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,    \
                                                         AV_SAMPLE_FMT_NONE },
// Audio encoder capabilities shared by all audio MF encoders below.
#define ACAPS \
        .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
                          AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE,

MF_ENCODER(AUDIO, aac,         AAC, NULL, AFMTS, ACAPS);
MF_ENCODER(AUDIO, ac3,         AC3, NULL, AFMTS, ACAPS);
MF_ENCODER(AUDIO, mp3,         MP3, NULL, AFMTS, ACAPS);
1256
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
// Video encoder options. The constant values map to the ff_eAVEnc*/
// ff_eAVScenarioInfo enums; a default of -1 means "leave the MFT's own
// default in place".
static const AVOption venc_opts[] = {
    {"rate_control",  "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
    { "default",      "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
    { "cbr",          "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
    { "pc_vbr",       "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
    { "u_vbr",        "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
    { "quality",      "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
    // The following rate_control modes require Windows 8.
    { "ld_vbr",       "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
    { "g_vbr",        "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
    { "gld_vbr",      "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},

    {"scenario",          "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
    { "default",          "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
    { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
    { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
    { "archive",          "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
    { "live_streaming",   "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
    { "camera_record",    "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
    { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},

    {"quality",       "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
    {"hw_encoding",   "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
    {NULL}
};
1283
// Video encoders accept NV12 and YUV420P input.
#define VFMTS \
        .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,       \
                                                        AV_PIX_FMT_YUV420P,    \
                                                        AV_PIX_FMT_NONE },
// Video encoder capabilities shared by all video MF encoders below.
#define VCAPS \
        .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
                          AV_CODEC_CAP_DR1,

MF_ENCODER(VIDEO, h264,        H264, venc_opts, VFMTS, VCAPS);
MF_ENCODER(VIDEO, hevc,        HEVC, venc_opts, VFMTS, VCAPS);
1294