1/*
2 * Intel MediaSDK QSV based HEVC encoder
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21
22#include <stdint.h>
23#include <sys/types.h>
24
25#include <mfx/mfxvideo.h>
26
27#include "libavutil/common.h"
28#include "libavutil/opt.h"
29
30#include "avcodec.h"
31#include "bytestream.h"
32#include "codec_internal.h"
33#include "get_bits.h"
34#include "hevc.h"
35#include "hevcdec.h"
36#include "h2645_parse.h"
37#include "qsv.h"
38#include "qsv_internal.h"
39#include "qsvenc.h"
40
/* Values accepted by the "load_plugin" private option. */
enum LoadPlugin {
    LOAD_PLUGIN_NONE    = 0, /* do not select a plugin via load_plugin */
    LOAD_PLUGIN_HEVC_SW = 1, /* select the uid_hevcenc_sw plugin UID   */
    LOAD_PLUGIN_HEVC_HW = 2, /* select the uid_hevcenc_hw plugin UID   */
};
46
/**
 * Private context of the hevc_qsv encoder (used as AVCodecContext.priv_data).
 *
 * The option table in this file addresses members of this struct through
 * offsetof(), so member names are part of the option wiring.
 */
typedef struct QSVHEVCEncContext {
    /* NOTE(review): presumably has to stay the first member so the AVOption
     * machinery can find the class - confirm against the AVClass docs. */
    AVClass *class;
    /* Shared QSV encoder state, handed to the ff_qsv_enc_* / ff_qsv_encode helpers. */
    QSVEncContext qsv;
    /* Value of the "load_plugin" option; holds one of enum LoadPlugin. */
    int load_plugin;
} QSVHEVCEncContext;
52
53static int generate_fake_vps(QSVEncContext *q, AVCodecContext *avctx)
54{
55    GetByteContext gbc;
56    PutByteContext pbc;
57
58    GetBitContext gb;
59    H2645RBSP sps_rbsp = { NULL };
60    H2645NAL sps_nal = { NULL };
61    HEVCSPS sps = { 0 };
62    HEVCVPS vps = { 0 };
63    uint8_t vps_buf[128], vps_rbsp_buf[128];
64    uint8_t *new_extradata;
65    unsigned int sps_id;
66    int ret, i, type, vps_size;
67
68    if (!avctx->extradata_size) {
69        av_log(avctx, AV_LOG_ERROR, "No extradata returned from libmfx\n");
70        return AVERROR_UNKNOWN;
71    }
72
73    av_fast_padded_malloc(&sps_rbsp.rbsp_buffer, &sps_rbsp.rbsp_buffer_alloc_size, avctx->extradata_size);
74    if (!sps_rbsp.rbsp_buffer)
75        return AVERROR(ENOMEM);
76
77    /* parse the SPS */
78    ret = ff_h2645_extract_rbsp(avctx->extradata + 4, avctx->extradata_size - 4, &sps_rbsp, &sps_nal, 1);
79    if (ret < 0) {
80        av_log(avctx, AV_LOG_ERROR, "Error unescaping the SPS buffer\n");
81        return ret;
82    }
83
84    ret = init_get_bits8(&gb, sps_nal.data, sps_nal.size);
85    if (ret < 0) {
86        av_freep(&sps_rbsp.rbsp_buffer);
87        return ret;
88    }
89
90    get_bits(&gb, 1);
91    type = get_bits(&gb, 6);
92    if (type != HEVC_NAL_SPS) {
93        av_log(avctx, AV_LOG_ERROR, "Unexpected NAL type in the extradata: %d\n",
94               type);
95        av_freep(&sps_rbsp.rbsp_buffer);
96        return AVERROR_INVALIDDATA;
97    }
98    get_bits(&gb, 9);
99
100    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, NULL, avctx);
101    av_freep(&sps_rbsp.rbsp_buffer);
102    if (ret < 0) {
103        av_log(avctx, AV_LOG_ERROR, "Error parsing the SPS\n");
104        return ret;
105    }
106
107    /* generate the VPS */
108    vps.vps_max_layers     = 1;
109    vps.vps_max_sub_layers = sps.max_sub_layers;
110    vps.vps_temporal_id_nesting_flag = sps.temporal_id_nesting_flag;
111    memcpy(&vps.ptl, &sps.ptl, sizeof(vps.ptl));
112    vps.vps_sub_layer_ordering_info_present_flag = 1;
113    for (i = 0; i < HEVC_MAX_SUB_LAYERS; i++) {
114        vps.vps_max_dec_pic_buffering[i] = sps.temporal_layer[i].max_dec_pic_buffering;
115        vps.vps_num_reorder_pics[i]      = sps.temporal_layer[i].num_reorder_pics;
116        vps.vps_max_latency_increase[i]  = sps.temporal_layer[i].max_latency_increase;
117    }
118
119    vps.vps_num_layer_sets                  = 1;
120    vps.vps_timing_info_present_flag        = sps.vui.vui_timing_info_present_flag;
121    vps.vps_num_units_in_tick               = sps.vui.vui_num_units_in_tick;
122    vps.vps_time_scale                      = sps.vui.vui_time_scale;
123    vps.vps_poc_proportional_to_timing_flag = sps.vui.vui_poc_proportional_to_timing_flag;
124    vps.vps_num_ticks_poc_diff_one          = sps.vui.vui_num_ticks_poc_diff_one_minus1 + 1;
125    vps.vps_num_hrd_parameters              = 0;
126
127    /* generate the encoded RBSP form of the VPS */
128    ret = ff_hevc_encode_nal_vps(&vps, sps.vps_id, vps_rbsp_buf, sizeof(vps_rbsp_buf));
129    if (ret < 0) {
130        av_log(avctx, AV_LOG_ERROR, "Error writing the VPS\n");
131        return ret;
132    }
133
134    /* escape and add the startcode */
135    bytestream2_init(&gbc, vps_rbsp_buf, ret);
136    bytestream2_init_writer(&pbc, vps_buf, sizeof(vps_buf));
137
138    bytestream2_put_be32(&pbc, 1);                 // startcode
139    bytestream2_put_byte(&pbc, HEVC_NAL_VPS << 1); // NAL
140    bytestream2_put_byte(&pbc, 1);                 // header
141
142    while (bytestream2_get_bytes_left(&gbc)) {
143        if (bytestream2_get_bytes_left(&gbc) >= 3 && bytestream2_peek_be24(&gbc) <= 3) {
144            bytestream2_put_be24(&pbc, 3);
145            bytestream2_skip(&gbc, 2);
146        } else
147            bytestream2_put_byte(&pbc, bytestream2_get_byte(&gbc));
148    }
149
150    vps_size = bytestream2_tell_p(&pbc);
151    new_extradata = av_mallocz(vps_size + avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
152    if (!new_extradata)
153        return AVERROR(ENOMEM);
154    memcpy(new_extradata, vps_buf, vps_size);
155    memcpy(new_extradata + vps_size, avctx->extradata, avctx->extradata_size);
156
157    av_freep(&avctx->extradata);
158    avctx->extradata       = new_extradata;
159    avctx->extradata_size += vps_size;
160
161    return 0;
162}
163
164static av_cold int qsv_enc_init(AVCodecContext *avctx)
165{
166    QSVHEVCEncContext *q = avctx->priv_data;
167    int ret;
168
169    if (q->load_plugin != LOAD_PLUGIN_NONE) {
170        static const char * const uid_hevcenc_sw = "2fca99749fdb49aeb121a5b63ef568f7";
171        static const char * const uid_hevcenc_hw = "6fadc791a0c2eb479ab6dcd5ea9da347";
172
173        if (q->qsv.load_plugins[0]) {
174            av_log(avctx, AV_LOG_WARNING,
175                   "load_plugins is not empty, but load_plugin is not set to 'none'."
176                   "The load_plugin value will be ignored.\n");
177        } else {
178            av_freep(&q->qsv.load_plugins);
179
180            if (q->load_plugin == LOAD_PLUGIN_HEVC_SW)
181                q->qsv.load_plugins = av_strdup(uid_hevcenc_sw);
182            else
183                q->qsv.load_plugins = av_strdup(uid_hevcenc_hw);
184
185            if (!q->qsv.load_plugins)
186                return AVERROR(ENOMEM);
187        }
188    }
189
190    // HEVC and H264 meaning of the value is shifted by 1, make it consistent
191    q->qsv.idr_interval++;
192
193    ret = ff_qsv_enc_init(avctx, &q->qsv);
194    if (ret < 0)
195        return ret;
196
197    if (!q->qsv.hevc_vps) {
198        ret = generate_fake_vps(&q->qsv, avctx);
199        if (ret < 0) {
200            ff_qsv_enc_close(avctx, &q->qsv);
201            return ret;
202        }
203    }
204
205    return 0;
206}
207
208static int qsv_enc_frame(AVCodecContext *avctx, AVPacket *pkt,
209                         const AVFrame *frame, int *got_packet)
210{
211    QSVHEVCEncContext *q = avctx->priv_data;
212
213    return ff_qsv_encode(avctx, &q->qsv, pkt, frame, got_packet);
214}
215
216static av_cold int qsv_enc_close(AVCodecContext *avctx)
217{
218    QSVHEVCEncContext *q = avctx->priv_data;
219
220    return ff_qsv_enc_close(avctx, &q->qsv);
221}
222
223#define OFFSET(x) offsetof(QSVHEVCEncContext, x)
224#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
225static const AVOption options[] = {
226    QSV_COMMON_OPTS
227    QSV_OPTION_RDO
228    QSV_OPTION_MAX_FRAME_SIZE
229    QSV_OPTION_MAX_SLICE_SIZE
230    QSV_OPTION_MBBRC
231    QSV_OPTION_EXTBRC
232    QSV_OPTION_P_STRATEGY
233    QSV_OPTION_B_STRATEGY
234    QSV_OPTION_DBLK_IDC
235    QSV_OPTION_LOW_DELAY_BRC
236    QSV_OPTION_MAX_MIN_QP
237
238    { "idr_interval", "Distance (in I-frames) between IDR frames", OFFSET(qsv.idr_interval), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE, "idr_interval" },
239    { "begin_only", "Output an IDR-frame only at the beginning of the stream", 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, VE, "idr_interval" },
240    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_HW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
241    { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VE, "load_plugin" },
242    { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VE, "load_plugin" },
243    { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VE, "load_plugin" },
244
245    { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an internal session",
246        OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VE },
247
248    { "look_ahead_depth", "Depth of look ahead in number frames, available when extbrc option is enabled", OFFSET(qsv.look_ahead_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, VE },
249    { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
250    { "unknown", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN      }, INT_MIN, INT_MAX,     VE, "profile" },
251    { "main",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAIN    }, INT_MIN, INT_MAX,     VE, "profile" },
252    { "main10",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAIN10  }, INT_MIN, INT_MAX,     VE, "profile" },
253    { "mainsp",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAINSP  }, INT_MIN, INT_MAX,     VE, "profile" },
254    { "rext",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_REXT    }, INT_MIN, INT_MAX,     VE, "profile" },
255#if QSV_VERSION_ATLEAST(1, 32)
256    { "scc",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_SCC     }, INT_MIN, INT_MAX,     VE, "profile" },
257#endif
258
259    { "gpb", "1: GPB (generalized P/B frame); 0: regular P frame", OFFSET(qsv.gpb), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE},
260
261    { "tile_cols",  "Number of columns for tiled encoding",   OFFSET(qsv.tile_cols),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UINT16_MAX, VE },
262    { "tile_rows",  "Number of rows for tiled encoding",      OFFSET(qsv.tile_rows),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UINT16_MAX, VE },
263    { "recovery_point_sei", "Insert recovery point SEI messages",       OFFSET(qsv.recovery_point_sei),      AV_OPT_TYPE_INT, { .i64 = -1 },               -1,          1, VE },
264    { "aud", "Insert the Access Unit Delimiter NAL", OFFSET(qsv.aud), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
265    { "pic_timing_sei",    "Insert picture timing SEI with pic_struct_syntax element", OFFSET(qsv.pic_timing_sei), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
266    { "transform_skip", "Turn this option ON to enable transformskip",   OFFSET(qsv.transform_skip),          AV_OPT_TYPE_INT,    { .i64 = -1},   -1, 1,  VE},
267    { "int_ref_type", "Intra refresh type. B frames should be set to 0",         OFFSET(qsv.int_ref_type),            AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE, "int_ref_type" },
268        { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, .flags = VE, "int_ref_type" },
269        { "vertical", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, .flags = VE, "int_ref_type" },
270        { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, .flags = VE, "int_ref_type" },
271    { "int_ref_cycle_size", "Number of frames in the intra refresh cycle",       OFFSET(qsv.int_ref_cycle_size),      AV_OPT_TYPE_INT, { .i64 = -1 },               -1, UINT16_MAX, VE },
272    { "int_ref_qp_delta",   "QP difference for the refresh MBs",                 OFFSET(qsv.int_ref_qp_delta),        AV_OPT_TYPE_INT, { .i64 = INT16_MIN }, INT16_MIN,  INT16_MAX, VE },
273    { "int_ref_cycle_dist",   "Distance between the beginnings of the intra-refresh cycles in frames",  OFFSET(qsv.int_ref_cycle_dist),      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT16_MAX, VE },
274
275    { NULL },
276};
277
278static const AVClass class = {
279    .class_name = "hevc_qsv encoder",
280    .item_name  = av_default_item_name,
281    .option     = options,
282    .version    = LIBAVUTIL_VERSION_INT,
283};
284
/*
 * Codec-specific default values for generic options, given as
 * { option name, value string } pairs. The table is attached to the codec
 * through the .defaults member of ff_hevc_qsv_encoder and ends with a
 * { NULL } sentinel.
 */
static const FFCodecDefault qsv_enc_defaults[] = {
    { "b",         "1M"    },
    { "refs",      "0"     },
    // same as the x264 default
    { "g",         "248"   },
    { "bf",        "-1"    },
    { "qmin",      "-1"    },
    { "qmax",      "-1"    },
    { "trellis",   "-1"    },
    { "flags",     "+cgop" },
    { NULL },
};
297
298const FFCodec ff_hevc_qsv_encoder = {
299    .p.name         = "hevc_qsv",
300    .p.long_name    = NULL_IF_CONFIG_SMALL("HEVC (Intel Quick Sync Video acceleration)"),
301    .priv_data_size = sizeof(QSVHEVCEncContext),
302    .p.type         = AVMEDIA_TYPE_VIDEO,
303    .p.id           = AV_CODEC_ID_HEVC,
304    .init           = qsv_enc_init,
305    FF_CODEC_ENCODE_CB(qsv_enc_frame),
306    .close          = qsv_enc_close,
307    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
308    .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
309                                                    AV_PIX_FMT_P010,
310                                                    AV_PIX_FMT_YUYV422,
311                                                    AV_PIX_FMT_Y210,
312                                                    AV_PIX_FMT_QSV,
313                                                    AV_PIX_FMT_BGRA,
314                                                    AV_PIX_FMT_X2RGB10,
315                                                    AV_PIX_FMT_NONE },
316    .p.priv_class   = &class,
317    .defaults       = qsv_enc_defaults,
318    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
319    .p.wrapper_name = "qsv",
320    .hw_configs     = ff_qsv_enc_hw_configs,
321};
322