1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (C) STMicroelectronics SA 2015 4 * Authors: Yannick Fertre <yannick.fertre@st.com> 5 * Hugues Fruchet <hugues.fruchet@st.com> 6 */ 7 8#include "hva.h" 9#include "hva-hw.h" 10 11#define MAX_SPS_PPS_SIZE 128 12 13#define BITSTREAM_OFFSET_MASK 0x7F 14 15/* video max size*/ 16#define H264_MAX_SIZE_W 1920 17#define H264_MAX_SIZE_H 1920 18 19/* macroBlocs number (width & height) */ 20#define MB_W(w) ((w + 0xF) / 0x10) 21#define MB_H(h) ((h + 0xF) / 0x10) 22 23/* formula to get temporal or spatial data size */ 24#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16) 25 26#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2) 27#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16) 28#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8) 29#define SLICE_HEADER_SIZE (4 * 16) 30#define BRC_DATA_SIZE (5 * 16) 31 32/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */ 33#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2) 34 35/* 36 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB 37 * for deblocking with size=4*16*MBx*2 38 */ 39#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2) 40 41/* factor for bitrate and cpb buffer size max values if profile >= high */ 42#define H264_FACTOR_HIGH 1200 43 44/* factor for bitrate and cpb buffer size max values if profile < high */ 45#define H264_FACTOR_BASELINE 1000 46 47/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */ 48#define H264_FILLER_DATA_SIZE 6 49 50struct h264_profile { 51 enum v4l2_mpeg_video_h264_level level; 52 u32 max_mb_per_seconds; 53 u32 max_frame_size; 54 u32 max_bitrate; 55 u32 max_cpb_size; 56 u32 min_comp_ratio; 57}; 58 59static const struct h264_profile h264_infos_list[] = { 60 {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2}, 61 {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2}, 62 {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2}, 63 {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2}, 64 {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2}, 65 {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2}, 66 {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2}, 67 {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2}, 68 {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2}, 69 {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4}, 70 {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4}, 71 {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4}, 72 {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2}, 73 {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2}, 74 {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2}, 75 {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2} 76}; 77 78enum hva_brc_type { 79 BRC_TYPE_NONE = 0, 80 BRC_TYPE_CBR = 1, 81 BRC_TYPE_VBR = 2, 82 BRC_TYPE_VBR_LOW_DELAY = 3 83}; 84 85enum hva_entropy_coding_mode { 86 CAVLC = 0, 87 CABAC = 1 88}; 89 90enum hva_picture_coding_type { 91 PICTURE_CODING_TYPE_I = 0, 92 PICTURE_CODING_TYPE_P = 1, 93 PICTURE_CODING_TYPE_B = 2 94}; 95 96enum hva_h264_sampling_mode { 97 SAMPLING_MODE_NV12 = 0, 98 SAMPLING_MODE_UYVY = 1, 99 SAMPLING_MODE_RGB3 = 3, 100 SAMPLING_MODE_XRGB4 = 4, 101 SAMPLING_MODE_NV21 = 8, 102 SAMPLING_MODE_VYUY = 9, 103 SAMPLING_MODE_BGR3 = 11, 104 SAMPLING_MODE_XBGR4 = 12, 105 SAMPLING_MODE_RGBX4 = 20, 106 SAMPLING_MODE_BGRX4 = 28 107}; 108 109enum hva_h264_nalu_type { 110 NALU_TYPE_UNKNOWN = 0, 111 NALU_TYPE_SLICE = 1, 112 NALU_TYPE_SLICE_DPA = 2, 113 NALU_TYPE_SLICE_DPB = 3, 114 NALU_TYPE_SLICE_DPC = 4, 115 NALU_TYPE_SLICE_IDR = 5, 116 NALU_TYPE_SEI = 6, 117 NALU_TYPE_SPS = 7, 118 NALU_TYPE_PPS = 8, 119 NALU_TYPE_AU_DELIMITER = 9, 120 NALU_TYPE_SEQ_END = 10, 121 NALU_TYPE_STREAM_END = 11, 122 NALU_TYPE_FILLER_DATA = 12, 123 NALU_TYPE_SPS_EXT = 13, 124 NALU_TYPE_PREFIX_UNIT = 14, 125 NALU_TYPE_SUBSET_SPS = 15, 126 NALU_TYPE_SLICE_AUX = 19, 127 NALU_TYPE_SLICE_EXT = 20 128}; 129 130enum hva_h264_sei_payload_type { 131 SEI_BUFFERING_PERIOD = 0, 132 SEI_PICTURE_TIMING = 1, 133 SEI_STEREO_VIDEO_INFO = 21, 134 SEI_FRAME_PACKING_ARRANGEMENT = 45 135}; 136 137/* 138 * stereo Video Info struct 139 */ 140struct hva_h264_stereo_video_sei { 141 u8 field_views_flag; 142 u8 top_field_is_left_view_flag; 143 u8 current_frame_is_left_view_flag; 144 u8 next_frame_is_second_view_flag; 145 u8 left_view_self_contained_flag; 146 u8 right_view_self_contained_flag; 147}; 148 149/* 150 * struct hva_h264_td 151 * 152 * @frame_width: width in pixels of the buffer containing the input frame 153 * @frame_height: height in pixels of the buffer containing the input frame 154 * @frame_num: the parameter to be written in the slice header 155 * @picture_coding_type: type I, P or B 156 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2 157 * @first_picture_in_sequence: flag telling to encoder that this is the 158 * first picture in a video sequence. 159 * Used for VBR 160 * @slice_size_type: 0 = no constraint to close the slice 161 * 1= a slice is closed as soon as the slice_mb_size limit 162 * is reached 163 * 2= a slice is closed as soon as the slice_byte_size limit 164 * is reached 165 * 3= a slice is closed as soon as either the slice_byte_size 166 * limit or the slice_mb_size limit is reached 167 * @slice_mb_size: defines the slice size in number of macroblocks 168 * (used when slice_size_type=1 or slice_size_type=3) 169 * @ir_param_option: defines the number of macroblocks per frame to be 170 * refreshed by AIR algorithm OR the refresh period 171 * by CIR algorithm 172 * @intra_refresh_type: enables the adaptive intra refresh algorithm. 173 * Disable=0 / Adaptative=1 and Cycle=2 as intra refresh 174 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS 175 * @transform_mode: controls the use of 4x4/8x8 transform mode 176 * @disable_deblocking_filter_idc: 177 * 0: specifies that all luma and chroma block edges of 178 * the slice are filtered. 179 * 1: specifies that deblocking is disabled for all block 180 * edges of the slice. 181 * 2: specifies that all luma and chroma block edges of 182 * the slice are filtered with exception of the block edges 183 * that coincide with slice boundaries 184 * @slice_alpha_c0_offset_div2: to be written in slice header, 185 * controls deblocking 186 * @slice_beta_offset_div2: to be written in slice header, 187 * controls deblocking 188 * @encoder_complexity: encoder complexity control (IME). 189 * 0 = I_16x16, P_16x16, Full ME Complexity 190 * 1 = I_16x16, I_NxN, P_16x16, Full ME Complexity 191 * 2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity 192 * 4 = I_16x16, P_16x16, Reduced ME Complexity 193 * 5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity 194 * 6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity 195 * @chroma_qp_index_offset: coming from picture parameter set 196 * (PPS see [H.264 STD] 7.4.2.2) 197 * @entropy_coding_mode: entropy coding mode. 198 * 0 = CAVLC 199 * 1 = CABAC 200 * @brc_type: selects the bit-rate control algorithm 201 * 0 = constant Qp, (no BRC) 202 * 1 = CBR 203 * 2 = VBR 204 * @quant: Quantization param used in case of fix QP encoding (no BRC) 205 * @non_VCL_NALU_Size: size of non-VCL NALUs (SPS, PPS, filler), 206 * used by BRC 207 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC 208 * @bit_rate: target bitrate, for BRC 209 * @qp_min: min QP threshold 210 * @qp_max: max QP threshold 211 * @framerate_num: target framerate numerator , used by BRC 212 * @framerate_den: target framerate denomurator , used by BRC 213 * @delay: End-to-End Initial Delay 214 * @strict_HRD_compliancy: flag for HDR compliancy (1) 215 * May impact quality encoding 216 * @addr_source_buffer: address of input frame buffer for current frame 217 * @addr_fwd_Ref_Buffer: address of reference frame buffer 218 * @addr_rec_buffer: address of reconstructed frame buffer 219 * @addr_output_bitstream_start: output bitstream start address 220 * @addr_output_bitstream_end: output bitstream end address 221 * @addr_external_sw : address of external search window 222 * @addr_lctx : address of context picture buffer 223 * @addr_local_rec_buffer: address of local reconstructed buffer 224 * @addr_spatial_context: address of spatial context buffer 225 * @bitstream_offset: offset in bits between aligned bitstream start 226 * address and first bit to be written by HVA. 227 * Range value is [0..63] 228 * @sampling_mode: Input picture format . 229 * 0: YUV420 semi_planar Interleaved 230 * 1: YUV422 raster Interleaved 231 * @addr_param_out: address of output parameters structure 232 * @addr_scaling_matrix: address to the coefficient of 233 * the inverse scaling matrix 234 * @addr_scaling_matrix_dir: address to the coefficient of 235 * the direct scaling matrix 236 * @addr_cabac_context_buffer: address of cabac context buffer 237 * @GmvX: Input information about the horizontal global displacement of 238 * the encoded frame versus the previous one 239 * @GmvY: Input information about the vertical global displacement of 240 * the encoded frame versus the previous one 241 * @window_width: width in pixels of the window to be encoded inside 242 * the input frame 243 * @window_height: width in pixels of the window to be encoded inside 244 * the input frame 245 * @window_horizontal_offset: horizontal offset in pels for input window 246 * within input frame 247 * @window_vertical_offset: vertical offset in pels for input window 248 * within input frame 249 * @addr_roi: Map of QP offset for the Region of Interest algorithm and 250 * also used for Error map. 251 * Bit 0-6 used for qp offset (value -64 to 63). 252 * Bit 7 used to force intra 253 * @addr_slice_header: address to slice header 254 * @slice_header_size_in_bits: size in bits of the Slice header 255 * @slice_header_offset0: Slice header offset where to insert 256 * first_Mb_in_slice 257 * @slice_header_offset1: Slice header offset where to insert 258 * slice_qp_delta 259 * @slice_header_offset2: Slice header offset where to insert 260 * num_MBs_in_slice 261 * @slice_synchro_enable: enable "slice ready" interrupt after each slice 262 * @max_slice_number: Maximum number of slice in a frame 263 * (0 is strictly forbidden) 264 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to 265 * YUV for the Y component. 266 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 267 * @rgb2_yuv_u_coeff: four coefficients (C0C1C2C3) to convert from RGB to 268 * YUV for the Y component. 269 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 270 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to 271 * YUV for the U (Cb) component. 272 * U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0) 273 * @slice_byte_size: maximum slice size in bytes 274 * (used when slice_size_type=2 or slice_size_type=3) 275 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame 276 * for the AIR algorithm 277 * @brc_no_skip: Disable skipping in the Bitrate Controller 278 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters 279 */ 280struct hva_h264_td { 281 u16 frame_width; 282 u16 frame_height; 283 u32 frame_num; 284 u16 picture_coding_type; 285 u16 reserved1; 286 u16 pic_order_cnt_type; 287 u16 first_picture_in_sequence; 288 u16 slice_size_type; 289 u16 reserved2; 290 u32 slice_mb_size; 291 u16 ir_param_option; 292 u16 intra_refresh_type; 293 u16 use_constrained_intra_flag; 294 u16 transform_mode; 295 u16 disable_deblocking_filter_idc; 296 s16 slice_alpha_c0_offset_div2; 297 s16 slice_beta_offset_div2; 298 u16 encoder_complexity; 299 s16 chroma_qp_index_offset; 300 u16 entropy_coding_mode; 301 u16 brc_type; 302 u16 quant; 303 u32 non_vcl_nalu_size; 304 u32 cpb_buffer_size; 305 u32 bit_rate; 306 u16 qp_min; 307 u16 qp_max; 308 u16 framerate_num; 309 u16 framerate_den; 310 u16 delay; 311 u16 strict_hrd_compliancy; 312 u32 addr_source_buffer; 313 u32 addr_fwd_ref_buffer; 314 u32 addr_rec_buffer; 315 u32 addr_output_bitstream_start; 316 u32 addr_output_bitstream_end; 317 u32 addr_external_sw; 318 u32 addr_lctx; 319 u32 addr_local_rec_buffer; 320 u32 addr_spatial_context; 321 u16 bitstream_offset; 322 u16 sampling_mode; 323 u32 addr_param_out; 324 u32 addr_scaling_matrix; 325 u32 addr_scaling_matrix_dir; 326 u32 addr_cabac_context_buffer; 327 u32 reserved3; 328 u32 reserved4; 329 s16 gmv_x; 330 s16 gmv_y; 331 u16 window_width; 332 u16 window_height; 333 u16 window_horizontal_offset; 334 u16 window_vertical_offset; 335 u32 addr_roi; 336 u32 addr_slice_header; 337 u16 slice_header_size_in_bits; 338 u16 slice_header_offset0; 339 u16 slice_header_offset1; 340 u16 slice_header_offset2; 341 u32 reserved5; 342 u32 reserved6; 343 u16 reserved7; 344 u16 reserved8; 345 u16 slice_synchro_enable; 346 u16 max_slice_number; 347 u32 rgb2_yuv_y_coeff; 348 u32 rgb2_yuv_u_coeff; 349 u32 rgb2_yuv_v_coeff; 350 u32 slice_byte_size; 351 u16 max_air_intra_mb_nb; 352 u16 brc_no_skip; 353 u32 addr_temporal_context; 354 u32 addr_brc_in_out_parameter; 355}; 356 357/* 358 * struct hva_h264_slice_po 359 * 360 * @ slice_size: slice size 361 * @ slice_start_time: start time 362 * @ slice_stop_time: stop time 363 * @ slice_num: slice number 364 */ 365struct hva_h264_slice_po { 366 u32 slice_size; 367 u32 slice_start_time; 368 u32 slice_end_time; 369 u32 slice_num; 370}; 371 372/* 373 * struct hva_h264_po 374 * 375 * @ bitstream_size: bitstream size 376 * @ dct_bitstream_size: dtc bitstream size 377 * @ stuffing_bits: number of stuffing bits inserted by the encoder 378 * @ removal_time: removal time of current frame (nb of ticks 1/framerate) 379 * @ hvc_start_time: hvc start time 380 * @ hvc_stop_time: hvc stop time 381 * @ slice_count: slice count 382 */ 383struct hva_h264_po { 384 u32 bitstream_size; 385 u32 dct_bitstream_size; 386 u32 stuffing_bits; 387 u32 removal_time; 388 u32 hvc_start_time; 389 u32 hvc_stop_time; 390 u32 slice_count; 391 u32 reserved0; 392 struct hva_h264_slice_po slice_params[16]; 393}; 394 395struct hva_h264_task { 396 struct hva_h264_td td; 397 struct hva_h264_po po; 398}; 399 400/* 401 * struct hva_h264_ctx 402 * 403 * @seq_info: sequence information buffer 404 * @ref_frame: reference frame buffer 405 * @rec_frame: reconstructed frame buffer 406 * @task: task descriptor 407 */ 408struct hva_h264_ctx { 409 struct hva_buffer *seq_info; 410 struct hva_buffer *ref_frame; 411 struct hva_buffer *rec_frame; 412 struct hva_buffer *task; 413}; 414 415static int hva_h264_fill_slice_header(struct hva_ctx *pctx, 416 u8 *slice_header_addr, 417 struct hva_controls *ctrls, 418 int frame_num, 419 u16 *header_size, 420 u16 *header_offset0, 421 u16 *header_offset1, 422 u16 *header_offset2) 423{ 424 /* 425 * with this HVA hardware version, part of the slice header is computed 426 * on host and part by hardware. 427 * The part of host is precomputed and available through this array. 428 */ 429 struct device *dev = ctx_to_dev(pctx); 430 int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC; 431 const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01, 432 0x41, 0x34, 0x07, 0x00}; 433 int idr_pic_id = frame_num % 2; 434 enum hva_picture_coding_type type; 435 u32 frame_order = frame_num % ctrls->gop_size; 436 437 if (!(frame_num % ctrls->gop_size)) 438 type = PICTURE_CODING_TYPE_I; 439 else 440 type = PICTURE_CODING_TYPE_P; 441 442 memcpy(slice_header_addr, slice_header, sizeof(slice_header)); 443 444 *header_size = 56; 445 *header_offset0 = 40; 446 *header_offset1 = 13; 447 *header_offset2 = 0; 448 449 if (type == PICTURE_CODING_TYPE_I) { 450 slice_header_addr[4] = 0x65; 451 slice_header_addr[5] = 0x11; 452 453 /* toggle the I frame */ 454 if ((frame_num / ctrls->gop_size) % 2) { 455 *header_size += 4; 456 *header_offset1 += 4; 457 slice_header_addr[6] = 0x04; 458 slice_header_addr[7] = 0x70; 459 460 } else { 461 *header_size += 2; 462 *header_offset1 += 2; 463 slice_header_addr[6] = 0x09; 464 slice_header_addr[7] = 0xC0; 465 } 466 } else { 467 if (ctrls->entropy_mode == cabac) { 468 *header_size += 1; 469 *header_offset1 += 1; 470 slice_header_addr[7] = 0x80; 471 } 472 /* 473 * update slice header with P frame order 474 * frame order is limited to 16 (coded on 4bits only) 475 */ 476 slice_header_addr[5] += ((frame_order & 0x0C) >> 2); 477 slice_header_addr[6] += ((frame_order & 0x03) << 6); 478 } 479 480 dev_dbg(dev, 481 "%s %s slice header order %d idrPicId %d header size %d\n", 482 pctx->name, __func__, frame_order, idr_pic_id, *header_size); 483 return 0; 484} 485 486static int hva_h264_fill_data_nal(struct hva_ctx *pctx, 487 unsigned int stuffing_bytes, u8 *addr, 488 unsigned int stream_size, unsigned int *size) 489{ 490 struct device *dev = ctx_to_dev(pctx); 491 const u8 start[] = { 0x00, 0x00, 0x00, 0x01 }; 492 493 dev_dbg(dev, "%s %s stuffing bytes %d\n", pctx->name, __func__, 494 stuffing_bytes); 495 496 if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) { 497 dev_dbg(dev, "%s %s too many stuffing bytes %d\n", 498 pctx->name, __func__, stuffing_bytes); 499 return 0; 500 } 501 502 /* start code */ 503 memcpy(addr + *size, start, sizeof(start)); 504 *size += sizeof(start); 505 506 /* nal_unit_type */ 507 addr[*size] = NALU_TYPE_FILLER_DATA; 508 *size += 1; 509 510 memset(addr + *size, 0xff, stuffing_bytes); 511 *size += stuffing_bytes; 512 513 addr[*size] = 0x80; 514 *size += 1; 515 516 return 0; 517} 518 519static int hva_h264_fill_sei_nal(struct hva_ctx *pctx, 520 enum hva_h264_sei_payload_type type, 521 u8 *addr, u32 *size) 522{ 523 struct device *dev = ctx_to_dev(pctx); 524 const u8 start[] = { 0x00, 0x00, 0x00, 0x01 }; 525 struct hva_h264_stereo_video_sei info; 526 u8 offset = 7; 527 u8 msg = 0; 528 529 /* start code */ 530 memcpy(addr + *size, start, sizeof(start)); 531 *size += sizeof(start); 532 533 /* nal_unit_type */ 534 addr[*size] = NALU_TYPE_SEI; 535 *size += 1; 536 537 /* payload type */ 538 addr[*size] = type; 539 *size += 1; 540 541 switch (type) { 542 case SEI_STEREO_VIDEO_INFO: 543 memset(&info, 0, sizeof(info)); 544 545 /* set to top/bottom frame packing arrangement */ 546 info.field_views_flag = 1; 547 info.top_field_is_left_view_flag = 1; 548 549 /* payload size */ 550 addr[*size] = 1; 551 *size += 1; 552 553 /* payload */ 554 msg = info.field_views_flag << offset--; 555 556 if (info.field_views_flag) { 557 msg |= info.top_field_is_left_view_flag << 558 offset--; 559 } else { 560 msg |= info.current_frame_is_left_view_flag << 561 offset--; 562 msg |= info.next_frame_is_second_view_flag << 563 offset--; 564 } 565 msg |= info.left_view_self_contained_flag << offset--; 566 msg |= info.right_view_self_contained_flag << offset--; 567 568 addr[*size] = msg; 569 *size += 1; 570 571 addr[*size] = 0x80; 572 *size += 1; 573 574 return 0; 575 case SEI_BUFFERING_PERIOD: 576 case SEI_PICTURE_TIMING: 577 case SEI_FRAME_PACKING_ARRANGEMENT: 578 default: 579 dev_err(dev, "%s sei nal type not supported %d\n", 580 pctx->name, type); 581 return -EINVAL; 582 } 583} 584 585static int hva_h264_prepare_task(struct hva_ctx *pctx, 586 struct hva_h264_task *task, 587 struct hva_frame *frame, 588 struct hva_stream *stream) 589{ 590 struct hva_dev *hva = ctx_to_hdev(pctx); 591 struct device *dev = ctx_to_dev(pctx); 592 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv; 593 struct hva_buffer *seq_info = ctx->seq_info; 594 struct hva_buffer *fwd_ref_frame = ctx->ref_frame; 595 struct hva_buffer *loc_rec_frame = ctx->rec_frame; 596 struct hva_h264_td *td = &task->td; 597 struct hva_controls *ctrls = &pctx->ctrls; 598 struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame; 599 int cavlc = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC; 600 u32 frame_num = pctx->stream_num; 601 u32 addr_esram = hva->esram_addr; 602 enum v4l2_mpeg_video_h264_level level; 603 dma_addr_t paddr = 0; 604 u8 *slice_header_vaddr; 605 u32 frame_width = frame->info.aligned_width; 606 u32 frame_height = frame->info.aligned_height; 607 u32 max_cpb_buffer_size; 608 unsigned int payload = stream->bytesused; 609 u32 max_bitrate; 610 611 /* check width and height parameters */ 612 if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) || 613 (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) { 614 dev_err(dev, 615 "%s width(%d) or height(%d) exceeds limits (%dx%d)\n", 616 pctx->name, frame_width, frame_height, 617 H264_MAX_SIZE_W, H264_MAX_SIZE_H); 618 pctx->frame_errors++; 619 return -EINVAL; 620 } 621 622 level = ctrls->level; 623 624 memset(td, 0, sizeof(struct hva_h264_td)); 625 626 td->frame_width = frame_width; 627 td->frame_height = frame_height; 628 629 /* set frame alignment */ 630 td->window_width = frame_width; 631 td->window_height = frame_height; 632 td->window_horizontal_offset = 0; 633 td->window_vertical_offset = 0; 634 635 td->first_picture_in_sequence = (!frame_num) ? 1 : 0; 636 637 /* pic_order_cnt_type hard coded to '2' as only I & P frames */ 638 td->pic_order_cnt_type = 2; 639 640 /* useConstrainedIntraFlag set to false for better coding efficiency */ 641 td->use_constrained_intra_flag = false; 642 td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) 643 ? BRC_TYPE_CBR : BRC_TYPE_VBR; 644 645 td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC : 646 CABAC; 647 648 td->bit_rate = ctrls->bitrate; 649 650 /* set framerate, framerate = 1 n/ time per frame */ 651 if (time_per_frame->numerator >= 536) { 652 /* 653 * due to a hardware bug, framerate denominator can't exceed 654 * 536 (BRC overflow). Compute nearest framerate 655 */ 656 td->framerate_den = 1; 657 td->framerate_num = (time_per_frame->denominator + 658 (time_per_frame->numerator >> 1) - 1) / 659 time_per_frame->numerator; 660 661 /* 662 * update bitrate to introduce a correction due to 663 * the new framerate 664 * new bitrate = (old bitrate * new framerate) / old framerate 665 */ 666 td->bit_rate /= time_per_frame->numerator; 667 td->bit_rate *= time_per_frame->denominator; 668 td->bit_rate /= td->framerate_num; 669 } else { 670 td->framerate_den = time_per_frame->numerator; 671 td->framerate_num = time_per_frame->denominator; 672 } 673 674 /* compute maximum bitrate depending on profile */ 675 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH) 676 max_bitrate = h264_infos_list[level].max_bitrate * 677 H264_FACTOR_HIGH; 678 else 679 max_bitrate = h264_infos_list[level].max_bitrate * 680 H264_FACTOR_BASELINE; 681 682 /* check if bitrate doesn't exceed max size */ 683 if (td->bit_rate > max_bitrate) { 684 dev_dbg(dev, 685 "%s bitrate (%d) larger than level and profile allow, clip to %d\n", 686 pctx->name, td->bit_rate, max_bitrate); 687 td->bit_rate = max_bitrate; 688 } 689 690 /* convert cpb_buffer_size in bits */ 691 td->cpb_buffer_size = ctrls->cpb_size * 8000; 692 693 /* compute maximum cpb buffer size depending on profile */ 694 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH) 695 max_cpb_buffer_size = 696 h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH; 697 else 698 max_cpb_buffer_size = 699 h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE; 700 701 /* check if cpb buffer size doesn't exceed max size */ 702 if (td->cpb_buffer_size > max_cpb_buffer_size) { 703 dev_dbg(dev, 704 "%s cpb size larger than level %d allows, clip to %d\n", 705 pctx->name, td->cpb_buffer_size, max_cpb_buffer_size); 706 td->cpb_buffer_size = max_cpb_buffer_size; 707 } 708 709 /* enable skipping in the Bitrate Controller */ 710 td->brc_no_skip = 0; 711 712 /* initial delay */ 713 if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) && 714 td->bit_rate) 715 td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate); 716 else 717 td->delay = 0; 718 719 switch (frame->info.pixelformat) { 720 case V4L2_PIX_FMT_NV12: 721 td->sampling_mode = SAMPLING_MODE_NV12; 722 break; 723 case V4L2_PIX_FMT_NV21: 724 td->sampling_mode = SAMPLING_MODE_NV21; 725 break; 726 default: 727 dev_err(dev, "%s invalid source pixel format\n", 728 pctx->name); 729 pctx->frame_errors++; 730 return -EINVAL; 731 } 732 733 /* 734 * fill matrix color converter (RGB to YUV) 735 * Y = 0,299 R + 0,587 G + 0,114 B 736 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128 737 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128 738 */ 739 td->rgb2_yuv_y_coeff = 0x12031008; 740 td->rgb2_yuv_u_coeff = 0x800EF7FB; 741 td->rgb2_yuv_v_coeff = 0x80FEF40E; 742 743 /* enable/disable transform mode */ 744 td->transform_mode = ctrls->dct8x8; 745 746 /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */ 747 td->encoder_complexity = 2; 748 749 /* quant fix to 28, default VBR value */ 750 td->quant = 28; 751 752 if (td->framerate_den == 0) { 753 dev_err(dev, "%s invalid framerate\n", pctx->name); 754 pctx->frame_errors++; 755 return -EINVAL; 756 } 757 758 /* if automatic framerate, deactivate bitrate controller */ 759 if (td->framerate_num == 0) 760 td->brc_type = 0; 761 762 /* compliancy fix to true */ 763 td->strict_hrd_compliancy = 1; 764 765 /* set minimum & maximum quantizers */ 766 td->qp_min = clamp_val(ctrls->qpmin, 0, 51); 767 td->qp_max = clamp_val(ctrls->qpmax, 0, 51); 768 769 td->addr_source_buffer = frame->paddr; 770 td->addr_fwd_ref_buffer = fwd_ref_frame->paddr; 771 td->addr_rec_buffer = loc_rec_frame->paddr; 772 773 td->addr_output_bitstream_end = (u32)stream->paddr + stream->size; 774 775 td->addr_output_bitstream_start = (u32)stream->paddr; 776 td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) & 777 BITSTREAM_OFFSET_MASK; 778 779 td->addr_param_out = (u32)ctx->task->paddr + 780 offsetof(struct hva_h264_task, po); 781 782 /* swap spatial and temporal context */ 783 if (frame_num % 2) { 784 paddr = seq_info->paddr; 785 td->addr_spatial_context = ALIGN(paddr, 0x100); 786 paddr = seq_info->paddr + DATA_SIZE(frame_width, 787 frame_height); 788 td->addr_temporal_context = ALIGN(paddr, 0x100); 789 } else { 790 paddr = seq_info->paddr; 791 td->addr_temporal_context = ALIGN(paddr, 0x100); 792 paddr = seq_info->paddr + DATA_SIZE(frame_width, 793 frame_height); 794 td->addr_spatial_context = ALIGN(paddr, 0x100); 795 } 796 797 paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height); 798 799 td->addr_brc_in_out_parameter = ALIGN(paddr, 0x100); 800 801 paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE; 802 td->addr_slice_header = ALIGN(paddr, 0x100); 803 td->addr_external_sw = ALIGN(addr_esram, 0x100); 804 805 addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width); 806 td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100); 807 808 addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width); 809 td->addr_lctx = ALIGN(addr_esram, 0x100); 810 811 addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)); 812 td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100); 813 814 if (!(frame_num % ctrls->gop_size)) { 815 td->picture_coding_type = PICTURE_CODING_TYPE_I; 816 stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME; 817 } else { 818 td->picture_coding_type = PICTURE_CODING_TYPE_P; 819 stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME; 820 } 821 822 /* fill the slice header part */ 823 slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header - 824 seq_info->paddr); 825 826 hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num, 827 &td->slice_header_size_in_bits, 828 &td->slice_header_offset0, 829 &td->slice_header_offset1, 830 &td->slice_header_offset2); 831 832 td->chroma_qp_index_offset = 2; 833 td->slice_synchro_enable = 0; 834 td->max_slice_number = 1; 835 836 /* 837 * check the sps/pps header size for key frame only 838 * sps/pps header was previously fill by libv4l 839 * during qbuf of stream buffer 840 */ 841 if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) && 842 (payload > MAX_SPS_PPS_SIZE)) { 843 dev_err(dev, "%s invalid sps/pps size %d\n", pctx->name, 844 payload); 845 pctx->frame_errors++; 846 return -EINVAL; 847 } 848 849 if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME) 850 payload = 0; 851 852 /* add SEI nal (video stereo info) */ 853 if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO, 854 (u8 *)stream->vaddr, 855 &payload)) { 856 dev_err(dev, "%s fail to get SEI nal\n", pctx->name); 857 pctx->frame_errors++; 858 return -EINVAL; 859 } 860 861 /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */ 862 td->non_vcl_nalu_size = payload * 8; 863 864 /* compute bitstream offset & new start address of bitstream */ 865 td->addr_output_bitstream_start += ((payload >> 4) << 4); 866 td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8; 867 868 stream->bytesused = payload; 869 870 return 0; 871} 872 873static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task) 874{ 875 struct hva_h264_po *po = &task->po; 876 877 return po->bitstream_size; 878} 879 880static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task) 881{ 882 struct hva_h264_po *po = &task->po; 883 884 return po->stuffing_bits >> 3; 885} 886 887static int hva_h264_open(struct hva_ctx *pctx) 888{ 889 struct device *dev = ctx_to_dev(pctx); 890 struct hva_h264_ctx *ctx; 891 struct hva_dev *hva = ctx_to_hdev(pctx); 892 u32 frame_width = pctx->frameinfo.aligned_width; 893 u32 frame_height = pctx->frameinfo.aligned_height; 894 u32 size; 895 int ret; 896 897 /* check esram size necessary to encode a frame */ 898 size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) + 899 LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) + 900 CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) + 901 CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width); 902 903 if (hva->esram_size < size) { 904 dev_err(dev, "%s not enough esram (max:%d request:%d)\n", 905 pctx->name, hva->esram_size, size); 906 ret = -EINVAL; 907 goto err; 908 } 909 910 /* allocate context for codec */ 911 ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); 912 if (!ctx) { 913 ret = -ENOMEM; 914 goto err; 915 } 916 917 /* allocate sequence info buffer */ 918 ret = hva_mem_alloc(pctx, 919 2 * DATA_SIZE(frame_width, frame_height) + 920 SLICE_HEADER_SIZE + 921 BRC_DATA_SIZE, 922 "hva sequence info", 923 &ctx->seq_info); 924 if (ret) { 925 dev_err(dev, 926 "%s failed to allocate sequence info buffer\n", 927 pctx->name); 928 goto err_ctx; 929 } 930 931 /* allocate reference frame buffer */ 932 ret = hva_mem_alloc(pctx, 933 frame_width * frame_height * 3 / 2, 934 "hva reference frame", 935 &ctx->ref_frame); 936 if (ret) { 937 dev_err(dev, "%s failed to allocate reference frame buffer\n", 938 pctx->name); 939 goto err_seq_info; 940 } 941 942 /* allocate reconstructed frame buffer */ 943 ret = hva_mem_alloc(pctx, 944 frame_width * frame_height * 3 / 2, 945 "hva reconstructed frame", 946 &ctx->rec_frame); 947 if (ret) { 948 dev_err(dev, 949 "%s failed to allocate reconstructed frame buffer\n", 950 pctx->name); 951 goto err_ref_frame; 952 } 953 954 /* allocate task descriptor */ 955 ret = hva_mem_alloc(pctx, 956 sizeof(struct hva_h264_task), 957 "hva task descriptor", 958 &ctx->task); 959 if (ret) { 960 dev_err(dev, 961 "%s failed to allocate task descriptor\n", 962 pctx->name); 963 goto err_rec_frame; 964 } 965 966 pctx->priv = (void *)ctx; 967 968 return 0; 969 970err_rec_frame: 971 hva_mem_free(pctx, ctx->rec_frame); 972err_ref_frame: 973 hva_mem_free(pctx, ctx->ref_frame); 974err_seq_info: 975 hva_mem_free(pctx, ctx->seq_info); 976err_ctx: 977 devm_kfree(dev, ctx); 978err: 979 pctx->sys_errors++; 980 return ret; 981} 982 983static int hva_h264_close(struct hva_ctx *pctx) 984{ 985 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv; 986 struct device *dev = ctx_to_dev(pctx); 987 988 if (ctx->seq_info) 989 hva_mem_free(pctx, ctx->seq_info); 990 991 if (ctx->ref_frame) 992 hva_mem_free(pctx, ctx->ref_frame); 993 994 if (ctx->rec_frame) 995 hva_mem_free(pctx, ctx->rec_frame); 996 997 if (ctx->task) 998 hva_mem_free(pctx, ctx->task); 999 1000 devm_kfree(dev, ctx); 1001 1002 return 0; 1003} 1004 1005static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame, 1006 struct hva_stream *stream) 1007{ 1008 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv; 1009 struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr; 1010 u32 stuffing_bytes = 0; 1011 int ret = 0; 1012 1013 ret = hva_h264_prepare_task(pctx, task, frame, stream); 1014 if (ret) 1015 goto err; 1016 1017 ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task); 1018 if (ret) 1019 goto err; 1020 1021 pctx->stream_num++; 1022 stream->bytesused += hva_h264_get_stream_size(task); 1023 1024 stuffing_bytes = hva_h264_get_stuffing_bytes(task); 1025 1026 if (stuffing_bytes) 1027 hva_h264_fill_data_nal(pctx, stuffing_bytes, 1028 (u8 *)stream->vaddr, 1029 stream->size, 1030 &stream->bytesused); 1031 1032 /* switch reference & reconstructed frame */ 1033 swap(ctx->ref_frame, ctx->rec_frame); 1034 1035 return 0; 1036err: 1037 stream->bytesused = 0; 1038 return ret; 1039} 1040 1041const struct hva_enc nv12h264enc = { 1042 .name = "H264(NV12)", 1043 .pixelformat = V4L2_PIX_FMT_NV12, 1044 .streamformat = V4L2_PIX_FMT_H264, 1045 .max_width = H264_MAX_SIZE_W, 1046 .max_height = H264_MAX_SIZE_H, 1047 .open = hva_h264_open, 1048 .close = hva_h264_close, 1049 .encode = hva_h264_encode, 1050}; 1051 1052const struct hva_enc nv21h264enc = { 1053 .name = "H264(NV21)", 1054 .pixelformat = V4L2_PIX_FMT_NV21, 1055 .streamformat = V4L2_PIX_FMT_H264, 1056 .max_width = H264_MAX_SIZE_W, 1057 .max_height = H264_MAX_SIZE_H, 1058 .open = hva_h264_open, 1059 .close = hva_h264_close, 1060 .encode = hva_h264_encode, 1061}; 1062