1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) STMicroelectronics SA 2015
4 * Authors: Yannick Fertre <yannick.fertre@st.com>
5 *          Hugues Fruchet <hugues.fruchet@st.com>
6 */
7
8#include "hva.h"
9#include "hva-hw.h"
10
/* largest SPS/PPS payload (in bytes) accepted in front of a key frame */
#define MAX_SPS_PPS_SIZE 128

/* mask applied to the bit offset derived from the stream start address */
#define BITSTREAM_OFFSET_MASK 0x7F

/* video max size */
#define H264_MAX_SIZE_W 1920
#define H264_MAX_SIZE_H 1920

/*
 * macroblock count (width & height): pixel size rounded up to the next
 * multiple of 16, divided by 16.
 * The argument is parenthesized so that any expression can be passed safely.
 */
#define MB_W(w) (((w) + 0xF) / 0x10)
#define MB_H(h) (((h) + 0xF) / 0x10)

/* formula to get temporal or spatial data size */
#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)

#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
#define SLICE_HEADER_SIZE (4 * 16)
#define BRC_DATA_SIZE (5 * 16)

/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)

/*
 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
 * for deblocking with size=4*16*MBx*2
 */
#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)

/* factor for bitrate and cpb buffer size max values if profile >= high */
#define H264_FACTOR_HIGH 1200

/* factor for bitrate and cpb buffer size max values if profile < high */
#define H264_FACTOR_BASELINE 1000

/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
#define H264_FILLER_DATA_SIZE 6
49
/*
 * struct h264_profile
 *
 * One row of limits for a given H.264 level (see h264_infos_list).
 * In this file only @max_bitrate and @max_cpb_size are read, by
 * hva_h264_prepare_task(), after being multiplied by H264_FACTOR_HIGH or
 * H264_FACTOR_BASELINE depending on the selected profile.
 *
 * @level: V4L2 level identifier of this row
 * @max_mb_per_seconds: presumably the macroblock processing rate limit of
 *			the level (not read in this file)
 * @max_frame_size: presumably the frame size limit of the level, in
 *		    macroblocks (not read in this file)
 * @max_bitrate: bitrate limit before profile factor scaling
 * @max_cpb_size: coded picture buffer size limit before profile factor
 *		  scaling
 * @min_comp_ratio: presumably the minimum compression ratio of the level
 *		    (not read in this file)
 */
struct h264_profile {
	enum v4l2_mpeg_video_h264_level level;
	u32 max_mb_per_seconds;
	u32 max_frame_size;
	u32 max_bitrate;
	u32 max_cpb_size;
	u32 min_comp_ratio;
};
58
59static const struct h264_profile h264_infos_list[] = {
60	{V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
61	{V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
62	{V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
63	{V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
64	{V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
65	{V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
66	{V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
67	{V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
68	{V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
69	{V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
70	{V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
71	{V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
72	{V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
73	{V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
74	{V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
75	{V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
76};
77
/*
 * bit-rate control algorithm selector, stored in hva_h264_td.brc_type.
 * hva_h264_prepare_task() only selects CBR or VBR, and falls back to
 * the value 0 (no BRC) when the framerate numerator is 0.
 */
enum hva_brc_type {
	BRC_TYPE_NONE = 0,
	BRC_TYPE_CBR = 1,
	BRC_TYPE_VBR = 2,
	BRC_TYPE_VBR_LOW_DELAY = 3
};
84
/* entropy coding mode, stored in hva_h264_td.entropy_coding_mode */
enum hva_entropy_coding_mode {
	CAVLC = 0,
	CABAC = 1
};
89
/*
 * picture coding type, stored in hva_h264_td.picture_coding_type.
 * Only I and P pictures are produced by this file.
 */
enum hva_picture_coding_type {
	PICTURE_CODING_TYPE_I = 0,
	PICTURE_CODING_TYPE_P = 1,
	PICTURE_CODING_TYPE_B = 2
};
95
/*
 * input picture format codes, stored in hva_h264_td.sampling_mode.
 * hva_h264_prepare_task() only selects SAMPLING_MODE_NV12 and
 * SAMPLING_MODE_NV21; the other values are not used in this file.
 */
enum hva_h264_sampling_mode {
	SAMPLING_MODE_NV12 = 0,
	SAMPLING_MODE_UYVY = 1,
	SAMPLING_MODE_RGB3 = 3,
	SAMPLING_MODE_XRGB4 = 4,
	SAMPLING_MODE_NV21 = 8,
	SAMPLING_MODE_VYUY = 9,
	SAMPLING_MODE_BGR3 = 11,
	SAMPLING_MODE_XBGR4 = 12,
	SAMPLING_MODE_RGBX4 = 20,
	SAMPLING_MODE_BGRX4 = 28
};
108
/*
 * NAL unit type codes.
 * The value is written as the byte following the start code by
 * hva_h264_fill_data_nal() (NALU_TYPE_FILLER_DATA) and
 * hva_h264_fill_sei_nal() (NALU_TYPE_SEI); the other values are not used
 * in this file.
 */
enum hva_h264_nalu_type {
	NALU_TYPE_UNKNOWN = 0,
	NALU_TYPE_SLICE = 1,
	NALU_TYPE_SLICE_DPA = 2,
	NALU_TYPE_SLICE_DPB = 3,
	NALU_TYPE_SLICE_DPC = 4,
	NALU_TYPE_SLICE_IDR = 5,
	NALU_TYPE_SEI = 6,
	NALU_TYPE_SPS = 7,
	NALU_TYPE_PPS = 8,
	NALU_TYPE_AU_DELIMITER = 9,
	NALU_TYPE_SEQ_END = 10,
	NALU_TYPE_STREAM_END = 11,
	NALU_TYPE_FILLER_DATA = 12,
	NALU_TYPE_SPS_EXT = 13,
	NALU_TYPE_PREFIX_UNIT = 14,
	NALU_TYPE_SUBSET_SPS = 15,
	NALU_TYPE_SLICE_AUX = 19,
	NALU_TYPE_SLICE_EXT = 20
};
129
/*
 * SEI payload type codes, written as the payload type byte of the SEI NAL.
 * hva_h264_fill_sei_nal() only supports SEI_STEREO_VIDEO_INFO and rejects
 * the other values with -EINVAL.
 */
enum hva_h264_sei_payload_type {
	SEI_BUFFERING_PERIOD = 0,
	SEI_PICTURE_TIMING = 1,
	SEI_STEREO_VIDEO_INFO = 21,
	SEI_FRAME_PACKING_ARRANGEMENT = 45
};
136
/*
 * struct hva_h264_stereo_video_sei
 *
 * Stereo video info flags, packed MSB first into a single payload byte by
 * hva_h264_fill_sei_nal(). Each member holds a one-bit flag (0 or 1).
 *
 * @field_views_flag: selects which of the following flags are serialized:
 *		      when set, only @top_field_is_left_view_flag follows;
 *		      otherwise @current_frame_is_left_view_flag and
 *		      @next_frame_is_second_view_flag follow
 * @top_field_is_left_view_flag: serialized when @field_views_flag is set
 * @current_frame_is_left_view_flag: serialized when @field_views_flag is
 *				     cleared
 * @next_frame_is_second_view_flag: serialized when @field_views_flag is
 *				    cleared
 * @left_view_self_contained_flag: always serialized
 * @right_view_self_contained_flag: always serialized
 */
struct hva_h264_stereo_video_sei {
	u8 field_views_flag;
	u8 top_field_is_left_view_flag;
	u8 current_frame_is_left_view_flag;
	u8 next_frame_is_second_view_flag;
	u8 left_view_self_contained_flag;
	u8 right_view_self_contained_flag;
};
148
/*
 * struct hva_h264_td
 *
 * Task descriptor handed to the HVA hardware for the encoding of one frame.
 * It is zeroed then filled by hva_h264_prepare_task(); the reserved members
 * are never written in this file and therefore stay at 0.
 *
 * @frame_width: width in pixels of the buffer containing the input frame
 * @frame_height: height in pixels of the buffer containing the input frame
 * @frame_num: the parameter to be written in the slice header
 * @picture_coding_type: type I, P or B
 * @reserved1: reserved
 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
 * @first_picture_in_sequence: flag telling to encoder that this is the
 *			       first picture in a video sequence.
 *			       Used for VBR
 * @slice_size_type: 0 = no constraint to close the slice
 *		     1 = a slice is closed as soon as the slice_mb_size limit
 *			 is reached
 *		     2 = a slice is closed as soon as the slice_byte_size limit
 *			 is reached
 *		     3 = a slice is closed as soon as either the
 *			 slice_byte_size limit or the slice_mb_size limit is
 *			 reached
 * @reserved2: reserved
 * @slice_mb_size: defines the slice size in number of macroblocks
 *		   (used when slice_size_type=1 or slice_size_type=3)
 * @ir_param_option: defines the number of macroblocks per frame to be
 *		     refreshed by AIR algorithm OR the refresh period
 *		     by CIR algorithm
 * @intra_refresh_type: enables the adaptive intra refresh algorithm.
 *			Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
 * @transform_mode: controls the use of 4x4/8x8 transform mode
 * @disable_deblocking_filter_idc:
 *		     0: specifies that all luma and chroma block edges of
 *			the slice are filtered.
 *		     1: specifies that deblocking is disabled for all block
 *			edges of the slice.
 *		     2: specifies that all luma and chroma block edges of
 *			the slice are filtered with exception of the block edges
 *			that coincide with slice boundaries
 * @slice_alpha_c0_offset_div2: to be written in slice header,
 *				controls deblocking
 * @slice_beta_offset_div2: to be written in slice header,
 *			    controls deblocking
 * @encoder_complexity: encoder complexity control (IME).
 *		     0 = I_16x16, P_16x16, Full ME Complexity
 *		     1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
 *		     2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
 *		     4 = I_16x16, P_16x16, Reduced ME Complexity
 *		     5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
 *		     6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
 * @chroma_qp_index_offset: coming from picture parameter set
 *			    (PPS see [H.264 STD] 7.4.2.2)
 * @entropy_coding_mode: entropy coding mode.
 *			 0 = CAVLC
 *			 1 = CABAC
 * @brc_type: selects the bit-rate control algorithm
 *		     0 = constant Qp, (no BRC)
 *		     1 = CBR
 *		     2 = VBR
 *		     3 = VBR low delay (see enum hva_brc_type)
 * @quant: Quantization param used in case of fix QP encoding (no BRC)
 * @non_vcl_nalu_size: size of non-VCL NALUs (SPS, PPS, filler),
 *		       used by BRC
 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
 * @bit_rate: target bitrate, for BRC
 * @qp_min: min QP threshold
 * @qp_max: max QP threshold
 * @framerate_num: target framerate numerator, used by BRC
 * @framerate_den: target framerate denominator, used by BRC
 * @delay: End-to-End Initial Delay
 * @strict_hrd_compliancy: flag for HRD compliancy (1)
 *			   May impact quality encoding
 * @addr_source_buffer: address of input frame buffer for current frame
 * @addr_fwd_ref_buffer: address of reference frame buffer
 * @addr_rec_buffer: address of reconstructed frame buffer
 * @addr_output_bitstream_start: output bitstream start address
 * @addr_output_bitstream_end: output bitstream end address
 * @addr_external_sw: address of external search window
 * @addr_lctx: address of context picture buffer
 * @addr_local_rec_buffer: address of local reconstructed buffer
 * @addr_spatial_context: address of spatial context buffer
 * @bitstream_offset: offset in bits between aligned bitstream start
 *		      address and first bit to be written by HVA.
 *		      Range value is [0..63]
 *		      NOTE(review): hva_h264_prepare_task() masks the initial
 *		      value with BITSTREAM_OFFSET_MASK (0x7F) and then adds the
 *		      header payload remainder, which can exceed 63 - confirm
 *		      the documented range
 * @sampling_mode: Input picture format.
 *		     0: YUV420 semi_planar Interleaved
 *		     1: YUV422 raster Interleaved
 *		     (other values: see enum hva_h264_sampling_mode)
 * @addr_param_out: address of output parameters structure
 * @addr_scaling_matrix: address to the coefficient of
 *			 the inverse scaling matrix
 * @addr_scaling_matrix_dir: address to the coefficient of
 *			     the direct scaling matrix
 * @addr_cabac_context_buffer: address of cabac context buffer
 * @reserved3: reserved
 * @reserved4: reserved
 * @gmv_x: Input information about the horizontal global displacement of
 *	   the encoded frame versus the previous one
 * @gmv_y: Input information about the vertical global displacement of
 *	   the encoded frame versus the previous one
 * @window_width: width in pixels of the window to be encoded inside
 *		  the input frame
 * @window_height: height in pixels of the window to be encoded inside
 *		   the input frame
 * @window_horizontal_offset: horizontal offset in pels for input window
 *			      within input frame
 * @window_vertical_offset: vertical offset in pels for input window
 *			    within input frame
 * @addr_roi: Map of QP offset for the Region of Interest algorithm and
 *	      also used for Error map.
 *	      Bit 0-6 used for qp offset (value -64 to 63).
 *	      Bit 7 used to force intra
 * @addr_slice_header: address to slice header
 * @slice_header_size_in_bits: size in bits of the Slice header
 * @slice_header_offset0: Slice header offset where to insert
 *			  first_Mb_in_slice
 * @slice_header_offset1: Slice header offset where to insert
 *			  slice_qp_delta
 * @slice_header_offset2: Slice header offset where to insert
 *			  num_MBs_in_slice
 * @reserved5: reserved
 * @reserved6: reserved
 * @reserved7: reserved
 * @reserved8: reserved
 * @slice_synchro_enable: enable "slice ready" interrupt after each slice
 * @max_slice_number: Maximum number of slice in a frame
 *		      (0 is strictly forbidden)
 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the Y component.
 *		      Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the U (Cb) component.
 *		      U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the V (Cr) component.
 *		      V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @slice_byte_size: maximum slice size in bytes
 *		     (used when slice_size_type=2 or slice_size_type=3)
 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
 *			 for the AIR algorithm
 * @brc_no_skip: Disable skipping in the Bitrate Controller
 * @addr_temporal_context: address of temporal context buffer
 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
 */
struct hva_h264_td {
	u16 frame_width;
	u16 frame_height;
	u32 frame_num;
	u16 picture_coding_type;
	u16 reserved1;
	u16 pic_order_cnt_type;
	u16 first_picture_in_sequence;
	u16 slice_size_type;
	u16 reserved2;
	u32 slice_mb_size;
	u16 ir_param_option;
	u16 intra_refresh_type;
	u16 use_constrained_intra_flag;
	u16 transform_mode;
	u16 disable_deblocking_filter_idc;
	s16 slice_alpha_c0_offset_div2;
	s16 slice_beta_offset_div2;
	u16 encoder_complexity;
	s16 chroma_qp_index_offset;
	u16 entropy_coding_mode;
	u16 brc_type;
	u16 quant;
	u32 non_vcl_nalu_size;
	u32 cpb_buffer_size;
	u32 bit_rate;
	u16 qp_min;
	u16 qp_max;
	u16 framerate_num;
	u16 framerate_den;
	u16 delay;
	u16 strict_hrd_compliancy;
	u32 addr_source_buffer;
	u32 addr_fwd_ref_buffer;
	u32 addr_rec_buffer;
	u32 addr_output_bitstream_start;
	u32 addr_output_bitstream_end;
	u32 addr_external_sw;
	u32 addr_lctx;
	u32 addr_local_rec_buffer;
	u32 addr_spatial_context;
	u16 bitstream_offset;
	u16 sampling_mode;
	u32 addr_param_out;
	u32 addr_scaling_matrix;
	u32 addr_scaling_matrix_dir;
	u32 addr_cabac_context_buffer;
	u32 reserved3;
	u32 reserved4;
	s16 gmv_x;
	s16 gmv_y;
	u16 window_width;
	u16 window_height;
	u16 window_horizontal_offset;
	u16 window_vertical_offset;
	u32 addr_roi;
	u32 addr_slice_header;
	u16 slice_header_size_in_bits;
	u16 slice_header_offset0;
	u16 slice_header_offset1;
	u16 slice_header_offset2;
	u32 reserved5;
	u32 reserved6;
	u16 reserved7;
	u16 reserved8;
	u16 slice_synchro_enable;
	u16 max_slice_number;
	u32 rgb2_yuv_y_coeff;
	u32 rgb2_yuv_u_coeff;
	u32 rgb2_yuv_v_coeff;
	u32 slice_byte_size;
	u16 max_air_intra_mb_nb;
	u16 brc_no_skip;
	u32 addr_temporal_context;
	u32 addr_brc_in_out_parameter;
};
356
/*
 * struct hva_h264_slice_po
 *
 * Per-slice output parameters; an array of them is embedded in
 * struct hva_h264_po. Not read by the code in this file.
 *
 * @slice_size: slice size
 * @slice_start_time: start time
 * @slice_end_time: stop time
 * @slice_num: slice number
 */
struct hva_h264_slice_po {
	u32 slice_size;
	u32 slice_start_time;
	u32 slice_end_time;
	u32 slice_num;
};
371
/*
 * struct hva_h264_po
 *
 * Output parameters of an encoding task. The task descriptor points to this
 * structure through hva_h264_td.addr_param_out. After the task has been
 * executed, this file reads @bitstream_size (added to the stream bytesused)
 * and @stuffing_bits (converted to bytes for the filler data NAL).
 *
 * @bitstream_size: bitstream size
 * @dct_bitstream_size: dct bitstream size
 * @stuffing_bits: number of stuffing bits inserted by the encoder
 * @removal_time: removal time of current frame (nb of ticks 1/framerate)
 * @hvc_start_time: hvc start time
 * @hvc_stop_time: hvc stop time
 * @slice_count: slice count
 * @reserved0: reserved
 * @slice_params: per-slice output parameters (16 entries)
 */
struct hva_h264_po {
	u32 bitstream_size;
	u32 dct_bitstream_size;
	u32 stuffing_bits;
	u32 removal_time;
	u32 hvc_start_time;
	u32 hvc_stop_time;
	u32 slice_count;
	u32 reserved0;
	struct hva_h264_slice_po slice_params[16];
};
394
/*
 * struct hva_h264_task
 *
 * Layout of the task buffer allocated by hva_h264_open().
 *
 * @td: task descriptor (input parameters), filled by
 *	hva_h264_prepare_task()
 * @po: output parameters; hva_h264_td.addr_param_out is set to the address
 *	of this member inside the task buffer
 */
struct hva_h264_task {
	struct hva_h264_td td;
	struct hva_h264_po po;
};
399
/*
 * struct hva_h264_ctx
 *
 * Private H.264 encoder context, allocated by hva_h264_open(), stored in
 * the priv member of struct hva_ctx and released by hva_h264_close().
 *
 * @seq_info:  sequence information buffer; holds the two (spatial and
 *	       temporal) context areas, the BRC data and the slice header
 * @ref_frame: reference frame buffer
 * @rec_frame: reconstructed frame buffer; swapped with @ref_frame after
 *	       each successfully encoded frame
 * @task:      task descriptor (holds a struct hva_h264_task)
 */
struct hva_h264_ctx {
	struct hva_buffer *seq_info;
	struct hva_buffer *ref_frame;
	struct hva_buffer *rec_frame;
	struct hva_buffer *task;
};
414
415static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
416				      u8 *slice_header_addr,
417				      struct hva_controls *ctrls,
418				      int frame_num,
419				      u16 *header_size,
420				      u16 *header_offset0,
421				      u16 *header_offset1,
422				      u16 *header_offset2)
423{
424	/*
425	 * with this HVA hardware version, part of the slice header is computed
426	 * on host and part by hardware.
427	 * The part of host is precomputed and available through this array.
428	 */
429	struct device *dev = ctx_to_dev(pctx);
430	int  cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
431	const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01,
432					       0x41, 0x34, 0x07, 0x00};
433	int idr_pic_id = frame_num % 2;
434	enum hva_picture_coding_type type;
435	u32 frame_order = frame_num % ctrls->gop_size;
436
437	if (!(frame_num % ctrls->gop_size))
438		type = PICTURE_CODING_TYPE_I;
439	else
440		type = PICTURE_CODING_TYPE_P;
441
442	memcpy(slice_header_addr, slice_header, sizeof(slice_header));
443
444	*header_size = 56;
445	*header_offset0 = 40;
446	*header_offset1 = 13;
447	*header_offset2 = 0;
448
449	if (type == PICTURE_CODING_TYPE_I) {
450		slice_header_addr[4] = 0x65;
451		slice_header_addr[5] = 0x11;
452
453		/* toggle the I frame */
454		if ((frame_num / ctrls->gop_size) % 2) {
455			*header_size += 4;
456			*header_offset1 += 4;
457			slice_header_addr[6] = 0x04;
458			slice_header_addr[7] = 0x70;
459
460		} else {
461			*header_size += 2;
462			*header_offset1 += 2;
463			slice_header_addr[6] = 0x09;
464			slice_header_addr[7] = 0xC0;
465		}
466	} else {
467		if (ctrls->entropy_mode == cabac) {
468			*header_size += 1;
469			*header_offset1 += 1;
470			slice_header_addr[7] = 0x80;
471		}
472		/*
473		 * update slice header with P frame order
474		 * frame order is limited to 16 (coded on 4bits only)
475		 */
476		slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
477		slice_header_addr[6] += ((frame_order & 0x03) << 6);
478	}
479
480	dev_dbg(dev,
481		"%s   %s slice header order %d idrPicId %d header size %d\n",
482		pctx->name, __func__, frame_order, idr_pic_id, *header_size);
483	return 0;
484}
485
486static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
487				  unsigned int stuffing_bytes, u8 *addr,
488				  unsigned int stream_size, unsigned int *size)
489{
490	struct device *dev = ctx_to_dev(pctx);
491	const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
492
493	dev_dbg(dev, "%s   %s stuffing bytes %d\n", pctx->name, __func__,
494		stuffing_bytes);
495
496	if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
497		dev_dbg(dev, "%s   %s too many stuffing bytes %d\n",
498			pctx->name, __func__, stuffing_bytes);
499		return 0;
500	}
501
502	/* start code */
503	memcpy(addr + *size, start, sizeof(start));
504	*size += sizeof(start);
505
506	/* nal_unit_type */
507	addr[*size] = NALU_TYPE_FILLER_DATA;
508	*size += 1;
509
510	memset(addr + *size, 0xff, stuffing_bytes);
511	*size += stuffing_bytes;
512
513	addr[*size] = 0x80;
514	*size += 1;
515
516	return 0;
517}
518
519static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
520				 enum hva_h264_sei_payload_type type,
521				 u8 *addr, u32 *size)
522{
523	struct device *dev = ctx_to_dev(pctx);
524	const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
525	struct hva_h264_stereo_video_sei info;
526	u8 offset = 7;
527	u8 msg = 0;
528
529	/* start code */
530	memcpy(addr + *size, start, sizeof(start));
531	*size += sizeof(start);
532
533	/* nal_unit_type */
534	addr[*size] = NALU_TYPE_SEI;
535	*size += 1;
536
537	/* payload type */
538	addr[*size] = type;
539	*size += 1;
540
541	switch (type) {
542	case SEI_STEREO_VIDEO_INFO:
543		memset(&info, 0, sizeof(info));
544
545		/* set to top/bottom frame packing arrangement */
546		info.field_views_flag = 1;
547		info.top_field_is_left_view_flag = 1;
548
549		/* payload size */
550		addr[*size] = 1;
551		*size += 1;
552
553		/* payload */
554		msg = info.field_views_flag << offset--;
555
556		if (info.field_views_flag) {
557			msg |= info.top_field_is_left_view_flag <<
558			       offset--;
559		} else {
560			msg |= info.current_frame_is_left_view_flag <<
561			       offset--;
562			msg |= info.next_frame_is_second_view_flag <<
563			       offset--;
564		}
565		msg |= info.left_view_self_contained_flag << offset--;
566		msg |= info.right_view_self_contained_flag << offset--;
567
568		addr[*size] = msg;
569		*size += 1;
570
571		addr[*size] = 0x80;
572		*size += 1;
573
574		return 0;
575	case SEI_BUFFERING_PERIOD:
576	case SEI_PICTURE_TIMING:
577	case SEI_FRAME_PACKING_ARRANGEMENT:
578	default:
579		dev_err(dev, "%s   sei nal type not supported %d\n",
580			pctx->name, type);
581		return -EINVAL;
582	}
583}
584
585static int hva_h264_prepare_task(struct hva_ctx *pctx,
586				 struct hva_h264_task *task,
587				 struct hva_frame *frame,
588				 struct hva_stream *stream)
589{
590	struct hva_dev *hva = ctx_to_hdev(pctx);
591	struct device *dev = ctx_to_dev(pctx);
592	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
593	struct hva_buffer *seq_info = ctx->seq_info;
594	struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
595	struct hva_buffer *loc_rec_frame = ctx->rec_frame;
596	struct hva_h264_td *td = &task->td;
597	struct hva_controls *ctrls = &pctx->ctrls;
598	struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
599	int cavlc =  V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
600	u32 frame_num = pctx->stream_num;
601	u32 addr_esram = hva->esram_addr;
602	enum v4l2_mpeg_video_h264_level level;
603	dma_addr_t paddr = 0;
604	u8 *slice_header_vaddr;
605	u32 frame_width = frame->info.aligned_width;
606	u32 frame_height = frame->info.aligned_height;
607	u32 max_cpb_buffer_size;
608	unsigned int payload = stream->bytesused;
609	u32 max_bitrate;
610
611	/* check width and height parameters */
612	if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
613	    (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
614		dev_err(dev,
615			"%s   width(%d) or height(%d) exceeds limits (%dx%d)\n",
616			pctx->name, frame_width, frame_height,
617			H264_MAX_SIZE_W, H264_MAX_SIZE_H);
618		pctx->frame_errors++;
619		return -EINVAL;
620	}
621
622	level = ctrls->level;
623
624	memset(td, 0, sizeof(struct hva_h264_td));
625
626	td->frame_width = frame_width;
627	td->frame_height = frame_height;
628
629	/* set frame alignment */
630	td->window_width =  frame_width;
631	td->window_height = frame_height;
632	td->window_horizontal_offset = 0;
633	td->window_vertical_offset = 0;
634
635	td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
636
637	/* pic_order_cnt_type hard coded to '2' as only I & P frames */
638	td->pic_order_cnt_type = 2;
639
640	/* useConstrainedIntraFlag set to false for better coding efficiency */
641	td->use_constrained_intra_flag = false;
642	td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
643			? BRC_TYPE_CBR : BRC_TYPE_VBR;
644
645	td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
646				  CABAC;
647
648	td->bit_rate = ctrls->bitrate;
649
650	/* set framerate, framerate = 1 n/ time per frame */
651	if (time_per_frame->numerator >= 536) {
652		/*
653		 * due to a hardware bug, framerate denominator can't exceed
654		 * 536 (BRC overflow). Compute nearest framerate
655		 */
656		td->framerate_den = 1;
657		td->framerate_num = (time_per_frame->denominator +
658				    (time_per_frame->numerator >> 1) - 1) /
659				    time_per_frame->numerator;
660
661		/*
662		 * update bitrate to introduce a correction due to
663		 * the new framerate
664		 * new bitrate = (old bitrate * new framerate) / old framerate
665		 */
666		td->bit_rate /= time_per_frame->numerator;
667		td->bit_rate *= time_per_frame->denominator;
668		td->bit_rate /= td->framerate_num;
669	} else {
670		td->framerate_den = time_per_frame->numerator;
671		td->framerate_num = time_per_frame->denominator;
672	}
673
674	/* compute maximum bitrate depending on profile */
675	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
676		max_bitrate = h264_infos_list[level].max_bitrate *
677			      H264_FACTOR_HIGH;
678	else
679		max_bitrate = h264_infos_list[level].max_bitrate *
680			      H264_FACTOR_BASELINE;
681
682	/* check if bitrate doesn't exceed max size */
683	if (td->bit_rate > max_bitrate) {
684		dev_dbg(dev,
685			"%s   bitrate (%d) larger than level and profile allow, clip to %d\n",
686			pctx->name, td->bit_rate, max_bitrate);
687		td->bit_rate = max_bitrate;
688	}
689
690	/* convert cpb_buffer_size in bits */
691	td->cpb_buffer_size = ctrls->cpb_size * 8000;
692
693	/* compute maximum cpb buffer size depending on profile */
694	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
695		max_cpb_buffer_size =
696		    h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
697	else
698		max_cpb_buffer_size =
699		    h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
700
701	/* check if cpb buffer size doesn't exceed max size */
702	if (td->cpb_buffer_size > max_cpb_buffer_size) {
703		dev_dbg(dev,
704			"%s   cpb size larger than level %d allows, clip to %d\n",
705			pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
706		td->cpb_buffer_size = max_cpb_buffer_size;
707	}
708
709	/* enable skipping in the Bitrate Controller */
710	td->brc_no_skip = 0;
711
712	/* initial delay */
713	if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
714	    td->bit_rate)
715		td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
716	else
717		td->delay = 0;
718
719	switch (frame->info.pixelformat) {
720	case V4L2_PIX_FMT_NV12:
721		td->sampling_mode = SAMPLING_MODE_NV12;
722		break;
723	case V4L2_PIX_FMT_NV21:
724		td->sampling_mode = SAMPLING_MODE_NV21;
725		break;
726	default:
727		dev_err(dev, "%s   invalid source pixel format\n",
728			pctx->name);
729		pctx->frame_errors++;
730		return -EINVAL;
731	}
732
733	/*
734	 * fill matrix color converter (RGB to YUV)
735	 * Y = 0,299 R + 0,587 G + 0,114 B
736	 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
737	 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
738	 */
739	td->rgb2_yuv_y_coeff = 0x12031008;
740	td->rgb2_yuv_u_coeff = 0x800EF7FB;
741	td->rgb2_yuv_v_coeff = 0x80FEF40E;
742
743	/* enable/disable transform mode */
744	td->transform_mode = ctrls->dct8x8;
745
746	/* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
747	td->encoder_complexity = 2;
748
749	/* quant fix to 28, default VBR value */
750	td->quant = 28;
751
752	if (td->framerate_den == 0) {
753		dev_err(dev, "%s   invalid framerate\n", pctx->name);
754		pctx->frame_errors++;
755		return -EINVAL;
756	}
757
758	/* if automatic framerate, deactivate bitrate controller */
759	if (td->framerate_num == 0)
760		td->brc_type = 0;
761
762	/* compliancy fix to true */
763	td->strict_hrd_compliancy = 1;
764
765	/* set minimum & maximum quantizers */
766	td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
767	td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
768
769	td->addr_source_buffer = frame->paddr;
770	td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
771	td->addr_rec_buffer = loc_rec_frame->paddr;
772
773	td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
774
775	td->addr_output_bitstream_start = (u32)stream->paddr;
776	td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
777			       BITSTREAM_OFFSET_MASK;
778
779	td->addr_param_out = (u32)ctx->task->paddr +
780			     offsetof(struct hva_h264_task, po);
781
782	/* swap spatial and temporal context */
783	if (frame_num % 2) {
784		paddr = seq_info->paddr;
785		td->addr_spatial_context =  ALIGN(paddr, 0x100);
786		paddr = seq_info->paddr + DATA_SIZE(frame_width,
787							frame_height);
788		td->addr_temporal_context = ALIGN(paddr, 0x100);
789	} else {
790		paddr = seq_info->paddr;
791		td->addr_temporal_context = ALIGN(paddr, 0x100);
792		paddr = seq_info->paddr + DATA_SIZE(frame_width,
793							frame_height);
794		td->addr_spatial_context =  ALIGN(paddr, 0x100);
795	}
796
797	paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
798
799	td->addr_brc_in_out_parameter =  ALIGN(paddr, 0x100);
800
801	paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
802	td->addr_slice_header =  ALIGN(paddr, 0x100);
803	td->addr_external_sw =  ALIGN(addr_esram, 0x100);
804
805	addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
806	td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
807
808	addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
809	td->addr_lctx = ALIGN(addr_esram, 0x100);
810
811	addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
812	td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
813
814	if (!(frame_num % ctrls->gop_size)) {
815		td->picture_coding_type = PICTURE_CODING_TYPE_I;
816		stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
817	} else {
818		td->picture_coding_type = PICTURE_CODING_TYPE_P;
819		stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
820	}
821
822	/* fill the slice header part */
823	slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
824			     seq_info->paddr);
825
826	hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
827				   &td->slice_header_size_in_bits,
828				   &td->slice_header_offset0,
829				   &td->slice_header_offset1,
830				   &td->slice_header_offset2);
831
832	td->chroma_qp_index_offset = 2;
833	td->slice_synchro_enable = 0;
834	td->max_slice_number = 1;
835
836	/*
837	 * check the sps/pps header size for key frame only
838	 * sps/pps header was previously fill by libv4l
839	 * during qbuf of stream buffer
840	 */
841	if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
842	    (payload > MAX_SPS_PPS_SIZE)) {
843		dev_err(dev, "%s   invalid sps/pps size %d\n", pctx->name,
844			payload);
845		pctx->frame_errors++;
846		return -EINVAL;
847	}
848
849	if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
850		payload = 0;
851
852	/* add SEI nal (video stereo info) */
853	if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
854						   (u8 *)stream->vaddr,
855						   &payload)) {
856		dev_err(dev, "%s   fail to get SEI nal\n", pctx->name);
857		pctx->frame_errors++;
858		return -EINVAL;
859	}
860
861	/* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
862	td->non_vcl_nalu_size = payload * 8;
863
864	/* compute bitstream offset & new start address of bitstream */
865	td->addr_output_bitstream_start += ((payload >> 4) << 4);
866	td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
867
868	stream->bytesused = payload;
869
870	return 0;
871}
872
873static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
874{
875	struct hva_h264_po *po = &task->po;
876
877	return po->bitstream_size;
878}
879
880static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
881{
882	struct hva_h264_po *po = &task->po;
883
884	return po->stuffing_bits >> 3;
885}
886
887static int hva_h264_open(struct hva_ctx *pctx)
888{
889	struct device *dev = ctx_to_dev(pctx);
890	struct hva_h264_ctx *ctx;
891	struct hva_dev *hva = ctx_to_hdev(pctx);
892	u32 frame_width = pctx->frameinfo.aligned_width;
893	u32 frame_height = pctx->frameinfo.aligned_height;
894	u32 size;
895	int ret;
896
897	/* check esram size necessary to encode a frame */
898	size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
899	       LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
900	       CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
901	       CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
902
903	if (hva->esram_size < size) {
904		dev_err(dev, "%s   not enough esram (max:%d request:%d)\n",
905			pctx->name, hva->esram_size, size);
906		ret = -EINVAL;
907		goto err;
908	}
909
910	/* allocate context for codec */
911	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
912	if (!ctx) {
913		ret = -ENOMEM;
914		goto err;
915	}
916
917	/* allocate sequence info buffer */
918	ret = hva_mem_alloc(pctx,
919			    2 * DATA_SIZE(frame_width, frame_height) +
920			    SLICE_HEADER_SIZE +
921			    BRC_DATA_SIZE,
922			    "hva sequence info",
923			    &ctx->seq_info);
924	if (ret) {
925		dev_err(dev,
926			"%s   failed to allocate sequence info buffer\n",
927			pctx->name);
928		goto err_ctx;
929	}
930
931	/* allocate reference frame buffer */
932	ret = hva_mem_alloc(pctx,
933			    frame_width * frame_height * 3 / 2,
934			    "hva reference frame",
935			    &ctx->ref_frame);
936	if (ret) {
937		dev_err(dev, "%s   failed to allocate reference frame buffer\n",
938			pctx->name);
939		goto err_seq_info;
940	}
941
942	/* allocate reconstructed frame buffer */
943	ret = hva_mem_alloc(pctx,
944			    frame_width * frame_height * 3 / 2,
945			    "hva reconstructed frame",
946			    &ctx->rec_frame);
947	if (ret) {
948		dev_err(dev,
949			"%s   failed to allocate reconstructed frame buffer\n",
950			pctx->name);
951		goto err_ref_frame;
952	}
953
954	/* allocate task descriptor */
955	ret = hva_mem_alloc(pctx,
956			    sizeof(struct hva_h264_task),
957			    "hva task descriptor",
958			    &ctx->task);
959	if (ret) {
960		dev_err(dev,
961			"%s   failed to allocate task descriptor\n",
962			pctx->name);
963		goto err_rec_frame;
964	}
965
966	pctx->priv = (void *)ctx;
967
968	return 0;
969
970err_rec_frame:
971	hva_mem_free(pctx, ctx->rec_frame);
972err_ref_frame:
973	hva_mem_free(pctx, ctx->ref_frame);
974err_seq_info:
975	hva_mem_free(pctx, ctx->seq_info);
976err_ctx:
977	devm_kfree(dev, ctx);
978err:
979	pctx->sys_errors++;
980	return ret;
981}
982
983static int hva_h264_close(struct hva_ctx *pctx)
984{
985	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
986	struct device *dev = ctx_to_dev(pctx);
987
988	if (ctx->seq_info)
989		hva_mem_free(pctx, ctx->seq_info);
990
991	if (ctx->ref_frame)
992		hva_mem_free(pctx, ctx->ref_frame);
993
994	if (ctx->rec_frame)
995		hva_mem_free(pctx, ctx->rec_frame);
996
997	if (ctx->task)
998		hva_mem_free(pctx, ctx->task);
999
1000	devm_kfree(dev, ctx);
1001
1002	return 0;
1003}
1004
1005static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
1006			   struct hva_stream *stream)
1007{
1008	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
1009	struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
1010	u32 stuffing_bytes = 0;
1011	int ret = 0;
1012
1013	ret = hva_h264_prepare_task(pctx, task, frame, stream);
1014	if (ret)
1015		goto err;
1016
1017	ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
1018	if (ret)
1019		goto err;
1020
1021	pctx->stream_num++;
1022	stream->bytesused += hva_h264_get_stream_size(task);
1023
1024	stuffing_bytes = hva_h264_get_stuffing_bytes(task);
1025
1026	if (stuffing_bytes)
1027		hva_h264_fill_data_nal(pctx, stuffing_bytes,
1028				       (u8 *)stream->vaddr,
1029				       stream->size,
1030				       &stream->bytesused);
1031
1032	/* switch reference & reconstructed frame */
1033	swap(ctx->ref_frame, ctx->rec_frame);
1034
1035	return 0;
1036err:
1037	stream->bytesused = 0;
1038	return ret;
1039}
1040
/*
 * H.264 encoder descriptor for NV12 input frames.
 * Shares its open/close/encode callbacks with nv21h264enc; the input pixel
 * format is resolved per frame in hva_h264_prepare_task().
 */
const struct hva_enc nv12h264enc = {
	.name = "H264(NV12)",
	.pixelformat = V4L2_PIX_FMT_NV12,
	.streamformat = V4L2_PIX_FMT_H264,
	.max_width = H264_MAX_SIZE_W,
	.max_height = H264_MAX_SIZE_H,
	.open = hva_h264_open,
	.close = hva_h264_close,
	.encode = hva_h264_encode,
};
1051
/*
 * H.264 encoder descriptor for NV21 input frames.
 * Shares its open/close/encode callbacks with nv12h264enc; the input pixel
 * format is resolved per frame in hva_h264_prepare_task().
 */
const struct hva_enc nv21h264enc = {
	.name = "H264(NV21)",
	.pixelformat = V4L2_PIX_FMT_NV21,
	.streamformat = V4L2_PIX_FMT_H264,
	.max_width = H264_MAX_SIZE_W,
	.max_height = H264_MAX_SIZE_H,
	.open = hva_h264_open,
	.close = hva_h264_close,
	.encode = hva_h264_encode,
};
1062