1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Coda multi-standard codec IP - JPEG support functions
4 *
5 * Copyright (C) 2014 Philipp Zabel, Pengutronix
6 */
7
8#include <asm/unaligned.h>
9#include <linux/irqreturn.h>
10#include <linux/kernel.h>
11#include <linux/ktime.h>
12#include <linux/slab.h>
13#include <linux/swab.h>
14#include <linux/videodev2.h>
15
16#include <media/v4l2-common.h>
17#include <media/v4l2-fh.h>
18#include <media/v4l2-jpeg.h>
19#include <media/v4l2-mem2mem.h>
20#include <media/videobuf2-core.h>
21#include <media/videobuf2-dma-contig.h>
22
23#include "coda.h"
24#include "trace.h"
25
/* JPEG marker codes; this file reads and writes them as big-endian 16-bit words */
#define SOI_MARKER	0xffd8
#define APP9_MARKER	0xffe9
#define DRI_MARKER	0xffdd
#define DQT_MARKER	0xffdb
#define DHT_MARKER	0xffc4
#define SOF_MARKER	0xffc0
#define SOS_MARKER	0xffda
#define EOI_MARKER	0xffd9
34
/*
 * Chroma format identifiers. They are used as indices into the per-format
 * tables in this file (width_align, height_align, bus_req_num, mcu_info).
 */
enum {
	CODA9_JPEG_FORMAT_420,
	CODA9_JPEG_FORMAT_422,
	CODA9_JPEG_FORMAT_224,
	CODA9_JPEG_FORMAT_444,
	CODA9_JPEG_FORMAT_400,
};
42
/*
 * Decoder Huffman tables for one JPEG stream.
 *
 * Each of the four raw tables holds 16 code-length counts ("bits") followed
 * by the symbol values (up to 12 for DC, up to 162 for AC), as copied from
 * the parsed header by coda_jpeg_decode_header().
 */
struct coda_huff_tab {
	u8 luma_dc[16 + 12];
	u8 chroma_dc[16 + 12];
	u8 luma_ac[16 + 162];
	u8 chroma_ac[16 + 162];

	/*
	 * Per code length lookup data, 16 entries per table, filled in by
	 * coda9_jpeg_gen_dec_huff_tab().
	 */
	/* DC Luma, DC Chroma, AC Luma, AC Chroma */
	s16	min[4 * 16];
	s16	max[4 * 16];
	s8	ptr[4 * 16];
};
54
/*
 * Number of 32-bit words of encoder Huffman lookup data: two 256-entry AC
 * tables followed by two 16-entry DC tables (see coda9_jpeg_load_huff_tab()).
 */
#define CODA9_JPEG_ENC_HUFF_DATA_SIZE	(256 + 256 + 16 + 16)
56
57/*
58 * Typical Huffman tables for 8-bit precision luminance and
59 * chrominance from JPEG ITU-T.81 (ISO/IEC 10918-1) Annex K.3
60 */
61
/* Luminance DC table: 16 code-length counts followed by 12 symbol values */
static const unsigned char luma_dc[16 + 12] = {
	/* bits */
	0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* values */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b,
};
70
/* Chrominance DC table: 16 code-length counts followed by 12 symbol values */
static const unsigned char chroma_dc[16 + 12] = {
	/* bits */
	0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
	/* values */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b,
};
79
/*
 * Luminance AC table: 16 code-length counts followed by 162 symbol values.
 * The array is declared two bytes larger than the initialiser list, so the
 * last two bytes are implicitly zero.
 */
static const unsigned char luma_ac[16 + 162 + 2] = {
	/* bits */
	0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03,
	0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d,
	/* values */
	0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
	0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
	0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
	0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
	0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
	0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
	0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
	0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
	0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
	0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
	0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
	0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
	0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
	0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
	0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
	0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
	0xf9, 0xfa, /* padded to 32-bit */
};
107
/*
 * Chrominance AC table: 16 code-length counts followed by 162 symbol values.
 * The array is declared two bytes larger than the initialiser list, so the
 * last two bytes are implicitly zero.
 */
static const unsigned char chroma_ac[16 + 162 + 2] = {
	/* bits */
	0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04,
	0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77,
	/* values */
	0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
	0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
	0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
	0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
	0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
	0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
	0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
	0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
	0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
	0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
	0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
	0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
	0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
	0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
	0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
	0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
	0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
	0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
	0xf9, 0xfa, /* padded to 32-bit */
};
135
136/*
137 * Quantization tables for luminance and chrominance components in
138 * zig-zag scan order from the Freescale i.MX VPU libraries
139 */
140
/*
 * Default luminance quantization table. Not const because
 * coda9_jpeg_load_qmat_tab() hands it out through a non-const u8 pointer.
 */
static unsigned char luma_q[64] = {
	0x06, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x05,
	0x05, 0x06, 0x09, 0x06, 0x05, 0x06, 0x09, 0x0b,
	0x08, 0x06, 0x06, 0x08, 0x0b, 0x0c, 0x0a, 0x0a,
	0x0b, 0x0a, 0x0a, 0x0c, 0x10, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x10, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
};
151
/*
 * Default chrominance quantization table. Not const because
 * coda9_jpeg_load_qmat_tab() hands it out through a non-const u8 pointer.
 */
static unsigned char chroma_q[64] = {
	0x07, 0x07, 0x07, 0x0d, 0x0c, 0x0d, 0x18, 0x10,
	0x10, 0x18, 0x14, 0x0e, 0x0e, 0x0e, 0x14, 0x14,
	0x0e, 0x0e, 0x0e, 0x0e, 0x14, 0x11, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x11, 0x11, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x11, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
	0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
};
162
/*
 * Width alignment in pixels, indexed by CODA9_JPEG_FORMAT_*. The encoder
 * rounds the image width up to a multiple of this value.
 */
static const unsigned char width_align[] = {
	[CODA9_JPEG_FORMAT_420] = 16,
	[CODA9_JPEG_FORMAT_422] = 16,
	[CODA9_JPEG_FORMAT_224] = 8,
	[CODA9_JPEG_FORMAT_444] = 8,
	[CODA9_JPEG_FORMAT_400] = 8,
};
170
/*
 * Height alignment in pixels, indexed by CODA9_JPEG_FORMAT_*. The encoder
 * rounds the image height up to a multiple of this value.
 */
static const unsigned char height_align[] = {
	[CODA9_JPEG_FORMAT_420] = 16,
	[CODA9_JPEG_FORMAT_422] = 8,
	[CODA9_JPEG_FORMAT_224] = 16,
	[CODA9_JPEG_FORMAT_444] = 8,
	[CODA9_JPEG_FORMAT_400] = 8,
};
178
179static int coda9_jpeg_chroma_format(u32 pixfmt)
180{
181	switch (pixfmt) {
182	case V4L2_PIX_FMT_YUV420:
183	case V4L2_PIX_FMT_NV12:
184		return CODA9_JPEG_FORMAT_420;
185	case V4L2_PIX_FMT_YUV422P:
186		return CODA9_JPEG_FORMAT_422;
187	case V4L2_PIX_FMT_YUV444:
188		return CODA9_JPEG_FORMAT_444;
189	case V4L2_PIX_FMT_GREY:
190		return CODA9_JPEG_FORMAT_400;
191	}
192	return -EINVAL;
193}
194
/* Describes one table copy into the parameter buffer */
struct coda_memcpy_desc {
	int offset;		/* destination byte offset inside the parameter buffer */
	const void *src;	/* source table */
	size_t len;		/* source length in bytes */
};
200
201static void coda_memcpy_parabuf(void *parabuf,
202				const struct coda_memcpy_desc *desc)
203{
204	u32 *dst = parabuf + desc->offset;
205	const u32 *src = desc->src;
206	int len = desc->len / 4;
207	int i;
208
209	for (i = 0; i < len; i += 2) {
210		dst[i + 1] = swab32(src[i]);
211		dst[i] = swab32(src[i + 1]);
212	}
213}
214
215int coda_jpeg_write_tables(struct coda_ctx *ctx)
216{
217	int i;
218	static const struct coda_memcpy_desc huff[8] = {
219		{ 0,   luma_dc,    sizeof(luma_dc)    },
220		{ 32,  luma_ac,    sizeof(luma_ac)    },
221		{ 216, chroma_dc,  sizeof(chroma_dc)  },
222		{ 248, chroma_ac,  sizeof(chroma_ac)  },
223	};
224	struct coda_memcpy_desc qmat[3] = {
225		{ 512, ctx->params.jpeg_qmat_tab[0], 64 },
226		{ 576, ctx->params.jpeg_qmat_tab[1], 64 },
227		{ 640, ctx->params.jpeg_qmat_tab[1], 64 },
228	};
229
230	/* Write huffman tables to parameter memory */
231	for (i = 0; i < ARRAY_SIZE(huff); i++)
232		coda_memcpy_parabuf(ctx->parabuf.vaddr, huff + i);
233
234	/* Write Q-matrix to parameter memory */
235	for (i = 0; i < ARRAY_SIZE(qmat); i++)
236		coda_memcpy_parabuf(ctx->parabuf.vaddr, qmat + i);
237
238	return 0;
239}
240
241bool coda_jpeg_check_buffer(struct coda_ctx *ctx, struct vb2_buffer *vb)
242{
243	void *vaddr = vb2_plane_vaddr(vb, 0);
244	u16 soi, eoi;
245	int len, i;
246
247	soi = be16_to_cpup((__be16 *)vaddr);
248	if (soi != SOI_MARKER)
249		return false;
250
251	len = vb2_get_plane_payload(vb, 0);
252	vaddr += len - 2;
253	for (i = 0; i < 32; i++) {
254		eoi = be16_to_cpup((__be16 *)(vaddr - i));
255		if (eoi == EOI_MARKER) {
256			if (i > 0)
257				vb2_set_plane_payload(vb, 0, len - i);
258			return true;
259		}
260	}
261
262	return false;
263}
264
/* Defined further down; coda_jpeg_decode_header() needs it before that */
static int coda9_jpeg_gen_dec_huff_tab(struct coda_ctx *ctx, int tab_num);
266
267int coda_jpeg_decode_header(struct coda_ctx *ctx, struct vb2_buffer *vb)
268{
269	struct coda_dev *dev = ctx->dev;
270	u8 *buf = vb2_plane_vaddr(vb, 0);
271	size_t len = vb2_get_plane_payload(vb, 0);
272	struct v4l2_jpeg_scan_header scan_header;
273	struct v4l2_jpeg_reference quantization_tables[4] = { };
274	struct v4l2_jpeg_reference huffman_tables[4] = { };
275	struct v4l2_jpeg_header header = {
276		.scan = &scan_header,
277		.quantization_tables = quantization_tables,
278		.huffman_tables = huffman_tables,
279	};
280	struct coda_q_data *q_data_src;
281	struct coda_huff_tab *huff_tab;
282	int i, j, ret;
283
284	ret = v4l2_jpeg_parse_header(buf, len, &header);
285	if (ret < 0) {
286		v4l2_err(&dev->v4l2_dev, "failed to parse header\n");
287		return ret;
288	}
289
290	ctx->params.jpeg_restart_interval = header.restart_interval;
291
292	/* check frame header */
293	if (header.frame.height > ctx->codec->max_h ||
294	    header.frame.width > ctx->codec->max_w) {
295		v4l2_err(&dev->v4l2_dev, "invalid dimensions: %dx%d\n",
296			 header.frame.width, header.frame.height);
297		return -EINVAL;
298	}
299
300	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
301	if (header.frame.height != q_data_src->height ||
302	    header.frame.width != q_data_src->width) {
303		v4l2_err(&dev->v4l2_dev,
304			 "dimensions don't match format: %dx%d\n",
305			 header.frame.width, header.frame.height);
306		return -EINVAL;
307	}
308
309	if (header.frame.num_components != 3) {
310		v4l2_err(&dev->v4l2_dev,
311			 "unsupported number of components: %d\n",
312			 header.frame.num_components);
313		return -EINVAL;
314	}
315
316	/* install quantization tables */
317	if (quantization_tables[3].start) {
318		v4l2_err(&dev->v4l2_dev,
319			 "only 3 quantization tables supported\n");
320		return -EINVAL;
321	}
322	for (i = 0; i < 3; i++) {
323		if (!quantization_tables[i].start)
324			continue;
325		if (quantization_tables[i].length != 64) {
326			v4l2_err(&dev->v4l2_dev,
327				 "only 8-bit quantization tables supported\n");
328			continue;
329		}
330		if (!ctx->params.jpeg_qmat_tab[i]) {
331			ctx->params.jpeg_qmat_tab[i] = kmalloc(64, GFP_KERNEL);
332			if (!ctx->params.jpeg_qmat_tab[i])
333				return -ENOMEM;
334		}
335		memcpy(ctx->params.jpeg_qmat_tab[i],
336		       quantization_tables[i].start, 64);
337	}
338
339	/* install Huffman tables */
340	for (i = 0; i < 4; i++) {
341		if (!huffman_tables[i].start) {
342			v4l2_err(&dev->v4l2_dev, "missing Huffman table\n");
343			return -EINVAL;
344		}
345		/* AC tables should be between 17 -> 178, DC between 17 -> 28 */
346		if (huffman_tables[i].length < 17 ||
347		    huffman_tables[i].length > 178 ||
348		    ((i & 2) == 0 && huffman_tables[i].length > 28)) {
349			v4l2_err(&dev->v4l2_dev,
350				 "invalid Huffman table %d length: %zu\n",
351				 i, huffman_tables[i].length);
352			return -EINVAL;
353		}
354	}
355	huff_tab = ctx->params.jpeg_huff_tab;
356	if (!huff_tab) {
357		huff_tab = kzalloc(sizeof(struct coda_huff_tab), GFP_KERNEL);
358		if (!huff_tab)
359			return -ENOMEM;
360		ctx->params.jpeg_huff_tab = huff_tab;
361	}
362
363	memset(huff_tab, 0, sizeof(*huff_tab));
364	memcpy(huff_tab->luma_dc, huffman_tables[0].start, huffman_tables[0].length);
365	memcpy(huff_tab->chroma_dc, huffman_tables[1].start, huffman_tables[1].length);
366	memcpy(huff_tab->luma_ac, huffman_tables[2].start, huffman_tables[2].length);
367	memcpy(huff_tab->chroma_ac, huffman_tables[3].start, huffman_tables[3].length);
368
369	/* check scan header */
370	for (i = 0; i < scan_header.num_components; i++) {
371		struct v4l2_jpeg_scan_component_spec *scan_component;
372
373		scan_component = &scan_header.component[i];
374		for (j = 0; j < header.frame.num_components; j++) {
375			if (header.frame.component[j].component_identifier ==
376			    scan_component->component_selector)
377				break;
378		}
379		if (j == header.frame.num_components)
380			continue;
381
382		ctx->params.jpeg_huff_dc_index[j] =
383			scan_component->dc_entropy_coding_table_selector;
384		ctx->params.jpeg_huff_ac_index[j] =
385			scan_component->ac_entropy_coding_table_selector;
386	}
387
388	/* Generate Huffman table information */
389	for (i = 0; i < 4; i++)
390		coda9_jpeg_gen_dec_huff_tab(ctx, i);
391
392	/* start of entropy coded segment */
393	ctx->jpeg_ecs_offset = header.ecs_offset;
394
395	switch (header.frame.subsampling) {
396	case V4L2_JPEG_CHROMA_SUBSAMPLING_420:
397	case V4L2_JPEG_CHROMA_SUBSAMPLING_422:
398		ctx->params.jpeg_chroma_subsampling = header.frame.subsampling;
399		break;
400	default:
401		v4l2_err(&dev->v4l2_dev, "chroma subsampling not supported: %d",
402			 header.frame.subsampling);
403		return -EINVAL;
404	}
405
406	return 0;
407}
408
409static inline void coda9_jpeg_write_huff_values(struct coda_dev *dev, u8 *bits,
410						int num_values)
411{
412	s8 *values = (s8 *)(bits + 16);
413	int huff_length, i;
414
415	for (huff_length = 0, i = 0; i < 16; i++)
416		huff_length += bits[i];
417	for (i = huff_length; i < num_values; i++)
418		values[i] = -1;
419	for (i = 0; i < num_values; i++)
420		coda_write(dev, (s32)values[i], CODA9_REG_JPEG_HUFF_DATA);
421}
422
423static int coda9_jpeg_dec_huff_setup(struct coda_ctx *ctx)
424{
425	struct coda_huff_tab *huff_tab = ctx->params.jpeg_huff_tab;
426	struct coda_dev *dev = ctx->dev;
427	s16 *huff_min = huff_tab->min;
428	s16 *huff_max = huff_tab->max;
429	s8 *huff_ptr = huff_tab->ptr;
430	int i;
431
432	/* MIN Tables */
433	coda_write(dev, 0x003, CODA9_REG_JPEG_HUFF_CTRL);
434	coda_write(dev, 0x000, CODA9_REG_JPEG_HUFF_ADDR);
435	for (i = 0; i < 4 * 16; i++)
436		coda_write(dev, (s32)huff_min[i], CODA9_REG_JPEG_HUFF_DATA);
437
438	/* MAX Tables */
439	coda_write(dev, 0x403, CODA9_REG_JPEG_HUFF_CTRL);
440	coda_write(dev, 0x440, CODA9_REG_JPEG_HUFF_ADDR);
441	for (i = 0; i < 4 * 16; i++)
442		coda_write(dev, (s32)huff_max[i], CODA9_REG_JPEG_HUFF_DATA);
443
444	/* PTR Tables */
445	coda_write(dev, 0x803, CODA9_REG_JPEG_HUFF_CTRL);
446	coda_write(dev, 0x880, CODA9_REG_JPEG_HUFF_ADDR);
447	for (i = 0; i < 4 * 16; i++)
448		coda_write(dev, (s32)huff_ptr[i], CODA9_REG_JPEG_HUFF_DATA);
449
450	/* VAL Tables: DC Luma, DC Chroma, AC Luma, AC Chroma */
451	coda_write(dev, 0xc03, CODA9_REG_JPEG_HUFF_CTRL);
452	coda9_jpeg_write_huff_values(dev, huff_tab->luma_dc, 12);
453	coda9_jpeg_write_huff_values(dev, huff_tab->chroma_dc, 12);
454	coda9_jpeg_write_huff_values(dev, huff_tab->luma_ac, 162);
455	coda9_jpeg_write_huff_values(dev, huff_tab->chroma_ac, 162);
456	coda_write(dev, 0x000, CODA9_REG_JPEG_HUFF_CTRL);
457	return 0;
458}
459
460static inline void coda9_jpeg_write_qmat_tab(struct coda_dev *dev,
461					     u8 *qmat, int index)
462{
463	int i;
464
465	coda_write(dev, index | 0x3, CODA9_REG_JPEG_QMAT_CTRL);
466	for (i = 0; i < 64; i++)
467		coda_write(dev, qmat[i], CODA9_REG_JPEG_QMAT_DATA);
468	coda_write(dev, 0, CODA9_REG_JPEG_QMAT_CTRL);
469}
470
471static void coda9_jpeg_qmat_setup(struct coda_ctx *ctx)
472{
473	struct coda_dev *dev = ctx->dev;
474	int *qmat_index = ctx->params.jpeg_qmat_index;
475	u8 **qmat_tab = ctx->params.jpeg_qmat_tab;
476
477	coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[0]], 0x00);
478	coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[1]], 0x40);
479	coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[2]], 0x80);
480}
481
482static void coda9_jpeg_dec_bbc_gbu_setup(struct coda_ctx *ctx,
483					 struct vb2_buffer *buf, u32 ecs_offset)
484{
485	struct coda_dev *dev = ctx->dev;
486	int page_ptr, word_ptr, bit_ptr;
487	u32 bbc_base_addr, end_addr;
488	int bbc_cur_pos;
489	int ret, val;
490
491	bbc_base_addr = vb2_dma_contig_plane_dma_addr(buf, 0);
492	end_addr = bbc_base_addr + vb2_get_plane_payload(buf, 0);
493
494	page_ptr = ecs_offset / 256;
495	word_ptr = (ecs_offset % 256) / 4;
496	if (page_ptr & 1)
497		word_ptr += 64;
498	bit_ptr = (ecs_offset % 4) * 8;
499	if (word_ptr & 1)
500		bit_ptr += 32;
501	word_ptr &= ~0x1;
502
503	coda_write(dev, end_addr, CODA9_REG_JPEG_BBC_WR_PTR);
504	coda_write(dev, bbc_base_addr, CODA9_REG_JPEG_BBC_BAS_ADDR);
505
506	/* Leave 3 256-byte page margin to avoid a BBC interrupt */
507	coda_write(dev, end_addr + 256 * 3 + 256, CODA9_REG_JPEG_BBC_END_ADDR);
508	val = DIV_ROUND_UP(vb2_plane_size(buf, 0), 256) + 3;
509	coda_write(dev, BIT(31) | val, CODA9_REG_JPEG_BBC_STRM_CTRL);
510
511	bbc_cur_pos = page_ptr;
512	coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
513	coda_write(dev, bbc_base_addr + (bbc_cur_pos << 8),
514			CODA9_REG_JPEG_BBC_EXT_ADDR);
515	coda_write(dev, (bbc_cur_pos & 1) << 6, CODA9_REG_JPEG_BBC_INT_ADDR);
516	coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
517	coda_write(dev, 0, CODA9_REG_JPEG_BBC_COMMAND);
518	do {
519		ret = coda_read(dev, CODA9_REG_JPEG_BBC_BUSY);
520	} while (ret == 1);
521
522	bbc_cur_pos++;
523	coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
524	coda_write(dev, bbc_base_addr + (bbc_cur_pos << 8),
525			CODA9_REG_JPEG_BBC_EXT_ADDR);
526	coda_write(dev, (bbc_cur_pos & 1) << 6, CODA9_REG_JPEG_BBC_INT_ADDR);
527	coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
528	coda_write(dev, 0, CODA9_REG_JPEG_BBC_COMMAND);
529	do {
530		ret = coda_read(dev, CODA9_REG_JPEG_BBC_BUSY);
531	} while (ret == 1);
532
533	bbc_cur_pos++;
534	coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
535	coda_write(dev, 1, CODA9_REG_JPEG_BBC_CTRL);
536
537	coda_write(dev, 0, CODA9_REG_JPEG_GBU_TT_CNT);
538	coda_write(dev, word_ptr, CODA9_REG_JPEG_GBU_WD_PTR);
539	coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR);
540	coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER);
541	if (page_ptr & 1) {
542		coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBIR);
543		coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBHR);
544	} else {
545		coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBIR);
546		coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBHR);
547	}
548	coda_write(dev, 4, CODA9_REG_JPEG_GBU_CTRL);
549	coda_write(dev, bit_ptr, CODA9_REG_JPEG_GBU_FF_RPTR);
550	coda_write(dev, 3, CODA9_REG_JPEG_GBU_CTRL);
551}
552
/* Per chroma format value, indexed by CODA9_JPEG_FORMAT_* */
static const int bus_req_num[] = {
	[CODA9_JPEG_FORMAT_420] = 2,
	[CODA9_JPEG_FORMAT_422] = 3,
	[CODA9_JPEG_FORMAT_224] = 3,
	[CODA9_JPEG_FORMAT_444] = 4,
	[CODA9_JPEG_FORMAT_400] = 4,
};
560
/*
 * Pack the five MCU description fields into a single value by shifting each
 * one to its CODA9_JPEG_*_OFFSET position and OR-ing the results.
 */
#define MCU_INFO(mcu_block_num, comp_num, comp0_info, comp1_info, comp2_info) \
	(((mcu_block_num) << CODA9_JPEG_MCU_BLOCK_NUM_OFFSET) | \
	 ((comp_num) << CODA9_JPEG_COMP_NUM_OFFSET) | \
	 ((comp0_info) << CODA9_JPEG_COMP0_INFO_OFFSET) | \
	 ((comp1_info) << CODA9_JPEG_COMP1_INFO_OFFSET) | \
	 ((comp2_info) << CODA9_JPEG_COMP2_INFO_OFFSET))
567
/*
 * Packed MCU description per chroma format, indexed by CODA9_JPEG_FORMAT_*.
 * Arguments are: blocks per MCU, number of components, then one info value
 * for each of the three components.
 */
static const u32 mcu_info[] = {
	[CODA9_JPEG_FORMAT_420] = MCU_INFO(6, 3, 10, 5, 5),
	[CODA9_JPEG_FORMAT_422] = MCU_INFO(4, 3, 9, 5, 5),
	[CODA9_JPEG_FORMAT_224] = MCU_INFO(4, 3, 6, 5, 5),
	[CODA9_JPEG_FORMAT_444] = MCU_INFO(3, 3, 5, 5, 5),
	[CODA9_JPEG_FORMAT_400] = MCU_INFO(1, 1, 5, 0, 0),
};
575
576/*
 * Convert Huffman table specifications to tables of codes and code lengths.
578 * For reference, see JPEG ITU-T.81 (ISO/IEC 10918-1) [1]
579 *
580 * [1] https://www.w3.org/Graphics/JPEG/itu-t81.pdf
581 */
582static int coda9_jpeg_gen_enc_huff_tab(struct coda_ctx *ctx, int tab_num,
583				       int *ehufsi, int *ehufco)
584{
585	int i, j, k, lastk, si, code, maxsymbol;
586	const u8 *bits, *huffval;
587	struct {
588		int size[256];
589		int code[256];
590	} *huff;
591	static const unsigned char *huff_tabs[4] = {
592		luma_dc, luma_ac, chroma_dc, chroma_ac,
593	};
594	int ret = -EINVAL;
595
596	huff = kzalloc(sizeof(*huff), GFP_KERNEL);
597	if (!huff)
598		return -ENOMEM;
599
600	bits = huff_tabs[tab_num];
601	huffval = huff_tabs[tab_num] + 16;
602
603	maxsymbol = tab_num & 1 ? 256 : 16;
604
605	/* Figure C.1 - Generation of table of Huffman code sizes */
606	k = 0;
607	for (i = 1; i <= 16; i++) {
608		j = bits[i - 1];
609		if (k + j > maxsymbol)
610			goto out;
611		while (j--)
612			huff->size[k++] = i;
613	}
614	lastk = k;
615
616	/* Figure C.2 - Generation of table of Huffman codes */
617	k = 0;
618	code = 0;
619	si = huff->size[0];
620	while (k < lastk) {
621		while (huff->size[k] == si) {
622			huff->code[k++] = code;
623			code++;
624		}
625		if (code >= (1 << si))
626			goto out;
627		code <<= 1;
628		si++;
629	}
630
631	/* Figure C.3 - Ordering procedure for encoding procedure code tables */
632	for (k = 0; k < lastk; k++) {
633		i = huffval[k];
634		if (i >= maxsymbol || ehufsi[i])
635			goto out;
636		ehufco[i] = huff->code[k];
637		ehufsi[i] = huff->size[k];
638	}
639
640	ret = 0;
641out:
642	kfree(huff);
643	return ret;
644}
645
/*
 * Huffman table numbers: index 0 selects the luma tables and index 1 the
 * chroma tables (see coda9_jpeg_get_huff_bits()).
 */
#define DC_TABLE_INDEX0		    0
#define AC_TABLE_INDEX0		    1
#define DC_TABLE_INDEX1		    2
#define AC_TABLE_INDEX1		    3
650
651static u8 *coda9_jpeg_get_huff_bits(struct coda_ctx *ctx, int tab_num)
652{
653	struct coda_huff_tab *huff_tab = ctx->params.jpeg_huff_tab;
654
655	if (!huff_tab)
656		return NULL;
657
658	switch (tab_num) {
659	case DC_TABLE_INDEX0: return huff_tab->luma_dc;
660	case AC_TABLE_INDEX0: return huff_tab->luma_ac;
661	case DC_TABLE_INDEX1: return huff_tab->chroma_dc;
662	case AC_TABLE_INDEX1: return huff_tab->chroma_ac;
663	}
664
665	return NULL;
666}
667
668static int coda9_jpeg_gen_dec_huff_tab(struct coda_ctx *ctx, int tab_num)
669{
670	int ptr_cnt = 0, huff_code = 0, zero_flag = 0, data_flag = 0;
671	u8 *huff_bits;
672	s16 *huff_max;
673	s16 *huff_min;
674	s8 *huff_ptr;
675	int ofs;
676	int i;
677
678	huff_bits = coda9_jpeg_get_huff_bits(ctx, tab_num);
679	if (!huff_bits)
680		return -EINVAL;
681
682	/* DC/AC Luma, DC/AC Chroma -> DC Luma/Chroma, AC Luma/Chroma */
683	ofs = ((tab_num & 1) << 1) | ((tab_num >> 1) & 1);
684	ofs *= 16;
685
686	huff_ptr = ctx->params.jpeg_huff_tab->ptr + ofs;
687	huff_max = ctx->params.jpeg_huff_tab->max + ofs;
688	huff_min = ctx->params.jpeg_huff_tab->min + ofs;
689
690	for (i = 0; i < 16; i++) {
691		if (huff_bits[i]) {
692			huff_ptr[i] = ptr_cnt;
693			ptr_cnt += huff_bits[i];
694			huff_min[i] = huff_code;
695			huff_max[i] = huff_code + (huff_bits[i] - 1);
696			data_flag = 1;
697			zero_flag = 0;
698		} else {
699			huff_ptr[i] = -1;
700			huff_min[i] = -1;
701			huff_max[i] = -1;
702			zero_flag = 1;
703		}
704
705		if (data_flag == 1) {
706			if (zero_flag == 1)
707				huff_code <<= 1;
708			else
709				huff_code = (huff_max[i] + 1) << 1;
710		}
711	}
712
713	return 0;
714}
715
716static int coda9_jpeg_load_huff_tab(struct coda_ctx *ctx)
717{
718	struct {
719		int size[4][256];
720		int code[4][256];
721	} *huff;
722	u32 *huff_data;
723	int i, j;
724	int ret;
725
726	huff = kzalloc(sizeof(*huff), GFP_KERNEL);
727	if (!huff)
728		return -ENOMEM;
729
730	/* Generate all four (luma/chroma DC/AC) code/size lookup tables */
731	for (i = 0; i < 4; i++) {
732		ret = coda9_jpeg_gen_enc_huff_tab(ctx, i, huff->size[i],
733						  huff->code[i]);
734		if (ret)
735			goto out;
736	}
737
738	if (!ctx->params.jpeg_huff_data) {
739		ctx->params.jpeg_huff_data =
740			kzalloc(sizeof(u32) * CODA9_JPEG_ENC_HUFF_DATA_SIZE,
741				GFP_KERNEL);
742		if (!ctx->params.jpeg_huff_data) {
743			ret = -ENOMEM;
744			goto out;
745		}
746	}
747	huff_data = ctx->params.jpeg_huff_data;
748
749	for (j = 0; j < 4; j++) {
750		/* Store Huffman lookup tables in AC0, AC1, DC0, DC1 order */
751		int t = (j == 0) ? AC_TABLE_INDEX0 :
752			(j == 1) ? AC_TABLE_INDEX1 :
753			(j == 2) ? DC_TABLE_INDEX0 :
754				   DC_TABLE_INDEX1;
755		/* DC tables only have 16 entries */
756		int len = (j < 2) ? 256 : 16;
757
758		for (i = 0; i < len; i++) {
759			if (huff->size[t][i] == 0 && huff->code[t][i] == 0)
760				*(huff_data++) = 0;
761			else
762				*(huff_data++) =
763					((huff->size[t][i] - 1) << 16) |
764					huff->code[t][i];
765		}
766	}
767
768	ret = 0;
769out:
770	kfree(huff);
771	return ret;
772}
773
774static void coda9_jpeg_write_huff_tab(struct coda_ctx *ctx)
775{
776	struct coda_dev *dev = ctx->dev;
777	u32 *huff_data = ctx->params.jpeg_huff_data;
778	int i;
779
780	/* Write Huffman size/code lookup tables in AC0, AC1, DC0, DC1 order */
781	coda_write(dev, 0x3, CODA9_REG_JPEG_HUFF_CTRL);
782	for (i = 0; i < CODA9_JPEG_ENC_HUFF_DATA_SIZE; i++)
783		coda_write(dev, *(huff_data++), CODA9_REG_JPEG_HUFF_DATA);
784	coda_write(dev, 0x0, CODA9_REG_JPEG_HUFF_CTRL);
785}
786
787static inline void coda9_jpeg_write_qmat_quotients(struct coda_dev *dev,
788						   u8 *qmat, int index)
789{
790	int i;
791
792	coda_write(dev, index | 0x3, CODA9_REG_JPEG_QMAT_CTRL);
793	for (i = 0; i < 64; i++)
794		coda_write(dev, 0x80000 / qmat[i], CODA9_REG_JPEG_QMAT_DATA);
795	coda_write(dev, index, CODA9_REG_JPEG_QMAT_CTRL);
796}
797
798static void coda9_jpeg_load_qmat_tab(struct coda_ctx *ctx)
799{
800	struct coda_dev *dev = ctx->dev;
801	u8 *luma_tab;
802	u8 *chroma_tab;
803
804	luma_tab = ctx->params.jpeg_qmat_tab[0];
805	if (!luma_tab)
806		luma_tab = luma_q;
807
808	chroma_tab = ctx->params.jpeg_qmat_tab[1];
809	if (!chroma_tab)
810		chroma_tab = chroma_q;
811
812	coda9_jpeg_write_qmat_quotients(dev, luma_tab, 0x00);
813	coda9_jpeg_write_qmat_quotients(dev, chroma_tab, 0x40);
814	coda9_jpeg_write_qmat_quotients(dev, chroma_tab, 0x80);
815}
816
/* Bounded output cursor used while writing the JPEG header */
struct coda_jpeg_stream {
	u8 *curr;	/* next byte to be written */
	u8 *end;	/* one past the last writable byte */
};
821
822static inline int coda_jpeg_put_byte(u8 byte, struct coda_jpeg_stream *stream)
823{
824	if (stream->curr >= stream->end)
825		return -EINVAL;
826
827	*stream->curr++ = byte;
828
829	return 0;
830}
831
832static inline int coda_jpeg_put_word(u16 word, struct coda_jpeg_stream *stream)
833{
834	if (stream->curr + sizeof(__be16) > stream->end)
835		return -EINVAL;
836
837	put_unaligned_be16(word, stream->curr);
838	stream->curr += sizeof(__be16);
839
840	return 0;
841}
842
843static int coda_jpeg_put_table(u16 marker, u8 index, const u8 *table,
844			       size_t len, struct coda_jpeg_stream *stream)
845{
846	int i, ret;
847
848	ret = coda_jpeg_put_word(marker, stream);
849	if (ret < 0)
850		return ret;
851	ret = coda_jpeg_put_word(3 + len, stream);
852	if (ret < 0)
853		return ret;
854	ret = coda_jpeg_put_byte(index, stream);
855	for (i = 0; i < len && ret == 0; i++)
856		ret = coda_jpeg_put_byte(table[i], stream);
857
858	return ret;
859}
860
861static int coda_jpeg_define_quantization_table(struct coda_ctx *ctx, u8 index,
862					       struct coda_jpeg_stream *stream)
863{
864	return coda_jpeg_put_table(DQT_MARKER, index,
865				   ctx->params.jpeg_qmat_tab[index], 64,
866				   stream);
867}
868
869static int coda_jpeg_define_huffman_table(u8 index, const u8 *table, size_t len,
870					  struct coda_jpeg_stream *stream)
871{
872	return coda_jpeg_put_table(DHT_MARKER, index, table, len, stream);
873}
874
875static int coda9_jpeg_encode_header(struct coda_ctx *ctx, int len, u8 *buf)
876{
877	struct coda_jpeg_stream stream = { buf, buf + len };
878	struct coda_q_data *q_data_src;
879	int chroma_format, comp_num;
880	int i, ret, pad;
881
882	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
883	chroma_format = coda9_jpeg_chroma_format(q_data_src->fourcc);
884	if (chroma_format < 0)
885		return 0;
886
887	/* Start Of Image */
888	ret = coda_jpeg_put_word(SOI_MARKER, &stream);
889	if (ret < 0)
890		return ret;
891
892	/* Define Restart Interval */
893	if (ctx->params.jpeg_restart_interval) {
894		ret = coda_jpeg_put_word(DRI_MARKER, &stream);
895		if (ret < 0)
896			return ret;
897		ret = coda_jpeg_put_word(4, &stream);
898		if (ret < 0)
899			return ret;
900		ret = coda_jpeg_put_word(ctx->params.jpeg_restart_interval,
901					 &stream);
902		if (ret < 0)
903			return ret;
904	}
905
906	/* Define Quantization Tables */
907	ret = coda_jpeg_define_quantization_table(ctx, 0x00, &stream);
908	if (ret < 0)
909		return ret;
910	if (chroma_format != CODA9_JPEG_FORMAT_400) {
911		ret = coda_jpeg_define_quantization_table(ctx, 0x01, &stream);
912		if (ret < 0)
913			return ret;
914	}
915
916	/* Define Huffman Tables */
917	ret = coda_jpeg_define_huffman_table(0x00, luma_dc, 16 + 12, &stream);
918	if (ret < 0)
919		return ret;
920	ret = coda_jpeg_define_huffman_table(0x10, luma_ac, 16 + 162, &stream);
921	if (ret < 0)
922		return ret;
923	if (chroma_format != CODA9_JPEG_FORMAT_400) {
924		ret = coda_jpeg_define_huffman_table(0x01, chroma_dc, 16 + 12,
925						     &stream);
926		if (ret < 0)
927			return ret;
928		ret = coda_jpeg_define_huffman_table(0x11, chroma_ac, 16 + 162,
929						     &stream);
930		if (ret < 0)
931			return ret;
932	}
933
934	/* Start Of Frame */
935	ret = coda_jpeg_put_word(SOF_MARKER, &stream);
936	if (ret < 0)
937		return ret;
938	comp_num = (chroma_format == CODA9_JPEG_FORMAT_400) ? 1 : 3;
939	ret = coda_jpeg_put_word(8 + comp_num * 3, &stream);
940	if (ret < 0)
941		return ret;
942	ret = coda_jpeg_put_byte(0x08, &stream);
943	if (ret < 0)
944		return ret;
945	ret = coda_jpeg_put_word(q_data_src->height, &stream);
946	if (ret < 0)
947		return ret;
948	ret = coda_jpeg_put_word(q_data_src->width, &stream);
949	if (ret < 0)
950		return ret;
951	ret = coda_jpeg_put_byte(comp_num, &stream);
952	if (ret < 0)
953		return ret;
954	for (i = 0; i < comp_num; i++) {
955		static unsigned char subsampling[5][3] = {
956			[CODA9_JPEG_FORMAT_420] = { 0x22, 0x11, 0x11 },
957			[CODA9_JPEG_FORMAT_422] = { 0x21, 0x11, 0x11 },
958			[CODA9_JPEG_FORMAT_224] = { 0x12, 0x11, 0x11 },
959			[CODA9_JPEG_FORMAT_444] = { 0x11, 0x11, 0x11 },
960			[CODA9_JPEG_FORMAT_400] = { 0x11 },
961		};
962
963		/* Component identifier, matches SOS */
964		ret = coda_jpeg_put_byte(i + 1, &stream);
965		if (ret < 0)
966			return ret;
967		ret = coda_jpeg_put_byte(subsampling[chroma_format][i],
968					 &stream);
969		if (ret < 0)
970			return ret;
971		/* Chroma table index */
972		ret = coda_jpeg_put_byte((i == 0) ? 0 : 1, &stream);
973		if (ret < 0)
974			return ret;
975	}
976
977	/* Pad to multiple of 8 bytes */
978	pad = (stream.curr - buf) % 8;
979	if (pad) {
980		pad = 8 - pad;
981		while (pad--) {
982			ret = coda_jpeg_put_byte(0x00, &stream);
983			if (ret < 0)
984				return ret;
985		}
986	}
987
988	return stream.curr - buf;
989}
990
991/*
992 * Scale quantization table using nonlinear scaling factor
 * u8 qtab[64], scale in percent (callers in this file pass [0,1000])
994 */
995static void coda_scale_quant_table(u8 *q_tab, int scale)
996{
997	unsigned int temp;
998	int i;
999
1000	for (i = 0; i < 64; i++) {
1001		temp = DIV_ROUND_CLOSEST((unsigned int)q_tab[i] * scale, 100);
1002		if (temp <= 0)
1003			temp = 1;
1004		if (temp > 255)
1005			temp = 255;
1006		q_tab[i] = (unsigned char)temp;
1007	}
1008}
1009
1010void coda_set_jpeg_compression_quality(struct coda_ctx *ctx, int quality)
1011{
1012	unsigned int scale;
1013
1014	ctx->params.jpeg_quality = quality;
1015
1016	/* Clip quality setting to [5,100] interval */
1017	if (quality > 100)
1018		quality = 100;
1019	if (quality < 5)
1020		quality = 5;
1021
1022	/*
1023	 * Non-linear scaling factor:
1024	 * [5,50] -> [1000..100], [51,100] -> [98..0]
1025	 */
1026	if (quality < 50)
1027		scale = 5000 / quality;
1028	else
1029		scale = 200 - 2 * quality;
1030
1031	if (ctx->params.jpeg_qmat_tab[0]) {
1032		memcpy(ctx->params.jpeg_qmat_tab[0], luma_q, 64);
1033		coda_scale_quant_table(ctx->params.jpeg_qmat_tab[0], scale);
1034	}
1035	if (ctx->params.jpeg_qmat_tab[1]) {
1036		memcpy(ctx->params.jpeg_qmat_tab[1], chroma_q, 64);
1037		coda_scale_quant_table(ctx->params.jpeg_qmat_tab[1], scale);
1038	}
1039}
1040
1041/*
1042 * Encoder context operations
1043 */
1044
1045static int coda9_jpeg_start_encoding(struct coda_ctx *ctx)
1046{
1047	struct coda_dev *dev = ctx->dev;
1048	int ret;
1049
1050	ret = coda9_jpeg_load_huff_tab(ctx);
1051	if (ret < 0) {
1052		v4l2_err(&dev->v4l2_dev, "error loading Huffman tables\n");
1053		return ret;
1054	}
1055	if (!ctx->params.jpeg_qmat_tab[0]) {
1056		ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
1057		if (!ctx->params.jpeg_qmat_tab[0])
1058			return -ENOMEM;
1059	}
1060	if (!ctx->params.jpeg_qmat_tab[1]) {
1061		ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
1062		if (!ctx->params.jpeg_qmat_tab[1])
1063			return -ENOMEM;
1064	}
1065	coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
1066
1067	return 0;
1068}
1069
/*
 * Set up the JPEG unit to encode the next queued source buffer into the
 * next queued capture buffer and start the picture run.
 *
 * The JPEG header is written by the CPU to the beginning of the capture
 * buffer; the hardware output pointers are programmed to start right
 * behind it.
 *
 * Returns 0 after the run has been started, a negative error code from the
 * chroma format lookup or from header generation, or -ETIMEDOUT if the GDI
 * status register stays zero for more than 100 ms.
 */
static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx)
{
	struct coda_q_data *q_data_src;
	struct vb2_v4l2_buffer *src_buf, *dst_buf;
	struct coda_dev *dev = ctx->dev;
	u32 start_addr, end_addr;
	u16 aligned_width, aligned_height;
	bool chroma_interleave;
	int chroma_format;
	int header_len;
	int ret;
	ktime_t timeout;

	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);

	/* A payload of zero is replaced by the full plane size */
	if (vb2_get_plane_payload(&src_buf->vb2_buf, 0) == 0)
		vb2_set_plane_payload(&src_buf->vb2_buf, 0,
				      vb2_plane_size(&src_buf->vb2_buf, 0));

	/* Source and capture buffer of one run share the sequence number */
	src_buf->sequence = ctx->osequence;
	dst_buf->sequence = ctx->osequence;
	ctx->osequence++;

	/* Every frame is flagged as a keyframe, never as a P-frame */
	src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
	src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;

	coda_set_gdi_regs(ctx);

	/* DMA address range of the capture buffer's first plane */
	start_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
	end_addr = start_addr + vb2_plane_size(&dst_buf->vb2_buf, 0);

	chroma_format = coda9_jpeg_chroma_format(q_data_src->fourcc);
	if (chroma_format < 0)
		return chroma_format;

	/* Round image dimensions to multiple of MCU size */
	aligned_width = round_up(q_data_src->width, width_align[chroma_format]);
	aligned_height = round_up(q_data_src->height,
				  height_align[chroma_format]);
	/* A stride mismatch is only reported; encoding continues anyway */
	if (aligned_width != q_data_src->bytesperline) {
		v4l2_err(&dev->v4l2_dev, "wrong stride: %d instead of %d\n",
			 aligned_width, q_data_src->bytesperline);
	}

	/* The header goes to the start of the capture buffer (CPU mapping) */
	header_len =
		coda9_jpeg_encode_header(ctx,
					 vb2_plane_size(&dst_buf->vb2_buf, 0),
					 vb2_plane_vaddr(&dst_buf->vb2_buf, 0));
	if (header_len < 0)
		return header_len;

	/* Base, write and read pointers all start right behind the header */
	coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_BAS_ADDR);
	coda_write(dev, end_addr, CODA9_REG_JPEG_BBC_END_ADDR);
	coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_WR_PTR);
	coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_RD_PTR);
	coda_write(dev, 0, CODA9_REG_JPEG_BBC_CUR_POS);
	/* 64 words per 256-byte page */
	coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
	coda_write(dev, start_addr, CODA9_REG_JPEG_BBC_EXT_ADDR);
	coda_write(dev, 0, CODA9_REG_JPEG_BBC_INT_ADDR);

	coda_write(dev, 0, CODA9_REG_JPEG_GBU_BT_PTR);
	coda_write(dev, 0, CODA9_REG_JPEG_GBU_WD_PTR);
	coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR);
	/*
	 * Space left behind the header, counted in 256-byte units.
	 * NOTE(review): the meaning of BIT(31) is not visible here - confirm
	 * against the register documentation.
	 */
	coda_write(dev, BIT(31) | ((end_addr - start_addr - header_len) / 256),
		   CODA9_REG_JPEG_BBC_STRM_CTRL);
	coda_write(dev, 0, CODA9_REG_JPEG_GBU_CTRL);
	coda_write(dev, 0, CODA9_REG_JPEG_GBU_FF_RPTR);
	coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER);
	coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBIR);
	coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBHR);

	/* Only NV12 input is treated as chroma interleaved */
	chroma_interleave = (q_data_src->fourcc == V4L2_PIX_FMT_NV12);
	coda_write(dev, CODA9_JPEG_PIC_CTRL_TC_DIRECTION |
		   CODA9_JPEG_PIC_CTRL_ENCODER_EN, CODA9_REG_JPEG_PIC_CTRL);
	coda_write(dev, 0, CODA9_REG_JPEG_SCL_INFO);
	coda_write(dev, chroma_interleave, CODA9_REG_JPEG_DPB_CONFIG);
	coda_write(dev, ctx->params.jpeg_restart_interval,
		   CODA9_REG_JPEG_RST_INTVAL);
	coda_write(dev, 1, CODA9_REG_JPEG_BBC_CTRL);

	coda_write(dev, bus_req_num[chroma_format], CODA9_REG_JPEG_OP_INFO);

	coda9_jpeg_write_huff_tab(ctx);
	coda9_jpeg_load_qmat_tab(ctx);

	/*
	 * With 90 degree rotation the picture dimensions are swapped and the
	 * 4:2:2 / 2:2:4 chroma formats are exchanged for the registers
	 * programmed from here on.
	 */
	if (ctx->params.rot_mode & CODA_ROT_90) {
		aligned_width = aligned_height;
		aligned_height = q_data_src->bytesperline;
		if (chroma_format == CODA9_JPEG_FORMAT_422)
			chroma_format = CODA9_JPEG_FORMAT_224;
		else if (chroma_format == CODA9_JPEG_FORMAT_224)
			chroma_format = CODA9_JPEG_FORMAT_422;
	}
	/* These need to be multiples of MCU size */
	coda_write(dev, aligned_width << 16 | aligned_height,
		   CODA9_REG_JPEG_PIC_SIZE);
	/* The enable bit is only set when a rotation/mirror mode is selected */
	coda_write(dev, ctx->params.rot_mode ?
		   (CODA_ROT_MIR_ENABLE | ctx->params.rot_mode) : 0,
		   CODA9_REG_JPEG_ROT_INFO);

	coda_write(dev, mcu_info[chroma_format], CODA9_REG_JPEG_MCU_INFO);

	/* Poll the GDI status for up to 100 ms until it reads non-zero */
	coda_write(dev, 1, CODA9_GDI_CONTROL);
	timeout = ktime_add_us(ktime_get(), 100000);
	do {
		ret = coda_read(dev, CODA9_GDI_STATUS);
		if (ktime_compare(ktime_get(), timeout) > 0) {
			v4l2_err(&dev->v4l2_dev, "timeout waiting for GDI\n");
			return -ETIMEDOUT;
		}
	} while (!ret);

	coda_write(dev, (chroma_format << 17) | (chroma_interleave << 16) |
		   q_data_src->bytesperline, CODA9_GDI_INFO_CONTROL);
	/* The content of this register seems to be irrelevant: */
	coda_write(dev, aligned_width << 16 | aligned_height,
		   CODA9_GDI_INFO_PIC_SIZE);

	/* Base address(es) of the source frame */
	coda_write_base(ctx, q_data_src, src_buf, CODA9_GDI_INFO_BASE_Y);

	coda_write(dev, 0, CODA9_REG_JPEG_DPB_BASE00);
	coda_write(dev, 0, CODA9_GDI_CONTROL);
	coda_write(dev, 1, CODA9_GDI_PIC_INIT_HOST);

	coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
	coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);

	trace_coda_jpeg_run(ctx, src_buf);

	/* Everything is programmed: start the picture run */
	coda_write(dev, 1, CODA9_REG_JPEG_PIC_START);

	return 0;
}
1206
1207static void coda9_jpeg_finish_encode(struct coda_ctx *ctx)
1208{
1209	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1210	struct coda_dev *dev = ctx->dev;
1211	u32 wr_ptr, start_ptr;
1212	u32 err_mb;
1213
1214	if (ctx->aborting) {
1215		coda_write(ctx->dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1216		return;
1217	}
1218
1219	/*
1220	 * Lock to make sure that an encoder stop command running in parallel
1221	 * will either already have marked src_buf as last, or it will wake up
1222	 * the capture queue after the buffers are returned.
1223	 */
1224	mutex_lock(&ctx->wakeup_mutex);
1225	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1226	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1227
1228	trace_coda_jpeg_done(ctx, dst_buf);
1229
1230	/*
1231	 * Set plane payload to the number of bytes written out
1232	 * by the JPEG processing unit
1233	 */
1234	start_ptr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1235	wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR);
1236	vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1237
1238	err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1239	if (err_mb)
1240		coda_dbg(1, ctx, "ERRMB: 0x%x\n", err_mb);
1241
1242	coda_write(dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1243
1244	dst_buf->flags &= ~(V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_LAST);
1245	dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1246	dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST;
1247
1248	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false);
1249
1250	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1251	coda_m2m_buf_done(ctx, dst_buf, err_mb ? VB2_BUF_STATE_ERROR :
1252						 VB2_BUF_STATE_DONE);
1253	mutex_unlock(&ctx->wakeup_mutex);
1254
1255	coda_dbg(1, ctx, "job finished: encoded frame (%u)%s\n",
1256		 dst_buf->sequence,
1257		 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : "");
1258
1259	/*
1260	 * Reset JPEG processing unit after each encode run to work
1261	 * around hangups when switching context between encoder and
1262	 * decoder.
1263	 */
1264	coda_hw_reset(ctx);
1265}
1266
1267static void coda9_jpeg_encode_timeout(struct coda_ctx *ctx)
1268{
1269	struct coda_dev *dev = ctx->dev;
1270	u32 end_addr, wr_ptr;
1271
1272	/* Handle missing BBC overflow interrupt via timeout */
1273	end_addr = coda_read(dev, CODA9_REG_JPEG_BBC_END_ADDR);
1274	wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR);
1275	if (wr_ptr >= end_addr - 256) {
1276		v4l2_err(&dev->v4l2_dev, "JPEG too large for capture buffer\n");
1277		coda9_jpeg_finish_encode(ctx);
1278		return;
1279	}
1280
1281	coda_hw_reset(ctx);
1282}
1283
1284static void coda9_jpeg_release(struct coda_ctx *ctx)
1285{
1286	int i;
1287
1288	if (ctx->params.jpeg_qmat_tab[0] == luma_q)
1289		ctx->params.jpeg_qmat_tab[0] = NULL;
1290	if (ctx->params.jpeg_qmat_tab[1] == chroma_q)
1291		ctx->params.jpeg_qmat_tab[1] = NULL;
1292	for (i = 0; i < 3; i++)
1293		kfree(ctx->params.jpeg_qmat_tab[i]);
1294	kfree(ctx->params.jpeg_huff_data);
1295	kfree(ctx->params.jpeg_huff_tab);
1296}
1297
/*
 * Context operations for JPEG encoder instances. Unlike the decoder table
 * in this file, this one also provides a .run_timeout handler.
 */
const struct coda_context_ops coda9_jpeg_encode_ops = {
	.queue_init = coda_encoder_queue_init,
	.start_streaming = coda9_jpeg_start_encoding,
	.prepare_run = coda9_jpeg_prepare_encode,
	.finish_run = coda9_jpeg_finish_encode,
	.run_timeout = coda9_jpeg_encode_timeout,
	.release = coda9_jpeg_release,
};
1306
1307/*
1308 * Decoder context operations
1309 */
1310
1311static int coda9_jpeg_start_decoding(struct coda_ctx *ctx)
1312{
1313	ctx->params.jpeg_qmat_index[0] = 0;
1314	ctx->params.jpeg_qmat_index[1] = 1;
1315	ctx->params.jpeg_qmat_index[2] = 1;
1316	ctx->params.jpeg_qmat_tab[0] = luma_q;
1317	ctx->params.jpeg_qmat_tab[1] = chroma_q;
1318	/* nothing more to do here */
1319
1320	/* TODO: we could already scan the first header to get the chroma
1321	 * format.
1322	 */
1323
1324	return 0;
1325}
1326
1327static int coda9_jpeg_prepare_decode(struct coda_ctx *ctx)
1328{
1329	struct coda_dev *dev = ctx->dev;
1330	int aligned_width, aligned_height;
1331	int chroma_format;
1332	int ret;
1333	u32 val, dst_fourcc;
1334	struct coda_q_data *q_data_src, *q_data_dst;
1335	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1336	int chroma_interleave;
1337
1338	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1339	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1340	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1341	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1342	dst_fourcc = q_data_dst->fourcc;
1343
1344	if (vb2_get_plane_payload(&src_buf->vb2_buf, 0) == 0)
1345		vb2_set_plane_payload(&src_buf->vb2_buf, 0,
1346				      vb2_plane_size(&src_buf->vb2_buf, 0));
1347
1348	chroma_format = coda9_jpeg_chroma_format(q_data_dst->fourcc);
1349	if (chroma_format < 0) {
1350		v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1351		return chroma_format;
1352	}
1353
1354	ret = coda_jpeg_decode_header(ctx, &src_buf->vb2_buf);
1355	if (ret < 0) {
1356		v4l2_err(&dev->v4l2_dev, "failed to decode JPEG header: %d\n",
1357			 ret);
1358
1359		src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1360		dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1361		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1362		v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
1363
1364		v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1365		return ret;
1366	}
1367
1368	/* Round image dimensions to multiple of MCU size */
1369	aligned_width = round_up(q_data_src->width, width_align[chroma_format]);
1370	aligned_height = round_up(q_data_src->height, height_align[chroma_format]);
1371	if (aligned_width != q_data_dst->bytesperline) {
1372		v4l2_err(&dev->v4l2_dev, "stride mismatch: %d != %d\n",
1373			 aligned_width, q_data_dst->bytesperline);
1374	}
1375
1376	coda_set_gdi_regs(ctx);
1377
1378	val = ctx->params.jpeg_huff_ac_index[0] << 12 |
1379	      ctx->params.jpeg_huff_ac_index[1] << 11 |
1380	      ctx->params.jpeg_huff_ac_index[2] << 10 |
1381	      ctx->params.jpeg_huff_dc_index[0] << 9 |
1382	      ctx->params.jpeg_huff_dc_index[1] << 8 |
1383	      ctx->params.jpeg_huff_dc_index[2] << 7;
1384	if (ctx->params.jpeg_huff_tab)
1385		val |= CODA9_JPEG_PIC_CTRL_USER_HUFFMAN_EN;
1386	coda_write(dev, val, CODA9_REG_JPEG_PIC_CTRL);
1387
1388	coda_write(dev, aligned_width << 16 | aligned_height,
1389			CODA9_REG_JPEG_PIC_SIZE);
1390
1391	chroma_interleave = (dst_fourcc == V4L2_PIX_FMT_NV12);
1392	coda_write(dev, 0, CODA9_REG_JPEG_ROT_INFO);
1393	coda_write(dev, bus_req_num[chroma_format], CODA9_REG_JPEG_OP_INFO);
1394	coda_write(dev, mcu_info[chroma_format], CODA9_REG_JPEG_MCU_INFO);
1395	coda_write(dev, 0, CODA9_REG_JPEG_SCL_INFO);
1396	coda_write(dev, chroma_interleave, CODA9_REG_JPEG_DPB_CONFIG);
1397	coda_write(dev, ctx->params.jpeg_restart_interval,
1398			CODA9_REG_JPEG_RST_INTVAL);
1399
1400	if (ctx->params.jpeg_huff_tab) {
1401		ret = coda9_jpeg_dec_huff_setup(ctx);
1402		if (ret < 0) {
1403			v4l2_err(&dev->v4l2_dev,
1404				 "failed to set up Huffman tables: %d\n", ret);
1405			v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1406			return ret;
1407		}
1408	}
1409
1410	coda9_jpeg_qmat_setup(ctx);
1411
1412	coda9_jpeg_dec_bbc_gbu_setup(ctx, &src_buf->vb2_buf,
1413				     ctx->jpeg_ecs_offset);
1414
1415	coda_write(dev, 0, CODA9_REG_JPEG_RST_INDEX);
1416	coda_write(dev, 0, CODA9_REG_JPEG_RST_COUNT);
1417
1418	coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_Y);
1419	coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_CB);
1420	coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_CR);
1421
1422	coda_write(dev, 0, CODA9_REG_JPEG_ROT_INFO);
1423
1424	coda_write(dev, 1, CODA9_GDI_CONTROL);
1425	do {
1426		ret = coda_read(dev, CODA9_GDI_STATUS);
1427	} while (!ret);
1428
1429	val = (chroma_format << 17) | (chroma_interleave << 16) |
1430	      q_data_dst->bytesperline;
1431	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1432		val |= 3 << 20;
1433	coda_write(dev, val, CODA9_GDI_INFO_CONTROL);
1434
1435	coda_write(dev, aligned_width << 16 | aligned_height,
1436			CODA9_GDI_INFO_PIC_SIZE);
1437
1438	coda_write_base(ctx, q_data_dst, dst_buf, CODA9_GDI_INFO_BASE_Y);
1439
1440	coda_write(dev, 0, CODA9_REG_JPEG_DPB_BASE00);
1441	coda_write(dev, 0, CODA9_GDI_CONTROL);
1442	coda_write(dev, 1, CODA9_GDI_PIC_INIT_HOST);
1443
1444	trace_coda_jpeg_run(ctx, src_buf);
1445
1446	coda_write(dev, 1, CODA9_REG_JPEG_PIC_START);
1447
1448	return 0;
1449}
1450
1451static void coda9_jpeg_finish_decode(struct coda_ctx *ctx)
1452{
1453	struct coda_dev *dev = ctx->dev;
1454	struct vb2_v4l2_buffer *dst_buf, *src_buf;
1455	struct coda_q_data *q_data_dst;
1456	u32 err_mb;
1457
1458	err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1459	if (err_mb)
1460		v4l2_err(&dev->v4l2_dev, "ERRMB: 0x%x\n", err_mb);
1461
1462	coda_write(dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1463
1464	/*
1465	 * Lock to make sure that a decoder stop command running in parallel
1466	 * will either already have marked src_buf as last, or it will wake up
1467	 * the capture queue after the buffers are returned.
1468	 */
1469	mutex_lock(&ctx->wakeup_mutex);
1470	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1471	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1472	dst_buf->sequence = ctx->osequence++;
1473
1474	trace_coda_jpeg_done(ctx, dst_buf);
1475
1476	dst_buf->flags &= ~(V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_LAST);
1477	dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1478	dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST;
1479
1480	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false);
1481
1482	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1483	vb2_set_plane_payload(&dst_buf->vb2_buf, 0, q_data_dst->sizeimage);
1484
1485	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1486	coda_m2m_buf_done(ctx, dst_buf, err_mb ? VB2_BUF_STATE_ERROR :
1487						 VB2_BUF_STATE_DONE);
1488
1489	mutex_unlock(&ctx->wakeup_mutex);
1490
1491	coda_dbg(1, ctx, "job finished: decoded frame (%u)%s\n",
1492		 dst_buf->sequence,
1493		 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : "");
1494
1495	/*
1496	 * Reset JPEG processing unit after each decode run to work
1497	 * around hangups when switching context between encoder and
1498	 * decoder.
1499	 */
1500	coda_hw_reset(ctx);
1501}
1502
/*
 * Context operations for JPEG decoder instances. The queue setup and the
 * release handler are shared with the encoder; no .run_timeout handler is
 * provided.
 */
const struct coda_context_ops coda9_jpeg_decode_ops = {
	.queue_init = coda_encoder_queue_init, /* non-bitstream operation */
	.start_streaming = coda9_jpeg_start_decoding,
	.prepare_run = coda9_jpeg_prepare_decode,
	.finish_run = coda9_jpeg_finish_decode,
	.release = coda9_jpeg_release,
};
1510
1511irqreturn_t coda9_jpeg_irq_handler(int irq, void *data)
1512{
1513	struct coda_dev *dev = data;
1514	struct coda_ctx *ctx;
1515	int status;
1516	int err_mb;
1517
1518	status = coda_read(dev, CODA9_REG_JPEG_PIC_STATUS);
1519	if (status == 0)
1520		return IRQ_HANDLED;
1521	coda_write(dev, status, CODA9_REG_JPEG_PIC_STATUS);
1522
1523	if (status & CODA9_JPEG_STATUS_OVERFLOW)
1524		v4l2_err(&dev->v4l2_dev, "JPEG overflow\n");
1525
1526	if (status & CODA9_JPEG_STATUS_BBC_INT)
1527		v4l2_err(&dev->v4l2_dev, "JPEG BBC interrupt\n");
1528
1529	if (status & CODA9_JPEG_STATUS_ERROR) {
1530		v4l2_err(&dev->v4l2_dev, "JPEG error\n");
1531
1532		err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1533		if (err_mb) {
1534			v4l2_err(&dev->v4l2_dev,
1535				 "ERRMB: 0x%x: rst idx %d, mcu pos (%d,%d)\n",
1536				 err_mb, err_mb >> 24, (err_mb >> 12) & 0xfff,
1537				 err_mb & 0xfff);
1538		}
1539	}
1540
1541	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1542	if (!ctx) {
1543		v4l2_err(&dev->v4l2_dev,
1544			 "Instance released before the end of transaction\n");
1545		mutex_unlock(&dev->coda_mutex);
1546		return IRQ_HANDLED;
1547	}
1548
1549	complete(&ctx->completion);
1550
1551	return IRQ_HANDLED;
1552}
1553