1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2019 NXP.
4 *
5 * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
6 */
7
8#include <linux/device.h>
9#include <linux/slab.h>
10
11#include "dcss-dev.h"
12
/*
 * Scaler register map.
 *
 * Register offsets are relative to a channel's base: dcss_scaler_write()
 * adds ch->base_ofs to them. The defines indented below a register offset
 * name bit-fields; *_POS is a field's shift and *_MASK its bit mask.
 */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)
/* The WIDTH and HEIGHT fields below are used for all four *_RES registers. */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64
/* Base offsets of the coefficient tables written by the program_*_coef_set() helpers. */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
67
/*
 * Per-channel scaler state.
 *
 * base_reg:         channel registers as mapped by ioremap() in
 *                   dcss_scaler_ch_init_all()
 * base_ofs:         channel register base (scaler base + channel * 0x400);
 *                   added to register offsets for context loader writes
 * scl:              back pointer to the owning scaler
 * sdata_ctrl:       shadow of DCSS_SCALER_SDATA_CTRL, queued for writing when
 *                   the channel is enabled
 * scaler_ctrl:      shadow of DCSS_SCALER_CTRL, written out by
 *                   dcss_scaler_write_sclctrl()
 * scaler_ctrl_chgd: set when scaler_ctrl changed and still has to be written
 * c_vstart:         vertical chroma start phase saved by
 *                   dcss_scaler_fractions_set()
 * c_hstart:         horizontal chroma start phase saved by
 *                   dcss_scaler_fractions_set()
 */
struct dcss_scaler_ch {
	void __iomem *base_reg;
	u32 base_ofs;
	struct dcss_scaler *scl;

	u32 sdata_ctrl;
	u32 scaler_ctrl;

	bool scaler_ctrl_chgd;

	u32 c_vstart;
	u32 c_hstart;
};
81
/*
 * Scaler block state, allocated by dcss_scaler_init().
 *
 * dev:    device used for error reporting
 * ctxld:  context loader through which register writes are queued
 * ctx_id: context id passed along with every context loader write
 * ch:     the three scaler channels
 */
struct dcss_scaler {
	struct device *dev;

	struct dcss_ctxld *ctxld;
	u32 ctx_id;

	struct dcss_scaler_ch ch[3];
};
90
/* scaler coefficients generator */
#define PSC_FRAC_BITS 30
#define PSC_FRAC_SCALE BIT(PSC_FRAC_BITS)
/* number of bits needed to index a phase: 1 << PSC_BITS_FOR_PHASE phases */
#define PSC_BITS_FOR_PHASE 4
#define PSC_NUM_PHASES 16
/*
 * Only phases 0..PSC_NUM_PHASES / 2 are generated; the coefficient
 * programming helpers write the remaining phases by reversing the stored
 * ones.
 */
#define PSC_STORED_PHASES (PSC_NUM_PHASES / 2 + 1)
/* taps per phase in a coefficient row */
#define PSC_NUM_TAPS 7
/* taps generated when the 5-tap variant of the filter is requested */
#define PSC_NUM_TAPS_RGBA 5
/* coefficients are normalized so a phase sums to about 1 << PSC_COEFF_PRECISION */
#define PSC_COEFF_PRECISION 10
/* fractional bits of the chroma start phase adjustments */
#define PSC_PHASE_FRACTION_BITS 13
#define PSC_PHASE_MASK (PSC_NUM_PHASES - 1)
/* fractional bits of the fixed-point format used by mult_q() and div_q() */
#define PSC_Q_FRACTION 19
/* one half in that fixed-point format; also used as the rounding bias */
#define PSC_Q_ROUND_OFFSET (1 << (PSC_Q_FRACTION - 1))
104
105/**
106 * mult_q() - Performs fixed-point multiplication.
107 * @A: multiplier
108 * @B: multiplicand
109 */
110static int mult_q(int A, int B)
111{
112	int result;
113	s64 temp;
114
115	temp = (int64_t)A * (int64_t)B;
116	temp += PSC_Q_ROUND_OFFSET;
117	result = (int)(temp >> PSC_Q_FRACTION);
118	return result;
119}
120
121/**
122 * div_q() - Performs fixed-point division.
123 * @A: dividend
124 * @B: divisor
125 */
126static int div_q(int A, int B)
127{
128	int result;
129	s64 temp;
130
131	temp = (int64_t)A << PSC_Q_FRACTION;
132	if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
133		temp += B / 2;
134	else
135		temp -= B / 2;
136
137	result = (int)(temp / B);
138	return result;
139}
140
141/**
142 * exp_approx_q() - Compute approximation to exp(x) function using Taylor
143 *		    series.
144 * @x: fixed-point argument of exp function
145 */
146static int exp_approx_q(int x)
147{
148	int sum = 1 << PSC_Q_FRACTION;
149	int term = 1 << PSC_Q_FRACTION;
150
151	term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
152	sum += term;
153	term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
154	sum += term;
155	term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
156	sum += term;
157	term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
158	sum += term;
159
160	return sum;
161}
162
/**
 * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
 * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
 * @use_5_taps: indicates whether to use 5 taps or 7 taps
 * @phase0_identity: when true, phase 0 is replaced by a filter whose only
 *		     non-zero coefficient is the centre tap
 * @coef: output filter coefficients; rows 0..PSC_STORED_PHASES - 1 are
 *	  written, each holding PSC_NUM_TAPS values normalized to sum to
 *	  about 1 << PSC_COEFF_PRECISION
 */
static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
					bool phase0_identity,
					int coef[][PSC_NUM_TAPS])
{
	int sigma_q, g0_q, g1_q, g2_q;
	int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
	int mid;
	int phase;
	int i;
	int taps;

	/* the 5-tap walk below never reaches the outermost taps; zero them */
	if (use_5_taps)
		for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
			coef[phase][0] = 0;
			coef[phase][PSC_NUM_TAPS - 1] = 0;
		}

	/* seed coefficient scanner */
	taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
	/* number of samples generated on each side of the centre sample */
	mid = (PSC_NUM_PHASES * taps) / 2 - 1;
	phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
	tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
	tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;

	/* seed gaussian filter generator */
	/* sigma = 0.5 / fc, g1 = exp(-0.5 / sigma^2), g2 = g1^2 */
	sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
	g0_q = 1 << PSC_Q_FRACTION;
	g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
				  mult_q(sigma_q, sigma_q)));
	g2_q = mult_q(g1_q, g1_q);
	/* centre sample of the gaussian */
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;

	/*
	 * Walk outwards from the centre one sample at a time. g0_q is advanced
	 * by multiplying with g1_q, and g1_q by multiplying with g2_q, so
	 * exp_approx_q() is only evaluated once above. Each sample is stored on
	 * both sides of the centre, but only in rows for phases 0..8.
	 */
	for (i = 0; i < mid; i++) {
		phase_cnt++;
		tap_cnt1--;
		tap_cnt2++;

		g0_q = mult_q(g0_q, g1_q);
		g1_q = mult_q(g1_q, g2_q);

		if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
			coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
		if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
			coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
	}

	/* the sample one step past the last generated one is forced to zero */
	phase_cnt++;
	tap_cnt1--;
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;

	/* override phase 0 with identity filter if specified */
	if (phase0_identity)
		for (i = 0; i < PSC_NUM_TAPS; i++)
			coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
						(1 << PSC_COEFF_PRECISION) : 0;

	/* normalize coef */
	for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
		int sum = 0;
		s64 ll_temp;

		for (i = 0; i < PSC_NUM_TAPS; i++)
			sum += coef[phase][i];
		/* coef = round(coef * 2^PSC_COEFF_PRECISION / sum) */
		for (i = 0; i < PSC_NUM_TAPS; i++) {
			ll_temp = coef[phase][i];
			ll_temp <<= PSC_COEFF_PRECISION;
			ll_temp += sum >> 1;
			ll_temp /= sum;
			coef[phase][i] = (int)ll_temp;
		}
	}
}
245
246/**
247 * dcss_scaler_filter_design() - Compute filter coefficients using
248 *				 Gaussian filter.
249 * @src_length: length of input
250 * @dst_length: length of output
251 * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
252 * @coef: output coefficients
253 */
254static void dcss_scaler_filter_design(int src_length, int dst_length,
255				      bool use_5_taps, bool phase0_identity,
256				      int coef[][PSC_NUM_TAPS])
257{
258	int fc_q;
259
260	/* compute cutoff frequency */
261	if (dst_length >= src_length)
262		fc_q = div_q(1, PSC_NUM_PHASES);
263	else
264		fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
265
266	/* compute gaussian filter coefficients */
267	dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
268}
269
270static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
271{
272	struct dcss_scaler *scl = ch->scl;
273
274	dcss_ctxld_write(scl->ctxld, scl->ctx_id, val, ch->base_ofs + ofs);
275}
276
277static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
278				   unsigned long scaler_base)
279{
280	struct dcss_scaler_ch *ch;
281	int i;
282
283	for (i = 0; i < 3; i++) {
284		ch = &scl->ch[i];
285
286		ch->base_ofs = scaler_base + i * 0x400;
287
288		ch->base_reg = ioremap(ch->base_ofs, SZ_4K);
289		if (!ch->base_reg) {
290			dev_err(scl->dev, "scaler: unable to remap ch base\n");
291			return -ENOMEM;
292		}
293
294		ch->scl = scl;
295	}
296
297	return 0;
298}
299
300int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
301{
302	struct dcss_scaler *scaler;
303
304	scaler = kzalloc(sizeof(*scaler), GFP_KERNEL);
305	if (!scaler)
306		return -ENOMEM;
307
308	dcss->scaler = scaler;
309	scaler->dev = dcss->dev;
310	scaler->ctxld = dcss->ctxld;
311	scaler->ctx_id = CTX_SB_HP;
312
313	if (dcss_scaler_ch_init_all(scaler, scaler_base)) {
314		int i;
315
316		for (i = 0; i < 3; i++) {
317			if (scaler->ch[i].base_reg)
318				iounmap(scaler->ch[i].base_reg);
319		}
320
321		kfree(scaler);
322
323		return -ENOMEM;
324	}
325
326	return 0;
327}
328
329void dcss_scaler_exit(struct dcss_scaler *scl)
330{
331	int ch_no;
332
333	for (ch_no = 0; ch_no < 3; ch_no++) {
334		struct dcss_scaler_ch *ch = &scl->ch[ch_no];
335
336		dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
337
338		if (ch->base_reg)
339			iounmap(ch->base_reg);
340	}
341
342	kfree(scl);
343}
344
345void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
346{
347	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
348	u32 scaler_ctrl;
349
350	scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
351
352	if (en)
353		dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
354
355	if (ch->scaler_ctrl != scaler_ctrl)
356		ch->scaler_ctrl_chgd = true;
357
358	ch->scaler_ctrl = scaler_ctrl;
359}
360
361static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
362{
363	ch->sdata_ctrl &= ~YUV_EN;
364	ch->sdata_ctrl |= en ? YUV_EN : 0;
365}
366
367static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
368{
369	ch->sdata_ctrl &= ~RTRAM_8LINES;
370	ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
371}
372
373static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
374{
375	u32 val;
376
377	val = depth == 30 ? 2 : 0;
378
379	dcss_scaler_write(ch,
380			  ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
381			  ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
382			  DCSS_SCALER_BIT_DEPTH);
383}
384
/*
 * Buffer layouts handled by the scaler. The enumerator values are written
 * as-is to the SRC_FORMAT and DST_FORMAT registers by
 * dcss_scaler_format_set().
 */
enum buffer_format {
	BUF_FMT_YUV420,
	BUF_FMT_YUV422,
	BUF_FMT_ARGB8888_YUV444,
};
390
/*
 * Position of the chroma samples of a 4:2:0 source. Each name spells out the
 * horizontal and vertical position; dcss_scaler_fractions_set() turns it
 * into the chroma start phase adjustments.
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4 = 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4 = 1,
	PSC_LOC_HORZ_0_VERT_0 = 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0 = 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2 = 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2 = 5
};
399
400static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
401				   enum buffer_format src_fmt,
402				   enum buffer_format dst_fmt)
403{
404	dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
405	dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
406}
407
408static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
409				int src_xres, int src_yres,
410				int dst_xres, int dst_yres,
411				u32 pix_format, enum buffer_format dst_format)
412{
413	u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres;
414	u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres;
415	bool src_is_444 = true;
416
417	lsrc_xres = src_xres;
418	csrc_xres = src_xres;
419	lsrc_yres = src_yres;
420	csrc_yres = src_yres;
421	ldst_xres = dst_xres;
422	cdst_xres = dst_xres;
423	ldst_yres = dst_yres;
424	cdst_yres = dst_yres;
425
426	if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY ||
427	    pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) {
428		csrc_xres >>= 1;
429		src_is_444 = false;
430	} else if (pix_format == DRM_FORMAT_NV12 ||
431		   pix_format == DRM_FORMAT_NV21) {
432		csrc_xres >>= 1;
433		csrc_yres >>= 1;
434		src_is_444 = false;
435	}
436
437	if (dst_format == BUF_FMT_YUV422)
438		cdst_xres >>= 1;
439
440	/* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
441	if (src_is_444 && dst_format == BUF_FMT_YUV422) {
442		lsrc_yres--;
443		csrc_yres--;
444	}
445
446	dcss_scaler_write(ch, (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
447			       (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
448			  DCSS_SCALER_SRC_LUM_RES);
449	dcss_scaler_write(ch, (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
450			       (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
451			  DCSS_SCALER_SRC_CHR_RES);
452	dcss_scaler_write(ch, (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
453			       (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
454			  DCSS_SCALER_DST_LUM_RES);
455	dcss_scaler_write(ch, (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
456			       (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
457			  DCSS_SCALER_DST_CHR_RES);
458}
459
/*
 * Convert an integer scale factor to a fixed-point ratio with fp_pos
 * fractional bits: downscale_fp() yields factor, upscale_fp() yields
 * 1 / factor.
 */
#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

/* Scale factors of one channel, converted by the two macros above. */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* {downscale, upscale} factors, indexed by channel number */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{3, 8}, {5, 8}, {5, 8},
};
471
472static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
473				      int src_xres, int src_yres,
474				      int dst_xres, int dst_yres,
475				      u32 src_format, u32 dst_format,
476				      enum chroma_location src_chroma_loc)
477{
478	int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
479	u32 l_vinc, l_hinc, c_vinc, c_hinc;
480	u32 c_vstart, c_hstart;
481
482	src_c_xres = src_xres;
483	src_c_yres = src_yres;
484	dst_c_xres = dst_xres;
485	dst_c_yres = dst_yres;
486
487	c_vstart = 0;
488	c_hstart = 0;
489
490	/* adjustments for source chroma location */
491	if (src_format == BUF_FMT_YUV420) {
492		/* vertical input chroma position adjustment */
493		switch (src_chroma_loc) {
494		case PSC_LOC_HORZ_0_VERT_1_OVER_4:
495		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
496			/*
497			 * move chroma up to first luma line
498			 * (1/4 chroma input line spacing)
499			 */
500			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
501			break;
502		case PSC_LOC_HORZ_0_VERT_1_OVER_2:
503		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
504			/*
505			 * move chroma up to first luma line
506			 * (1/2 chroma input line spacing)
507			 */
508			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
509			break;
510		default:
511			break;
512		}
513		/* horizontal input chroma position adjustment */
514		switch (src_chroma_loc) {
515		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
516		case PSC_LOC_HORZ_1_OVER_4_VERT_0:
517		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
518			/* move chroma left 1/4 chroma input sample spacing */
519			c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
520			break;
521		default:
522			break;
523		}
524	}
525
526	/* adjustments to chroma resolution */
527	if (src_format == BUF_FMT_YUV420) {
528		src_c_xres >>= 1;
529		src_c_yres >>= 1;
530	} else if (src_format == BUF_FMT_YUV422) {
531		src_c_xres >>= 1;
532	}
533
534	if (dst_format == BUF_FMT_YUV422)
535		dst_c_xres >>= 1;
536
537	l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
538	c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
539	l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
540	c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
541
542	/* save chroma start phase */
543	ch->c_vstart = c_vstart;
544	ch->c_hstart = c_hstart;
545
546	dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
547	dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
548
549	dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
550	dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
551
552	dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
553	dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
554
555	dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
556	dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
557}
558
559int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
560				   int *min, int *max)
561{
562	*min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
563	*max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
564
565	return 0;
566}
567
568static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
569					   int base_addr,
570					   int coef[][PSC_NUM_TAPS])
571{
572	int i, phase;
573
574	for (i = 0; i < PSC_STORED_PHASES; i++) {
575		dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
576				       (coef[i][2] & 0xfff) << 4  |
577				       (coef[i][3] & 0xf00) >> 8),
578				  base_addr + i * sizeof(u32));
579		dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
580				       (coef[i][4] & 0xfff) << 8  |
581				       (coef[i][5] & 0xff0) >> 4),
582				  base_addr + 0x40 + i * sizeof(u32));
583		dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
584				  base_addr + 0x80 + i * sizeof(u32));
585	}
586
587	/* reverse both phase and tap orderings */
588	for (phase = (PSC_NUM_PHASES >> 1) - 1;
589			i < PSC_NUM_PHASES; i++, phase--) {
590		dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
591				       (coef[phase][4] & 0xfff) << 4  |
592				       (coef[phase][3] & 0xf00) >> 8),
593				  base_addr + i * sizeof(u32));
594		dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
595				       (coef[phase][2] & 0xfff) << 8  |
596				       (coef[phase][1] & 0xff0) >> 4),
597				  base_addr + 0x40 + i * sizeof(u32));
598		dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
599				  base_addr + 0x80 + i * sizeof(u32));
600	}
601}
602
603static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
604					   int base_addr,
605					   int coef[][PSC_NUM_TAPS])
606{
607	int i, phase;
608
609	for (i = 0; i < PSC_STORED_PHASES; i++) {
610		dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
611				       (coef[i][1] & 0xfff) << 4  |
612				       (coef[i][2] & 0xf00) >> 8),
613				  base_addr + i * sizeof(u32));
614		dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
615				       (coef[i][3] & 0xfff) << 8  |
616				       (coef[i][4] & 0xff0) >> 4),
617				  base_addr + 0x40 + i * sizeof(u32));
618		dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
619				       (coef[i][5] & 0xfff) << 12 |
620				       (coef[i][6] & 0xfff)),
621				  base_addr + 0x80 + i * sizeof(u32));
622	}
623
624	/* reverse both phase and tap orderings */
625	for (phase = (PSC_NUM_PHASES >> 1) - 1;
626			i < PSC_NUM_PHASES; i++, phase--) {
627		dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
628				       (coef[phase][5] & 0xfff) << 4  |
629				       (coef[phase][4] & 0xf00) >> 8),
630				  base_addr + i * sizeof(u32));
631		dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
632				       (coef[phase][3] & 0xfff) << 8  |
633				       (coef[phase][2] & 0xff0) >> 4),
634				  base_addr + 0x40 + i * sizeof(u32));
635		dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
636				       (coef[phase][1] & 0xfff) << 12 |
637				       (coef[phase][0] & 0xfff)),
638				  base_addr + 0x80 + i * sizeof(u32));
639	}
640}
641
642static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
643				     enum buffer_format src_format,
644				     enum buffer_format dst_format,
645				     bool use_5_taps,
646				     int src_xres, int src_yres, int dst_xres,
647				     int dst_yres)
648{
649	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
650	bool program_5_taps = use_5_taps ||
651			      (dst_format == BUF_FMT_YUV422 &&
652			       src_format == BUF_FMT_ARGB8888_YUV444);
653
654	/* horizontal luma */
655	dcss_scaler_filter_design(src_xres, dst_xres, false,
656				  src_xres == dst_xres, coef);
657	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
658
659	/* vertical luma */
660	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
661				  src_yres == dst_yres, coef);
662
663	if (program_5_taps)
664		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
665	else
666		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
667
668	/* adjust chroma resolution */
669	if (src_format != BUF_FMT_ARGB8888_YUV444)
670		src_xres >>= 1;
671	if (src_format == BUF_FMT_YUV420)
672		src_yres >>= 1;
673	if (dst_format != BUF_FMT_ARGB8888_YUV444)
674		dst_xres >>= 1;
675	if (dst_format == BUF_FMT_YUV420) /* should not happen */
676		dst_yres >>= 1;
677
678	/* horizontal chroma */
679	dcss_scaler_filter_design(src_xres, dst_xres, false,
680				  (src_xres == dst_xres) && (ch->c_hstart == 0),
681				  coef);
682
683	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
684
685	/* vertical chroma */
686	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
687				  (src_yres == dst_yres) && (ch->c_vstart == 0),
688				  coef);
689	if (program_5_taps)
690		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
691	else
692		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
693}
694
695static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
696				     int src_xres, int src_yres, int dst_xres,
697				     int dst_yres)
698{
699	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
700
701	/* horizontal RGB */
702	dcss_scaler_filter_design(src_xres, dst_xres, false,
703				  src_xres == dst_xres, coef);
704	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
705
706	/* vertical RGB */
707	dcss_scaler_filter_design(src_yres, dst_yres, false,
708				  src_yres == dst_yres, coef);
709	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
710}
711
712static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
713					const struct drm_format_info *format)
714{
715	u32 a2r10g10b10_format;
716
717	if (format->is_yuv)
718		return;
719
720	ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
721
722	if (format->depth != 30)
723		return;
724
725	switch (format->format) {
726	case DRM_FORMAT_ARGB2101010:
727	case DRM_FORMAT_XRGB2101010:
728		a2r10g10b10_format = 0;
729		break;
730
731	case DRM_FORMAT_ABGR2101010:
732	case DRM_FORMAT_XBGR2101010:
733		a2r10g10b10_format = 5;
734		break;
735
736	case DRM_FORMAT_RGBA1010102:
737	case DRM_FORMAT_RGBX1010102:
738		a2r10g10b10_format = 6;
739		break;
740
741	case DRM_FORMAT_BGRA1010102:
742	case DRM_FORMAT_BGRX1010102:
743		a2r10g10b10_format = 11;
744		break;
745
746	default:
747		a2r10g10b10_format = 0;
748		break;
749	}
750
751	ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
752}
753
754void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
755		       const struct drm_format_info *format,
756		       int src_xres, int src_yres, int dst_xres, int dst_yres,
757		       u32 vrefresh_hz)
758{
759	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
760	unsigned int pixel_depth = 0;
761	bool rtr_8line_en = false;
762	bool use_5_taps = false;
763	enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
764	enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
765	u32 pix_format = format->format;
766
767	if (format->is_yuv) {
768		dcss_scaler_yuv_enable(ch, true);
769
770		if (pix_format == DRM_FORMAT_NV12 ||
771		    pix_format == DRM_FORMAT_NV21) {
772			rtr_8line_en = true;
773			src_format = BUF_FMT_YUV420;
774		} else if (pix_format == DRM_FORMAT_UYVY ||
775			   pix_format == DRM_FORMAT_VYUY ||
776			   pix_format == DRM_FORMAT_YUYV ||
777			   pix_format == DRM_FORMAT_YVYU) {
778			src_format = BUF_FMT_YUV422;
779		}
780
781		use_5_taps = !rtr_8line_en;
782	} else {
783		dcss_scaler_yuv_enable(ch, false);
784
785		pixel_depth = format->depth;
786	}
787
788	dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
789				  dst_yres, src_format, dst_format,
790				  PSC_LOC_HORZ_0_VERT_1_OVER_4);
791
792	if (format->is_yuv)
793		dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
794					 use_5_taps, src_xres, src_yres,
795					 dst_xres, dst_yres);
796	else
797		dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
798					 dst_xres, dst_yres);
799
800	dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
801	dcss_scaler_bit_depth_set(ch, pixel_depth);
802	dcss_scaler_set_rgb10_order(ch, format);
803	dcss_scaler_format_set(ch, src_format, dst_format);
804	dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
805			    pix_format, dst_format);
806}
807
808/* This function will be called from interrupt context. */
809void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
810{
811	int chnum;
812
813	dcss_ctxld_assert_locked(scl->ctxld);
814
815	for (chnum = 0; chnum < 3; chnum++) {
816		struct dcss_scaler_ch *ch = &scl->ch[chnum];
817
818		if (ch->scaler_ctrl_chgd) {
819			dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
820						 ch->scaler_ctrl,
821						 ch->base_ofs +
822						 DCSS_SCALER_CTRL);
823			ch->scaler_ctrl_chgd = false;
824		}
825	}
826}
827