1/*
2 *  linux/drivers/video/kyro/STG4000OverlayDevice.c
3 *
4 *  Copyright (C) 2000 Imagination Technologies Ltd
5 *  Copyright (C) 2002 STMicroelectronics
6 *
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License.  See the file COPYING in the main directory of this archive
9 * for more details.
10 */
11
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/types.h>
15
16#include "STG4000Reg.h"
17#include "STG4000Interface.h"
18
/* HW Defines */

/*
 * Scale register value for 1:1 scaling: 0x800 is 1.0 in the 11-bit
 * fractional fixed point used by the scaling code in this file.
 */
#define STG4000_NO_SCALING    0x800
/* Vertical decimation pattern with every bit set (same as adwDecim8[0]) */
#define STG4000_NO_DECIMATION 0xFFFFFFFF

/* Primary surface */
#define STG4000_PRIM_NUM_PIX   5
#define STG4000_PRIM_ALIGN     4
#define STG4000_PRIM_ADDR_BITS 20

#define STG4000_PRIM_MIN_WIDTH  640
#define STG4000_PRIM_MAX_WIDTH  1600
#define STG4000_PRIM_MIN_HEIGHT 480
#define STG4000_PRIM_MAX_HEIGHT 1200

/* Overlay surface */
#define STG4000_OVRL_NUM_PIX   4
#define STG4000_OVRL_ALIGN     2
#define STG4000_OVRL_ADDR_BITS 20
#define STG4000_OVRL_NUM_MODES 5

/* The MAX values below are the limits checked by CreateOverlaySurface() */
#define STG4000_OVRL_MIN_WIDTH  0
#define STG4000_OVRL_MAX_WIDTH  720
#define STG4000_OVRL_MIN_HEIGHT 0
#define STG4000_OVRL_MAX_HEIGHT 576
44
/*
 * Decimation and Scaling
 *
 * Vertical decimation patterns, indexed by ulBits (0..32) in
 * SetOverlayViewPort() and written to the DACOverlayVtDec register.
 * Entry n contains exactly n zero bits out of 32: entry 0 is all ones (the
 * same value as STG4000_NO_DECIMATION) and entry 32 is all zeros.
 * Overlap() counts the zero bits of a pattern; SetOverlayViewPort() subtracts
 * that count from the number of source lines that go into the scaler.
 */
static u32 adwDecim8[33] = {
	    0xffffffff, 0xfffeffff, 0xffdffbff, 0xfefefeff, 0xfdf7efbf,
	    0xfbdf7bdf, 0xf7bbddef, 0xeeeeeeef, 0xeeddbb77, 0xedb76db7,
	    0xdb6db6db, 0xdb5b5b5b, 0xdab5ad6b, 0xd5ab55ab, 0xd555aaab,
	    0xaaaaaaab, 0xaaaa5555, 0xaa952a55, 0xa94a5295, 0xa5252525,
	    0xa4924925, 0x92491249, 0x91224489, 0x91111111, 0x90884211,
	    0x88410821, 0x88102041, 0x81010101, 0x80800801, 0x80010001,
	    0x80000001, 0x00000001, 0x00000000
};
55
/*
 * Source and destination rectangles used by SetOverlayViewPort() while it
 * works out decimation and scaling.  SetOverlayViewPort() currently fills
 * the clipped (ulDst*) and unclipped (lDst*) destination fields with the
 * same values.
 */
typedef struct _OVRL_SRC_DEST {
	/* clipped on-screen pixel position of overlay */
	u32 ulDstX1;
	u32 ulDstY1;
	u32 ulDstX2;
	u32 ulDstY2;

	/*
	 * clipped pixel position of source data within the buffer; these
	 * need to be 128 bit word aligned
	 */
	u32 ulSrcX1;
	u32 ulSrcY1;
	u32 ulSrcX2;
	u32 ulSrcY2;

	/* on-screen pixel position of overlay (signed) */
	s32 lDstX1;
	s32 lDstY1;
	s32 lDstX2;
	s32 lDstY2;
} OVRL_SRC_DEST;
75
/*
 * Geometry of the most recently created overlay surface.  Written by
 * CreateOverlaySurface() and read by SetOverlayViewPort().
 * ovlWidth/ovlHeight are in pixels/lines, ovlStride is the line stride in
 * 16 byte words, and ovlLinear is non-zero for the linear (non-planar)
 * format.
 */
static u32 ovlWidth, ovlHeight, ovlStride;
static int ovlLinear;
78
/*
 * ResetOverlayRegisters - put the overlay related DAC registers back to
 * their default state.
 *
 * pSTGReg: register block; it is used implicitly by the STG_READ_REG and
 *          STG_WRITE_REG macros.
 *
 * Each register is read into the local 'tmp', selected bit fields are
 * cleared, and the result is written back.  CLEAR_BITS_FRM_TO() and
 * CLEAR_BIT() are defined outside this file; they appear to operate on the
 * local variable named 'tmp' (NOTE(review): confirm against STG4000Reg.h).
 */
void ResetOverlayRegisters(volatile STG4000REG __iomem *pSTGReg)
{
	u32 tmp;

	/* Set Overlay address to default: clear bits 0-20 and bit 31 */
	tmp = STG_READ_REG(DACOverlayAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	CLEAR_BIT(31);
	STG_WRITE_REG(DACOverlayAddr, tmp);

	/* Set Overlay U address */
	tmp = STG_READ_REG(DACOverlayUAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	STG_WRITE_REG(DACOverlayUAddr, tmp);

	/* Set Overlay V address */
	tmp = STG_READ_REG(DACOverlayVAddr);
	CLEAR_BITS_FRM_TO(0, 20);
	STG_WRITE_REG(DACOverlayVAddr, tmp);

	/* Set Overlay Size: clear bits 0-10 and 12-31, bit 11 is left alone */
	tmp = STG_READ_REG(DACOverlaySize);
	CLEAR_BITS_FRM_TO(0, 10);
	CLEAR_BITS_FRM_TO(12, 31);
	STG_WRITE_REG(DACOverlaySize, tmp);

	/* Set Overlay Vt Decimation: whole register replaced, no decimation */
	tmp = STG4000_NO_DECIMATION;
	STG_WRITE_REG(DACOverlayVtDec, tmp);

	/* Set Overlay format to default value */
	tmp = STG_READ_REG(DACPixelFormat);
	CLEAR_BITS_FRM_TO(4, 7);
	CLEAR_BITS_FRM_TO(16, 22);
	STG_WRITE_REG(DACPixelFormat, tmp);

	/* Set Vertical scaling to default */
	tmp = STG_READ_REG(DACVerticalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 22);
	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
	STG_WRITE_REG(DACVerticalScal, tmp);

	/* Set Horizontal Scaling to default */
	tmp = STG_READ_REG(DACHorizontalScal);
	CLEAR_BITS_FRM_TO(0, 11);
	CLEAR_BITS_FRM_TO(16, 17);
	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
	STG_WRITE_REG(DACHorizontalScal, tmp);

	/*
	 * Set the blend mode to GRAPHICS_MODE.
	 * (Original note, SG 08/11/2001: surely this isn't the alpha blend
	 * mode, hopefully it is overwrite.)
	 *
	 * The value written is exactly (GRAPHICS_MODE << 28): 'tmp' is
	 * assigned, not OR-ed, so the bits read and cleared just above do
	 * not reach the register.
	 */
	tmp = STG_READ_REG(DACBlendCtrl);
	CLEAR_BITS_FRM_TO(0, 30);
	tmp = (GRAPHICS_MODE << 28);
	STG_WRITE_REG(DACBlendCtrl, tmp);

}
139
140int CreateOverlaySurface(volatile STG4000REG __iomem *pSTGReg,
141			 u32 inWidth,
142			 u32 inHeight,
143			 int bLinear,
144			 u32 ulOverlayOffset,
145			 u32 * retStride, u32 * retUVStride)
146{
147	u32 tmp;
148	u32 ulStride;
149
150	if (inWidth > STG4000_OVRL_MAX_WIDTH ||
151	    inHeight > STG4000_OVRL_MAX_HEIGHT) {
152		return -EINVAL;
153	}
154
155	/* Stride in 16 byte words - 16Bpp */
156	if (bLinear) {
157		/* Format is 16bits so num 16 byte words is width/8 */
158		if ((inWidth & 0x7) == 0) {	/* inWidth % 8 */
159			ulStride = (inWidth / 8);
160		} else {
161			/* Round up to next 16byte boundary */
162			ulStride = ((inWidth + 8) / 8);
163		}
164	} else {
165		/* Y component is 8bits so num 16 byte words is width/16 */
166		if ((inWidth & 0xf) == 0) {	/* inWidth % 16 */
167			ulStride = (inWidth / 16);
168		} else {
169			/* Round up to next 16byte boundary */
170			ulStride = ((inWidth + 16) / 16);
171		}
172	}
173
174
175	/* Set Overlay address and Format mode */
176	tmp = STG_READ_REG(DACOverlayAddr);
177	CLEAR_BITS_FRM_TO(0, 20);
178	if (bLinear) {
179		CLEAR_BIT(31);	/* Overlay format to Linear */
180	} else {
181		tmp |= SET_BIT(31);	/* Overlay format to Planer */
182	}
183
184	/* Only bits 24:4 of the Overlay address */
185	tmp |= (ulOverlayOffset >> 4);
186	STG_WRITE_REG(DACOverlayAddr, tmp);
187
188	if (!bLinear) {
189		u32 uvSize =
190		    (inWidth & 0x1) ? (inWidth + 1 / 2) : (inWidth / 2);
191		u32 uvStride;
192		u32 ulOffset;
193		/* Y component is 8bits so num 32 byte words is width/32 */
194		if ((uvSize & 0xf) == 0) {	/* inWidth % 16 */
195			uvStride = (uvSize / 16);
196		} else {
197			/* Round up to next 32byte boundary */
198			uvStride = ((uvSize + 16) / 16);
199		}
200
201		ulOffset = ulOverlayOffset + (inHeight * (ulStride * 16));
202		/* Align U,V data to 32byte boundary */
203		if ((ulOffset & 0x1f) != 0)
204			ulOffset = (ulOffset + 32L) & 0xffffffE0L;
205
206		tmp = STG_READ_REG(DACOverlayUAddr);
207		CLEAR_BITS_FRM_TO(0, 20);
208		tmp |= (ulOffset >> 4);
209		STG_WRITE_REG(DACOverlayUAddr, tmp);
210
211		ulOffset += (inHeight / 2) * (uvStride * 16);
212		/* Align U,V data to 32byte boundary */
213		if ((ulOffset & 0x1f) != 0)
214			ulOffset = (ulOffset + 32L) & 0xffffffE0L;
215
216		tmp = STG_READ_REG(DACOverlayVAddr);
217		CLEAR_BITS_FRM_TO(0, 20);
218		tmp |= (ulOffset >> 4);
219		STG_WRITE_REG(DACOverlayVAddr, tmp);
220
221		*retUVStride = uvStride * 16;
222	}
223
224
225	/* Set Overlay YUV pixel format
226	 * Make sure that LUT not used - ??????
227	 */
228	tmp = STG_READ_REG(DACPixelFormat);
229	/* Only support Planer or UYVY linear formats */
230	CLEAR_BITS_FRM_TO(4, 9);
231	STG_WRITE_REG(DACPixelFormat, tmp);
232
233	ovlWidth = inWidth;
234	ovlHeight = inHeight;
235	ovlStride = ulStride;
236	ovlLinear = bLinear;
237	*retStride = ulStride << 4;	/* In bytes */
238
239	return 0;
240}
241
242int SetOverlayBlendMode(volatile STG4000REG __iomem *pSTGReg,
243			OVRL_BLEND_MODE mode,
244			u32 ulAlpha, u32 ulColorKey)
245{
246	u32 tmp;
247
248	tmp = STG_READ_REG(DACBlendCtrl);
249	CLEAR_BITS_FRM_TO(28, 30);
250	tmp |= (mode << 28);
251
252	switch (mode) {
253	case COLOR_KEY:
254		CLEAR_BITS_FRM_TO(0, 23);
255		tmp |= (ulColorKey & 0x00FFFFFF);
256		break;
257
258	case GLOBAL_ALPHA:
259		CLEAR_BITS_FRM_TO(24, 27);
260		tmp |= ((ulAlpha & 0xF) << 24);
261		break;
262
263	case CK_PIXEL_ALPHA:
264		CLEAR_BITS_FRM_TO(0, 23);
265		tmp |= (ulColorKey & 0x00FFFFFF);
266		break;
267
268	case CK_GLOBAL_ALPHA:
269		CLEAR_BITS_FRM_TO(0, 23);
270		tmp |= (ulColorKey & 0x00FFFFFF);
271		CLEAR_BITS_FRM_TO(24, 27);
272		tmp |= ((ulAlpha & 0xF) << 24);
273		break;
274
275	case GRAPHICS_MODE:
276	case PER_PIXEL_ALPHA:
277		break;
278
279	default:
280		return -EINVAL;
281	}
282
283	STG_WRITE_REG(DACBlendCtrl, tmp);
284
285	return 0;
286}
287
288void EnableOverlayPlane(volatile STG4000REG __iomem *pSTGReg)
289{
290	u32 tmp;
291	/* Enable Overlay */
292	tmp = STG_READ_REG(DACPixelFormat);
293	tmp |= SET_BIT(7);
294	STG_WRITE_REG(DACPixelFormat, tmp);
295
296	/* Set video stream control */
297	tmp = STG_READ_REG(DACStreamCtrl);
298	tmp |= SET_BIT(1);	/* video stream */
299	STG_WRITE_REG(DACStreamCtrl, tmp);
300}
301
302static u32 Overlap(u32 ulBits, u32 ulPattern)
303{
304	u32 ulCount = 0;
305
306	while (ulBits) {
307		if (!(ulPattern & 1))
308			ulCount++;
309		ulBits--;
310		ulPattern = ulPattern >> 1;
311	}
312
313	return ulCount;
314
315}
316
317int SetOverlayViewPort(volatile STG4000REG __iomem *pSTGReg,
318		       u32 left, u32 top,
319		       u32 right, u32 bottom)
320{
321	OVRL_SRC_DEST srcDest;
322
323	u32 ulSrcTop, ulSrcBottom;
324	u32 ulSrc, ulDest;
325	u32 ulFxScale, ulFxOffset;
326	u32 ulHeight, ulWidth;
327	u32 ulPattern;
328	u32 ulDecimate, ulDecimated;
329	u32 ulApplied;
330	u32 ulDacXScale, ulDacYScale;
331	u32 ulScale;
332	u32 ulLeft, ulRight;
333	u32 ulSrcLeft, ulSrcRight;
334	u32 ulScaleLeft;
335	u32 ulhDecim;
336	u32 ulsVal;
337	u32 ulVertDecFactor;
338	int bResult;
339	u32 ulClipOff = 0;
340	u32 ulBits = 0;
341	u32 ulsAdd = 0;
342	u32 tmp, ulStride;
343	u32 ulExcessPixels, ulClip, ulExtraLines;
344
345
346	srcDest.ulSrcX1 = 0;
347	srcDest.ulSrcY1 = 0;
348	srcDest.ulSrcX2 = ovlWidth - 1;
349	srcDest.ulSrcY2 = ovlHeight - 1;
350
351	srcDest.ulDstX1 = left;
352	srcDest.ulDstY1 = top;
353	srcDest.ulDstX2 = right;
354	srcDest.ulDstY2 = bottom;
355
356	srcDest.lDstX1 = srcDest.ulDstX1;
357	srcDest.lDstY1 = srcDest.ulDstY1;
358	srcDest.lDstX2 = srcDest.ulDstX2;
359	srcDest.lDstY2 = srcDest.ulDstY2;
360
361    /************* Vertical decimation/scaling ******************/
362
363	/* Get Src Top and Bottom */
364	ulSrcTop = srcDest.ulSrcY1;
365	ulSrcBottom = srcDest.ulSrcY2;
366
367	ulSrc = ulSrcBottom - ulSrcTop;
368	ulDest = srcDest.lDstY2 - srcDest.lDstY1;	/* on-screen overlay */
369
370	if (ulSrc <= 1)
371		return -EINVAL;
372
373	/* First work out the position we are to display as offset from the
374	 * source of the buffer
375	 */
376	ulFxScale = (ulDest << 11) / ulSrc;	/* fixed point scale factor */
377	ulFxOffset = (srcDest.lDstY2 - srcDest.ulDstY2) << 11;
378
379	ulSrcBottom = ulSrcBottom - (ulFxOffset / ulFxScale);
380	ulSrc = ulSrcBottom - ulSrcTop;
381	ulHeight = ulSrc;
382
383	ulDest = srcDest.ulDstY2 - (srcDest.ulDstY1 - 1);
384	ulPattern = adwDecim8[ulBits];
385
386	/* At this point ulSrc represents the input decimator */
387	if (ulSrc > ulDest) {
388		ulDecimate = ulSrc - ulDest;
389		ulBits = 0;
390		ulApplied = ulSrc / 32;
391
392		while (((ulBits * ulApplied) +
393			Overlap((ulSrc % 32),
394				adwDecim8[ulBits])) < ulDecimate)
395			ulBits++;
396
397		ulPattern = adwDecim8[ulBits];
398		ulDecimated =
399		    (ulBits * ulApplied) + Overlap((ulSrc % 32),
400						   ulPattern);
401		ulSrc = ulSrc - ulDecimated;	/* the number number of lines that will go into the scaler */
402	}
403
404	if (ulBits && (ulBits != 32)) {
405		ulVertDecFactor = (63 - ulBits) / (32 - ulBits);	/* vertical decimation factor scaled up to nearest integer */
406	} else {
407		ulVertDecFactor = 1;
408	}
409
410	ulDacYScale = ((ulSrc - 1) * 2048) / (ulDest + 1);
411
412	tmp = STG_READ_REG(DACOverlayVtDec);	/* Decimation */
413	CLEAR_BITS_FRM_TO(0, 31);
414	tmp = ulPattern;
415	STG_WRITE_REG(DACOverlayVtDec, tmp);
416
417	/***************** Horizontal decimation/scaling ***************************/
418
419	/*
420	 * Now we handle the horizontal case, this is a simplified version of
421	 * the vertical case in that we decimate by factors of 2.  as we are
422	 * working in words we should always be able to decimate by these
423	 * factors.  as we always have to have a buffer which is aligned to a
424	 * whole number of 128 bit words, we must align the left side to the
425	 * lowest to the next lowest 128 bit boundary, and the right hand edge
426	 * to the next largets boundary, (in a similar way to how we didi it in
427	 * PMX1) as the left and right hand edges are aligned to these
428	 * boundaries normally this only becomes an issue when we are chopping
429	 * of one of the sides We shall work out vertical stuff first
430	 */
431	ulSrc = srcDest.ulSrcX2 - srcDest.ulSrcX1;
432	ulDest = srcDest.lDstX2 - srcDest.lDstX1;
433#ifdef _OLDCODE
434	ulLeft = srcDest.ulDstX1;
435	ulRight = srcDest.ulDstX2;
436#else
437	if (srcDest.ulDstX1 > 2) {
438		ulLeft = srcDest.ulDstX1 + 2;
439		ulRight = srcDest.ulDstX2 + 1;
440	} else {
441		ulLeft = srcDest.ulDstX1;
442		ulRight = srcDest.ulDstX2 + 1;
443	}
444#endif
445	/* first work out the position we are to display as offset from the source of the buffer */
446	bResult = 1;
447
448	do {
449		if (ulDest == 0)
450			return -EINVAL;
451
452		/* source pixels per dest pixel <<11 */
453		ulFxScale = ((ulSrc - 1) << 11) / (ulDest);
454
455		/* then number of destination pixels out we are */
456		ulFxOffset = ulFxScale * ((srcDest.ulDstX1 - srcDest.lDstX1) + ulClipOff);
457		ulFxOffset >>= 11;
458
459		/* this replaces the code which was making a decision as to use either ulFxOffset or ulSrcX1 */
460		ulSrcLeft = srcDest.ulSrcX1 + ulFxOffset;
461
462		/* then number of destination pixels out we are */
463		ulFxOffset = ulFxScale * (srcDest.lDstX2 - srcDest.ulDstX2);
464		ulFxOffset >>= 11;
465
466		ulSrcRight = srcDest.ulSrcX2 - ulFxOffset;
467
468		/*
469		 * we must align these to our 128 bit boundaries. we shall
470		 * round down the pixel pos to the nearest 8 pixels.
471		 */
472		ulScaleLeft = ulSrcLeft;
473
474		/* shift fxscale until it is in the range of the scaler */
475		ulhDecim = 0;
476		ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);
477
478		while (ulScale > 0x800) {
479			ulhDecim++;
480			ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);
481		}
482
483		/*
484		 * to try and get the best values We first try and use
485		 * src/dwdest for the scale factor, then we move onto src-1
486		 *
487		 * we want to check to see if we will need to clip data, if so
488		 * then we should clip our source so that we don't need to
489		 */
490		if (!ovlLinear) {
491			ulSrcLeft &= ~0x1f;
492
493			/*
494			 * we must align the right hand edge to the next 32
495			 * pixel` boundary, must be on a 256 boundary so u, and
496			 * v are 128 bit aligned
497			 */
498			ulSrcRight = (ulSrcRight + 0x1f) & ~0x1f;
499		} else {
500			ulSrcLeft &= ~0x7;
501
502			/*
503			 * we must align the right hand edge to the next
504			 * 8pixel` boundary
505			 */
506			ulSrcRight = (ulSrcRight + 0x7) & ~0x7;
507		}
508
509		/* this is the input size line store needs to cope with */
510		ulWidth = ulSrcRight - ulSrcLeft;
511
512		/*
513		 * use unclipped value to work out scale factror this is the
514		 * scale factor we want we shall now work out the horizonal
515		 * decimation and scaling
516		 */
517		ulsVal = ((ulWidth / 8) >> ulhDecim);
518
519		if ((ulWidth != (ulsVal << ulhDecim) * 8))
520			ulsAdd = 1;
521
522		/* input pixels to scaler; */
523		ulSrc = ulWidth >> ulhDecim;
524
525		if (ulSrc <= 2)
526			return -EINVAL;
527
528		ulExcessPixels = ((((ulScaleLeft - ulSrcLeft)) << (11 - ulhDecim)) / ulScale);
529
530		ulClip = (ulSrc << 11) / ulScale;
531		ulClip -= (ulRight - ulLeft);
532		ulClip += ulExcessPixels;
533
534		if (ulClip)
535			ulClip--;
536
537		/* We may need to do more here if we really have a HW rev < 5 */
538	} while (!bResult);
539
540	ulExtraLines = (1 << ulhDecim) * ulVertDecFactor;
541	ulExtraLines += 64;
542	ulHeight += ulExtraLines;
543
544	ulDacXScale = ulScale;
545
546
547	tmp = STG_READ_REG(DACVerticalScal);
548	CLEAR_BITS_FRM_TO(0, 11);
549	CLEAR_BITS_FRM_TO(16, 22);	/* Vertical Scaling */
550
551	/* Calculate new output line stride, this is always the number of 422
552	   words in the line buffer, so it doesn't matter if the
553	   mode is 420. Then set the vertical scale register.
554	 */
555	ulStride = (ulWidth >> (ulhDecim + 3)) + ulsAdd;
556	tmp |= ((ulStride << 16) | (ulDacYScale));	/* DAC_LS_CTRL = stride */
557	STG_WRITE_REG(DACVerticalScal, tmp);
558
559	/* Now set up the overlay size using the modified width and height
560	   from decimate and scaling calculations
561	 */
562	tmp = STG_READ_REG(DACOverlaySize);
563	CLEAR_BITS_FRM_TO(0, 10);
564	CLEAR_BITS_FRM_TO(12, 31);
565
566	if (ovlLinear) {
567		tmp |=
568		    (ovlStride | ((ulHeight + 1) << 12) |
569		     (((ulWidth / 8) - 1) << 23));
570	} else {
571		tmp |=
572		    (ovlStride | ((ulHeight + 1) << 12) |
573		     (((ulWidth / 32) - 1) << 23));
574	}
575
576	STG_WRITE_REG(DACOverlaySize, tmp);
577
578	/* Set Video Window Start */
579	tmp = ((ulLeft << 16)) | (srcDest.ulDstY1);
580	STG_WRITE_REG(DACVidWinStart, tmp);
581
582	/* Set Video Window End */
583	tmp = ((ulRight) << 16) | (srcDest.ulDstY2);
584	STG_WRITE_REG(DACVidWinEnd, tmp);
585
586	/* Finally set up the rest of the overlay regs in the order
587	   done in the IMG driver
588	 */
589	tmp = STG_READ_REG(DACPixelFormat);
590	tmp = ((ulExcessPixels << 16) | tmp) & 0x7fffffff;
591	STG_WRITE_REG(DACPixelFormat, tmp);
592
593	tmp = STG_READ_REG(DACHorizontalScal);
594	CLEAR_BITS_FRM_TO(0, 11);
595	CLEAR_BITS_FRM_TO(16, 17);
596	tmp |= ((ulhDecim << 16) | (ulDacXScale));
597	STG_WRITE_REG(DACHorizontalScal, tmp);
598
599	return 0;
600}
601