162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci#ifndef _VIDEO_ATAFB_UTILS_H
362306a36Sopenharmony_ci#define _VIDEO_ATAFB_UTILS_H
462306a36Sopenharmony_ci
/* ================================================================= */
/*                      Utility Assembler Functions                  */
/* ================================================================= */

/* ====================================================================== */
1062306a36Sopenharmony_ci
/* Those of a delicate disposition might like to skip the next couple of
 * pages.
 *
 * These functions are drop-in replacements for memmove and
 * memset(_, 0, _). However, their five instances add at least a kilobyte
 * to the object file. You have been warned.
 *
 * Not a great fan of assembler for the sake of it, but I think
 * that these routines are at least 10 times faster than their C
 * equivalents for large blits, and that's important to the lowest level of
 * a graphics driver. Question is whether some scheme with the blitter
 * would be faster. I suspect not for a simple text system - not much
 * asynchrony.
 *
 * Code is very simple, just gruesome expansion. Basic strategy is to
 * increase data moved/cleared at each step to 16 bytes to reduce
 * instruction per data move overhead. movem might be faster still.
 * For more than 15 bytes, we try to align the write direction on a
 * longword boundary to get maximum speed. This is even more gruesome.
 * Unaligned read/write used requires 68020+ - think this is a problem?
 *
 * Sorry!
 */
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci
/* ++roman: I've optimized Robert's original versions in some minor
 * aspects, e.g. moveq instead of movel, let gcc choose the registers,
 * use movem in some places...
 * For modes other than 1 plane, many more such assembler functions
 * were needed (e.g. the ones using movep or expanding color values).
 */
4262306a36Sopenharmony_ci
/* ++andreas: more optimizations:
   subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc
   addal is faster than addaw
   movep is rather expensive compared to ordinary moves
   some functions rewritten in C for clarity, no speed loss */
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_cistatic inline void *fb_memclear_small(void *s, size_t count)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	if (!count)
5262306a36Sopenharmony_ci		return 0;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	asm volatile ("\n"
5562306a36Sopenharmony_ci		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n"
5662306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n"
5762306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n"
5862306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"
5962306a36Sopenharmony_ci		"1:"
6062306a36Sopenharmony_ci		: "=a" (s), "=d" (count)
6162306a36Sopenharmony_ci		: "d" (0), "0" ((char *)s + count), "1" (count));
6262306a36Sopenharmony_ci	asm volatile ("\n"
6362306a36Sopenharmony_ci		"	subq.l  #1,%1\n"
6462306a36Sopenharmony_ci		"	jcs	3f\n"
6562306a36Sopenharmony_ci		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"
6662306a36Sopenharmony_ci		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n"
6762306a36Sopenharmony_ci		"	dbra	%1,2b\n"
6862306a36Sopenharmony_ci		"3:"
6962306a36Sopenharmony_ci		: "=a" (s), "=d" (count)
7062306a36Sopenharmony_ci		: "d" (0), "0" (s), "1" (count)
7162306a36Sopenharmony_ci		: "d4", "d5", "d6"
7262306a36Sopenharmony_ci		);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	return 0;
7562306a36Sopenharmony_ci}
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_cistatic inline void *fb_memclear(void *s, size_t count)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	if (!count)
8162306a36Sopenharmony_ci		return 0;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (count < 16) {
8462306a36Sopenharmony_ci		asm volatile ("\n"
8562306a36Sopenharmony_ci			"	lsr.l	#1,%1 ; jcc 1f ; clr.b (%0)+\n"
8662306a36Sopenharmony_ci			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.w (%0)+\n"
8762306a36Sopenharmony_ci			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+\n"
8862306a36Sopenharmony_ci			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n"
8962306a36Sopenharmony_ci			"1:"
9062306a36Sopenharmony_ci			: "=a" (s), "=d" (count)
9162306a36Sopenharmony_ci			: "0" (s), "1" (count));
9262306a36Sopenharmony_ci	} else {
9362306a36Sopenharmony_ci		long tmp;
9462306a36Sopenharmony_ci		asm volatile ("\n"
9562306a36Sopenharmony_ci			"	move.l	%1,%2\n"
9662306a36Sopenharmony_ci			"	lsr.l	#1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n"
9762306a36Sopenharmony_ci			"	lsr.l	#1,%2 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
9862306a36Sopenharmony_ci			"	clr.w	(%0)+  ; subq.w  #2,%1 ; jra 2f\n"
9962306a36Sopenharmony_ci			"1:	lsr.l	#1,%2 ; jcc 2f\n"
10062306a36Sopenharmony_ci			"	clr.w	(%0)+  ; subq.w  #2,%1\n"
10162306a36Sopenharmony_ci			"2:	move.w	%1,%2; lsr.l #2,%1 ; jeq 6f\n"
10262306a36Sopenharmony_ci			"	lsr.l	#1,%1 ; jcc 3f ; clr.l (%0)+\n"
10362306a36Sopenharmony_ci			"3:	lsr.l	#1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n"
10462306a36Sopenharmony_ci			"4:	subq.l	#1,%1 ; jcs 6f\n"
10562306a36Sopenharmony_ci			"5:	clr.l	(%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n"
10662306a36Sopenharmony_ci			"	dbra	%1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n"
10762306a36Sopenharmony_ci			"6:	move.w	%2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n"
10862306a36Sopenharmony_ci			"7:	btst	#0,%1 ; jeq 8f ; clr.b (%0)+\n"
10962306a36Sopenharmony_ci			"8:"
11062306a36Sopenharmony_ci			: "=a" (s), "=d" (count), "=d" (tmp)
11162306a36Sopenharmony_ci			: "0" (s), "1" (count));
11262306a36Sopenharmony_ci	}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	return 0;
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_cistatic inline void *fb_memset255(void *s, size_t count)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	if (!count)
12162306a36Sopenharmony_ci		return 0;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	asm volatile ("\n"
12462306a36Sopenharmony_ci		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n"
12562306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n"
12662306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n"
12762306a36Sopenharmony_ci		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"
12862306a36Sopenharmony_ci		"1:"
12962306a36Sopenharmony_ci		: "=a" (s), "=d" (count)
13062306a36Sopenharmony_ci		: "d" (-1), "0" ((char *)s+count), "1" (count));
13162306a36Sopenharmony_ci	asm volatile ("\n"
13262306a36Sopenharmony_ci		"	subq.l	#1,%1 ; jcs 3f\n"
13362306a36Sopenharmony_ci		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"
13462306a36Sopenharmony_ci		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n"
13562306a36Sopenharmony_ci		"	dbra	%1,2b\n"
13662306a36Sopenharmony_ci		"3:"
13762306a36Sopenharmony_ci		: "=a" (s), "=d" (count)
13862306a36Sopenharmony_ci		: "d" (-1), "0" (s), "1" (count)
13962306a36Sopenharmony_ci		: "d4", "d5", "d6");
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	return 0;
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_cistatic inline void *fb_memmove(void *d, const void *s, size_t count)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	if (d < s) {
14862306a36Sopenharmony_ci		if (count < 16) {
14962306a36Sopenharmony_ci			asm volatile ("\n"
15062306a36Sopenharmony_ci				"	lsr.l	#1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n"
15162306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n"
15262306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n"
15362306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"
15462306a36Sopenharmony_ci				"1:"
15562306a36Sopenharmony_ci				: "=a" (d), "=a" (s), "=d" (count)
15662306a36Sopenharmony_ci				: "0" (d), "1" (s), "2" (count));
15762306a36Sopenharmony_ci		} else {
15862306a36Sopenharmony_ci			long tmp;
15962306a36Sopenharmony_ci			asm volatile ("\n"
16062306a36Sopenharmony_ci				"	move.l	%0,%3\n"
16162306a36Sopenharmony_ci				"	lsr.l	#1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n"
16262306a36Sopenharmony_ci				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
16362306a36Sopenharmony_ci				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2 ; jra 2f\n"
16462306a36Sopenharmony_ci				"1:	lsr.l   #1,%3 ; jcc 2f\n"
16562306a36Sopenharmony_ci				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2\n"
16662306a36Sopenharmony_ci				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n"
16762306a36Sopenharmony_ci				"	lsr.l	#1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n"
16862306a36Sopenharmony_ci				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"
16962306a36Sopenharmony_ci				"4:	subq.l	#1,%2 ; jcs 6f\n"
17062306a36Sopenharmony_ci				"5:	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n"
17162306a36Sopenharmony_ci				"	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n"
17262306a36Sopenharmony_ci				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"
17362306a36Sopenharmony_ci				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n"
17462306a36Sopenharmony_ci				"7:	btst	#0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n"
17562306a36Sopenharmony_ci				"8:"
17662306a36Sopenharmony_ci				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
17762306a36Sopenharmony_ci				: "0" (d), "1" (s), "2" (count));
17862306a36Sopenharmony_ci		}
17962306a36Sopenharmony_ci	} else {
18062306a36Sopenharmony_ci		if (count < 16) {
18162306a36Sopenharmony_ci			asm volatile ("\n"
18262306a36Sopenharmony_ci				"	lsr.l	#1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n"
18362306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n"
18462306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n"
18562306a36Sopenharmony_ci				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"
18662306a36Sopenharmony_ci				"1:"
18762306a36Sopenharmony_ci				: "=a" (d), "=a" (s), "=d" (count)
18862306a36Sopenharmony_ci				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count));
18962306a36Sopenharmony_ci		} else {
19062306a36Sopenharmony_ci			long tmp;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci			asm volatile ("\n"
19362306a36Sopenharmony_ci				"	move.l	%0,%3\n"
19462306a36Sopenharmony_ci				"	lsr.l	#1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n"
19562306a36Sopenharmony_ci				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
19662306a36Sopenharmony_ci				"	move.w	-(%1),-(%0) ; subqw  #2,%2 ; jra 2f\n"
19762306a36Sopenharmony_ci				"1:	lsr.l	#1,%3 ; jcc 2f\n"
19862306a36Sopenharmony_ci				"	move.w	-(%1),-(%0) ; subqw  #2,%2\n"
19962306a36Sopenharmony_ci				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n"
20062306a36Sopenharmony_ci				"	lsr.l	#1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n"
20162306a36Sopenharmony_ci				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"
20262306a36Sopenharmony_ci				"4:	subq.l	#1,%2 ; jcs 6f\n"
20362306a36Sopenharmony_ci				"5:	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n"
20462306a36Sopenharmony_ci				"	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n"
20562306a36Sopenharmony_ci				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"
20662306a36Sopenharmony_ci				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n"
20762306a36Sopenharmony_ci				"7:	btst	#0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n"
20862306a36Sopenharmony_ci				"8:"
20962306a36Sopenharmony_ci				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
21062306a36Sopenharmony_ci				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count));
21162306a36Sopenharmony_ci		}
21262306a36Sopenharmony_ci	}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	return 0;
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci/* ++andreas: Simple and fast version of memmove, assumes size is
21962306a36Sopenharmony_ci   divisible by 16, suitable for moving the whole screen bitplane */
22062306a36Sopenharmony_cistatic inline void fast_memmove(char *dst, const char *src, size_t size)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	if (!size)
22362306a36Sopenharmony_ci		return;
22462306a36Sopenharmony_ci	if (dst < src)
22562306a36Sopenharmony_ci		asm volatile ("\n"
22662306a36Sopenharmony_ci			"1:	movem.l	(%0)+,%%d0/%%d1/%%a0/%%a1\n"
22762306a36Sopenharmony_ci			"	movem.l	%%d0/%%d1/%%a0/%%a1,%1@\n"
22862306a36Sopenharmony_ci			"	addq.l	#8,%1; addq.l #8,%1\n"
22962306a36Sopenharmony_ci			"	dbra	%2,1b\n"
23062306a36Sopenharmony_ci			"	clr.w	%2; subq.l #1,%2\n"
23162306a36Sopenharmony_ci			"	jcc	1b"
23262306a36Sopenharmony_ci			: "=a" (src), "=a" (dst), "=d" (size)
23362306a36Sopenharmony_ci			: "0" (src), "1" (dst), "2" (size / 16 - 1)
23462306a36Sopenharmony_ci			: "d0", "d1", "a0", "a1", "memory");
23562306a36Sopenharmony_ci	else
23662306a36Sopenharmony_ci		asm volatile ("\n"
23762306a36Sopenharmony_ci			"1:	subq.l	#8,%0; subq.l #8,%0\n"
23862306a36Sopenharmony_ci			"	movem.l	%0@,%%d0/%%d1/%%a0/%%a1\n"
23962306a36Sopenharmony_ci			"	movem.l	%%d0/%%d1/%%a0/%%a1,-(%1)\n"
24062306a36Sopenharmony_ci			"	dbra	%2,1b\n"
24162306a36Sopenharmony_ci			"	clr.w	%2; subq.l #1,%2\n"
24262306a36Sopenharmony_ci			"	jcc 1b"
24362306a36Sopenharmony_ci			: "=a" (src), "=a" (dst), "=d" (size)
24462306a36Sopenharmony_ci			: "0" (src + size), "1" (dst + size), "2" (size / 16 - 1)
24562306a36Sopenharmony_ci			: "d0", "d1", "a0", "a1", "memory");
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci#ifdef BPL
24962306a36Sopenharmony_ci
/*
 * Lookup table that spreads the four bits of a nibble over the four bytes
 * of a long: byte n of the entry is 0xff when bit n of the index is set
 * and 0x00 otherwise (bit 0 maps to the least significant byte).  Two
 * lookups expand a color of up to 8 bits into two longs for movel
 * operations.
 */
static const u32 four2long[] = {
	0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
	0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
	0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
	0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
};
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_cistatic inline void expand8_col2mask(u8 c, u32 m[])
26262306a36Sopenharmony_ci{
26362306a36Sopenharmony_ci	m[0] = four2long[c & 15];
26462306a36Sopenharmony_ci#if BPL > 4
26562306a36Sopenharmony_ci	m[1] = four2long[c >> 4];
26662306a36Sopenharmony_ci#endif
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]);
27262306a36Sopenharmony_ci#if BPL > 4
27362306a36Sopenharmony_ci	fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]);
27462306a36Sopenharmony_ci#endif
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
/**
 * fill8_col - set an 8bit value to a color
 * @dst: destination; only the even byte offsets 0, 2, ... are written
 * @m: expanded color masks, e.g. as produced by expand8_col2mask()
 *
 * The bytes of m[0] are stored least significant first at offsets 0 and 2
 * and, when BPL > 2, 4 and 6.  When BPL > 4 the bytes of m[1] follow at
 * offsets 8, 10, 12 and 14.  The odd offsets in between are left alone.
 */
static inline void fill8_col(u8 *dst, u32 m[])
{
	u32 tmp = m[0];

	dst[0] = tmp;
	dst[2] = (tmp >>= 8);
#if BPL > 2
	dst[4] = (tmp >>= 8);
	dst[6] = tmp >> 8;
#endif
#if BPL > 4
	tmp = m[1];
	dst[8] = tmp;
	dst[10] = (tmp >>= 8);
	dst[12] = (tmp >>= 8);
	dst[14] = tmp >> 8;
#endif
}
29762306a36Sopenharmony_ci
/**
 * fill8_2col - set an 8bit value according to foreground/background color
 * @dst: destination; only the even byte offsets 0, 2, ... are written
 * @fg: foreground color
 * @bg: background color
 * @mask: selects foreground (bit set) or background (bit clear)
 *
 * @mask is replicated into the next byte and, when BPL > 2, into the upper
 * half of the long as well, so the caller presumably passes an 8-bit
 * pattern - verify against the call sites.  The combined value is then
 * stored byte by byte exactly as in fill8_col().
 */
static inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask)
{
	u32 fgm[2], bgm[2], tmp;

	expand8_2col2mask(fg, bg, fgm, bgm);

	mask |= mask << 8;
#if BPL > 2
	mask |= mask << 16;
#endif
	/* fgm[] already holds fg ^ bg, so this picks fg where mask is set */
	tmp = (mask & fgm[0]) ^ bgm[0];
	dst[0] = tmp;
	dst[2] = (tmp >>= 8);
#if BPL > 2
	dst[4] = (tmp >>= 8);
	dst[6] = tmp >> 8;
#endif
#if BPL > 4
	tmp = (mask & fgm[1]) ^ bgm[1];
	dst[8] = tmp;
	dst[10] = (tmp >>= 8);
	dst[12] = (tmp >>= 8);
	dst[14] = tmp >> 8;
#endif
}
32662306a36Sopenharmony_ci
/*
 * Lookup table that spreads two bits over the two 16-bit halves of a long:
 * bit 0 of the index fills the upper half with ones, bit 1 the lower half.
 */
static const u32 two2word[] = {
	0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
};
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_cistatic inline void expand16_col2mask(u8 c, u32 m[])
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	m[0] = two2word[c & 3];
33462306a36Sopenharmony_ci#if BPL > 2
33562306a36Sopenharmony_ci	m[1] = two2word[(c >> 2) & 3];
33662306a36Sopenharmony_ci#endif
33762306a36Sopenharmony_ci#if BPL > 4
33862306a36Sopenharmony_ci	m[2] = two2word[(c >> 4) & 3];
33962306a36Sopenharmony_ci	m[3] = two2word[c >> 6];
34062306a36Sopenharmony_ci#endif
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_cistatic inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])
34462306a36Sopenharmony_ci{
34562306a36Sopenharmony_ci	bgm[0] = two2word[bg & 3];
34662306a36Sopenharmony_ci	fgm[0] = two2word[fg & 3] ^ bgm[0];
34762306a36Sopenharmony_ci#if BPL > 2
34862306a36Sopenharmony_ci	bgm[1] = two2word[(bg >> 2) & 3];
34962306a36Sopenharmony_ci	fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1];
35062306a36Sopenharmony_ci#endif
35162306a36Sopenharmony_ci#if BPL > 4
35262306a36Sopenharmony_ci	bgm[2] = two2word[(bg >> 4) & 3];
35362306a36Sopenharmony_ci	fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2];
35462306a36Sopenharmony_ci	bgm[3] = two2word[bg >> 6];
35562306a36Sopenharmony_ci	fgm[3] = two2word[fg >> 6] ^ bgm[3];
35662306a36Sopenharmony_ci#endif
35762306a36Sopenharmony_ci}
35862306a36Sopenharmony_ci
/**
 * fill16_col - write the expanded color masks for a number of rows
 * @dst: first long to write
 * @rows: number of repetitions; zero or a negative value writes nothing
 * @m: expanded color masks, e.g. as produced by expand16_col2mask()
 *
 * Every repetition stores m[0], then m[1] when BPL > 2, then m[2] and m[3]
 * when BPL > 4, into consecutive longs.
 *
 * Return: pointer just past the last long written (@dst if none was).
 */
static inline u32 *fill16_col(u32 *dst, int rows, u32 m[])
{
	/* "rows > 0" rather than "rows != 0": a negative count must not run away */
	for (; rows > 0; rows--) {
		*dst++ = m[0];
#if BPL > 2
		*dst++ = m[1];
#endif
#if BPL > 4
		*dst++ = m[2];
		*dst++ = m[3];
#endif
	}
	return dst;
}
37462306a36Sopenharmony_ci
/**
 * memmove32_col - masked copy of longs, row by row
 * @dst: destination
 * @src: source
 * @mask: bits taken from @src; the remaining bits keep their @dst value
 * @h: number of rows; zero copies nothing
 * @bytes: number of bytes added to both pointers after each row
 *
 * A row is one long, two longs when BPL > 2, or four longs when BPL > 4.
 * Each destination long becomes (source & mask) | (destination & ~mask).
 */
static inline void memmove32_col(void *dst, void *src, u32 mask, u32 h, u32 bytes)
{
	u32 *s, *d, v;

	s = src;
	d = dst;
	/* testing h before the first row keeps h == 0 from wrapping around */
	for (; h; h--) {
		v = (*s++ & mask) | (*d  & ~mask);
		*d++ = v;
#if BPL > 2
		v = (*s++ & mask) | (*d  & ~mask);
		*d++ = v;
#endif
#if BPL > 4
		v = (*s++ & mask) | (*d  & ~mask);
		*d++ = v;
		v = (*s++ & mask) | (*d  & ~mask);
		*d++ = v;
#endif
		/* step over the part of the line that is not copied */
		d = (u32 *)((u8 *)d + bytes);
		s = (u32 *)((u8 *)s + bytes);
	}
}
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci#endif
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci#endif /* _VIDEO_ATAFB_UTILS_H */
402