162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 262306a36Sopenharmony_ci#ifndef _VIDEO_ATAFB_UTILS_H 362306a36Sopenharmony_ci#define _VIDEO_ATAFB_UTILS_H 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci/* ================================================================= */ 662306a36Sopenharmony_ci/* Utility Assembler Functions */ 762306a36Sopenharmony_ci/* ================================================================= */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci/* ====================================================================== */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci/* Those of a delicate disposition might like to skip the next couple of 1262306a36Sopenharmony_ci * pages. 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * These functions are drop in replacements for memmove and 1562306a36Sopenharmony_ci * memset(_, 0, _). However their five instances add at least a kilobyte 1662306a36Sopenharmony_ci * to the object file. You have been warned. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * Not a great fan of assembler for the sake of it, but I think 1962306a36Sopenharmony_ci * that these routines are at least 10 times faster than their C 2062306a36Sopenharmony_ci * equivalents for large blits, and that's important to the lowest level of 2162306a36Sopenharmony_ci * a graphics driver. Question is whether some scheme with the blitter 2262306a36Sopenharmony_ci * would be faster. I suspect not for simple text system - not much 2362306a36Sopenharmony_ci * asynchrony. 2462306a36Sopenharmony_ci * 2562306a36Sopenharmony_ci * Code is very simple, just gruesome expansion. Basic strategy is to 2662306a36Sopenharmony_ci * increase data moved/cleared at each step to 16 bytes to reduce 2762306a36Sopenharmony_ci * instruction per data move overhead. movem might be faster still 2862306a36Sopenharmony_ci * For more than 15 bytes, we try to align the write direction on a 2962306a36Sopenharmony_ci * longword boundary to get maximum speed. This is even more gruesome. 3062306a36Sopenharmony_ci * Unaligned read/write used requires 68020+ - think this is a problem? 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci * Sorry! 3362306a36Sopenharmony_ci */ 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci/* ++roman: I've optimized Robert's original versions in some minor 3762306a36Sopenharmony_ci * aspects, e.g. moveq instead of movel, let gcc choose the registers, 3862306a36Sopenharmony_ci * use movem in some places... 3962306a36Sopenharmony_ci * For other modes than 1 plane, lots of more such assembler functions 4062306a36Sopenharmony_ci * were needed (e.g. the ones using movep or expanding color values). 4162306a36Sopenharmony_ci */ 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* ++andreas: more optimizations: 4462306a36Sopenharmony_ci subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc 4562306a36Sopenharmony_ci addal is faster than addaw 4662306a36Sopenharmony_ci movep is rather expensive compared to ordinary move's 4762306a36Sopenharmony_ci some functions rewritten in C for clarity, no speed loss */ 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistatic inline void *fb_memclear_small(void *s, size_t count) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci if (!count) 5262306a36Sopenharmony_ci return 0; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci asm volatile ("\n" 5562306a36Sopenharmony_ci " lsr.l #1,%1 ; jcc 1f ; move.b %2,-(%0)\n" 5662306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.w %2,-(%0)\n" 5762306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0)\n" 5862306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" 5962306a36Sopenharmony_ci "1:" 6062306a36Sopenharmony_ci : "=a" (s), "=d" (count) 6162306a36Sopenharmony_ci : "d" (0), "0" ((char *)s + count), "1" (count)); 6262306a36Sopenharmony_ci asm volatile ("\n" 6362306a36Sopenharmony_ci " subq.l #1,%1\n" 6462306a36Sopenharmony_ci " jcs 3f\n" 6562306a36Sopenharmony_ci " move.l %2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" 6662306a36Sopenharmony_ci "2: movem.l %2/%%d4/%%d5/%%d6,-(%0)\n" 6762306a36Sopenharmony_ci " dbra %1,2b\n" 6862306a36Sopenharmony_ci "3:" 6962306a36Sopenharmony_ci : "=a" (s), "=d" (count) 7062306a36Sopenharmony_ci : "d" (0), "0" (s), "1" (count) 7162306a36Sopenharmony_ci : "d4", "d5", "d6" 7262306a36Sopenharmony_ci ); 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci return 0; 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic inline void *fb_memclear(void *s, size_t count) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci if (!count) 8162306a36Sopenharmony_ci return 0; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci if (count < 16) { 8462306a36Sopenharmony_ci asm volatile ("\n" 8562306a36Sopenharmony_ci " lsr.l #1,%1 ; jcc 1f ; clr.b (%0)+\n" 8662306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; clr.w (%0)+\n" 8762306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; clr.l (%0)+\n" 8862306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n" 8962306a36Sopenharmony_ci "1:" 9062306a36Sopenharmony_ci : "=a" (s), "=d" (count) 9162306a36Sopenharmony_ci : "0" (s), "1" (count)); 9262306a36Sopenharmony_ci } else { 9362306a36Sopenharmony_ci long tmp; 9462306a36Sopenharmony_ci asm volatile ("\n" 9562306a36Sopenharmony_ci " move.l %1,%2\n" 9662306a36Sopenharmony_ci " lsr.l #1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n" 9762306a36Sopenharmony_ci " lsr.l #1,%2 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/ 9862306a36Sopenharmony_ci " clr.w (%0)+ ; subq.w #2,%1 ; jra 2f\n" 9962306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 2f\n" 10062306a36Sopenharmony_ci " clr.w (%0)+ ; subq.w #2,%1\n" 10162306a36Sopenharmony_ci "2: move.w %1,%2; lsr.l #2,%1 ; jeq 6f\n" 10262306a36Sopenharmony_ci " lsr.l #1,%1 ; jcc 3f ; clr.l (%0)+\n" 10362306a36Sopenharmony_ci "3: lsr.l #1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n" 10462306a36Sopenharmony_ci "4: subq.l #1,%1 ; jcs 6f\n" 10562306a36Sopenharmony_ci "5: clr.l (%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n" 10662306a36Sopenharmony_ci " dbra %1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n" 10762306a36Sopenharmony_ci "6: move.w %2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n" 10862306a36Sopenharmony_ci "7: btst #0,%1 ; jeq 8f ; clr.b (%0)+\n" 10962306a36Sopenharmony_ci "8:" 11062306a36Sopenharmony_ci : "=a" (s), "=d" (count), "=d" (tmp) 11162306a36Sopenharmony_ci : "0" (s), "1" (count)); 11262306a36Sopenharmony_ci } 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci return 0; 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic inline void *fb_memset255(void *s, size_t count) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci if (!count) 12162306a36Sopenharmony_ci return 0; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci asm volatile ("\n" 12462306a36Sopenharmony_ci " lsr.l #1,%1 ; jcc 1f ; move.b %2,-(%0)\n" 12562306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.w %2,-(%0)\n" 12662306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0)\n" 12762306a36Sopenharmony_ci "1: lsr.l #1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" 12862306a36Sopenharmony_ci "1:" 12962306a36Sopenharmony_ci : "=a" (s), "=d" (count) 13062306a36Sopenharmony_ci : "d" (-1), "0" ((char *)s+count), "1" (count)); 13162306a36Sopenharmony_ci asm volatile ("\n" 13262306a36Sopenharmony_ci " subq.l #1,%1 ; jcs 3f\n" 13362306a36Sopenharmony_ci " move.l %2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" 13462306a36Sopenharmony_ci "2: movem.l %2/%%d4/%%d5/%%d6,-(%0)\n" 13562306a36Sopenharmony_ci " dbra %1,2b\n" 13662306a36Sopenharmony_ci "3:" 13762306a36Sopenharmony_ci : "=a" (s), "=d" (count) 13862306a36Sopenharmony_ci : "d" (-1), "0" (s), "1" (count) 13962306a36Sopenharmony_ci : "d4", "d5", "d6"); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci return 0; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_cistatic inline void *fb_memmove(void *d, const void *s, size_t count) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci if (d < s) { 14862306a36Sopenharmony_ci if (count < 16) { 14962306a36Sopenharmony_ci asm volatile ("\n" 15062306a36Sopenharmony_ci " lsr.l #1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n" 15162306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n" 15262306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n" 15362306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" 15462306a36Sopenharmony_ci "1:" 15562306a36Sopenharmony_ci : "=a" (d), "=a" (s), "=d" (count) 15662306a36Sopenharmony_ci : "0" (d), "1" (s), "2" (count)); 15762306a36Sopenharmony_ci } else { 15862306a36Sopenharmony_ci long tmp; 15962306a36Sopenharmony_ci asm volatile ("\n" 16062306a36Sopenharmony_ci " move.l %0,%3\n" 16162306a36Sopenharmony_ci " lsr.l #1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n" 16262306a36Sopenharmony_ci " lsr.l #1,%3 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/ 16362306a36Sopenharmony_ci " move.w (%1)+,(%0)+ ; subqw #2,%2 ; jra 2f\n" 16462306a36Sopenharmony_ci "1: lsr.l #1,%3 ; jcc 2f\n" 16562306a36Sopenharmony_ci " move.w (%1)+,(%0)+ ; subqw #2,%2\n" 16662306a36Sopenharmony_ci "2: move.w %2,%-; lsr.l #2,%2 ; jeq 6f\n" 16762306a36Sopenharmony_ci " lsr.l #1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n" 16862306a36Sopenharmony_ci "3: lsr.l #1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" 16962306a36Sopenharmony_ci "4: subq.l #1,%2 ; jcs 6f\n" 17062306a36Sopenharmony_ci "5: move.l (%1)+,(%0)+; move.l (%1)+,(%0)+\n" 17162306a36Sopenharmony_ci " move.l (%1)+,(%0)+; move.l (%1)+,(%0)+\n" 17262306a36Sopenharmony_ci " dbra %2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" 17362306a36Sopenharmony_ci "6: move.w %+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n" 17462306a36Sopenharmony_ci "7: btst #0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n" 17562306a36Sopenharmony_ci "8:" 17662306a36Sopenharmony_ci : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) 17762306a36Sopenharmony_ci : "0" (d), "1" (s), "2" (count)); 17862306a36Sopenharmony_ci } 17962306a36Sopenharmony_ci } else { 18062306a36Sopenharmony_ci if (count < 16) { 18162306a36Sopenharmony_ci asm volatile ("\n" 18262306a36Sopenharmony_ci " lsr.l #1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n" 18362306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n" 18462306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n" 18562306a36Sopenharmony_ci "1: lsr.l #1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" 18662306a36Sopenharmony_ci "1:" 18762306a36Sopenharmony_ci : "=a" (d), "=a" (s), "=d" (count) 18862306a36Sopenharmony_ci : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); 18962306a36Sopenharmony_ci } else { 19062306a36Sopenharmony_ci long tmp; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci asm volatile ("\n" 19362306a36Sopenharmony_ci " move.l %0,%3\n" 19462306a36Sopenharmony_ci " lsr.l #1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n" 19562306a36Sopenharmony_ci " lsr.l #1,%3 ; jcs 2f\n" /* %0 increased=>bit 2 switched*/ 19662306a36Sopenharmony_ci " move.w -(%1),-(%0) ; subqw #2,%2 ; jra 2f\n" 19762306a36Sopenharmony_ci "1: lsr.l #1,%3 ; jcc 2f\n" 19862306a36Sopenharmony_ci " move.w -(%1),-(%0) ; subqw #2,%2\n" 19962306a36Sopenharmony_ci "2: move.w %2,%-; lsr.l #2,%2 ; jeq 6f\n" 20062306a36Sopenharmony_ci " lsr.l #1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n" 20162306a36Sopenharmony_ci "3: lsr.l #1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" 20262306a36Sopenharmony_ci "4: subq.l #1,%2 ; jcs 6f\n" 20362306a36Sopenharmony_ci "5: move.l -(%1),-(%0); move.l -(%1),-(%0)\n" 20462306a36Sopenharmony_ci " move.l -(%1),-(%0); move.l -(%1),-(%0)\n" 20562306a36Sopenharmony_ci " dbra %2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" 20662306a36Sopenharmony_ci "6: move.w %+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n" 20762306a36Sopenharmony_ci "7: btst #0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n" 20862306a36Sopenharmony_ci "8:" 20962306a36Sopenharmony_ci : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) 21062306a36Sopenharmony_ci : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci } 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci return 0; 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci/* ++andreas: Simple and fast version of memmove, assumes size is 21962306a36Sopenharmony_ci divisible by 16, suitable for moving the whole screen bitplane */ 22062306a36Sopenharmony_cistatic inline void fast_memmove(char *dst, const char *src, size_t size) 22162306a36Sopenharmony_ci{ 22262306a36Sopenharmony_ci if (!size) 22362306a36Sopenharmony_ci return; 22462306a36Sopenharmony_ci if (dst < src) 22562306a36Sopenharmony_ci asm volatile ("\n" 22662306a36Sopenharmony_ci "1: movem.l (%0)+,%%d0/%%d1/%%a0/%%a1\n" 22762306a36Sopenharmony_ci " movem.l %%d0/%%d1/%%a0/%%a1,%1@\n" 22862306a36Sopenharmony_ci " addq.l #8,%1; addq.l #8,%1\n" 22962306a36Sopenharmony_ci " dbra %2,1b\n" 23062306a36Sopenharmony_ci " clr.w %2; subq.l #1,%2\n" 23162306a36Sopenharmony_ci " jcc 1b" 23262306a36Sopenharmony_ci : "=a" (src), "=a" (dst), "=d" (size) 23362306a36Sopenharmony_ci : "0" (src), "1" (dst), "2" (size / 16 - 1) 23462306a36Sopenharmony_ci : "d0", "d1", "a0", "a1", "memory"); 23562306a36Sopenharmony_ci else 23662306a36Sopenharmony_ci asm volatile ("\n" 23762306a36Sopenharmony_ci "1: subq.l #8,%0; subq.l #8,%0\n" 23862306a36Sopenharmony_ci " movem.l %0@,%%d0/%%d1/%%a0/%%a1\n" 23962306a36Sopenharmony_ci " movem.l %%d0/%%d1/%%a0/%%a1,-(%1)\n" 24062306a36Sopenharmony_ci " dbra %2,1b\n" 24162306a36Sopenharmony_ci " clr.w %2; subq.l #1,%2\n" 24262306a36Sopenharmony_ci " jcc 1b" 24362306a36Sopenharmony_ci : "=a" (src), "=a" (dst), "=d" (size) 24462306a36Sopenharmony_ci : "0" (src + size), "1" (dst + size), "2" (size / 16 - 1) 24562306a36Sopenharmony_ci : "d0", "d1", "a0", "a1", "memory"); 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci#ifdef BPL 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci/* 25162306a36Sopenharmony_ci * This expands a up to 8 bit color into two longs 25262306a36Sopenharmony_ci * for movel operations. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_cistatic const u32 four2long[] = { 25562306a36Sopenharmony_ci 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 25662306a36Sopenharmony_ci 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, 25762306a36Sopenharmony_ci 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 25862306a36Sopenharmony_ci 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, 25962306a36Sopenharmony_ci}; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_cistatic inline void expand8_col2mask(u8 c, u32 m[]) 26262306a36Sopenharmony_ci{ 26362306a36Sopenharmony_ci m[0] = four2long[c & 15]; 26462306a36Sopenharmony_ci#if BPL > 4 26562306a36Sopenharmony_ci m[1] = four2long[c >> 4]; 26662306a36Sopenharmony_ci#endif 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]); 27262306a36Sopenharmony_ci#if BPL > 4 27362306a36Sopenharmony_ci fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]); 27462306a36Sopenharmony_ci#endif 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci/* 27862306a36Sopenharmony_ci * set an 8bit value to a color 27962306a36Sopenharmony_ci */ 28062306a36Sopenharmony_cistatic inline void fill8_col(u8 *dst, u32 m[]) 28162306a36Sopenharmony_ci{ 28262306a36Sopenharmony_ci u32 tmp = m[0]; 28362306a36Sopenharmony_ci dst[0] = tmp; 28462306a36Sopenharmony_ci dst[2] = (tmp >>= 8); 28562306a36Sopenharmony_ci#if BPL > 2 28662306a36Sopenharmony_ci dst[4] = (tmp >>= 8); 28762306a36Sopenharmony_ci dst[6] = tmp >> 8; 28862306a36Sopenharmony_ci#endif 28962306a36Sopenharmony_ci#if BPL > 4 29062306a36Sopenharmony_ci tmp = m[1]; 29162306a36Sopenharmony_ci dst[8] = tmp; 29262306a36Sopenharmony_ci dst[10] = (tmp >>= 8); 29362306a36Sopenharmony_ci dst[12] = (tmp >>= 8); 29462306a36Sopenharmony_ci dst[14] = tmp >> 8; 29562306a36Sopenharmony_ci#endif 29662306a36Sopenharmony_ci} 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci/* 29962306a36Sopenharmony_ci * set an 8bit value according to foreground/background color 30062306a36Sopenharmony_ci */ 30162306a36Sopenharmony_cistatic inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci u32 fgm[2], bgm[2], tmp; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci expand8_2col2mask(fg, bg, fgm, bgm); 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci mask |= mask << 8; 30862306a36Sopenharmony_ci#if BPL > 2 30962306a36Sopenharmony_ci mask |= mask << 16; 31062306a36Sopenharmony_ci#endif 31162306a36Sopenharmony_ci tmp = (mask & fgm[0]) ^ bgm[0]; 31262306a36Sopenharmony_ci dst[0] = tmp; 31362306a36Sopenharmony_ci dst[2] = (tmp >>= 8); 31462306a36Sopenharmony_ci#if BPL > 2 31562306a36Sopenharmony_ci dst[4] = (tmp >>= 8); 31662306a36Sopenharmony_ci dst[6] = tmp >> 8; 31762306a36Sopenharmony_ci#endif 31862306a36Sopenharmony_ci#if BPL > 4 31962306a36Sopenharmony_ci tmp = (mask & fgm[1]) ^ bgm[1]; 32062306a36Sopenharmony_ci dst[8] = tmp; 32162306a36Sopenharmony_ci dst[10] = (tmp >>= 8); 32262306a36Sopenharmony_ci dst[12] = (tmp >>= 8); 32362306a36Sopenharmony_ci dst[14] = tmp >> 8; 32462306a36Sopenharmony_ci#endif 32562306a36Sopenharmony_ci} 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_cistatic const u32 two2word[] = { 32862306a36Sopenharmony_ci 0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff 32962306a36Sopenharmony_ci}; 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic inline void expand16_col2mask(u8 c, u32 m[]) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci m[0] = two2word[c & 3]; 33462306a36Sopenharmony_ci#if BPL > 2 33562306a36Sopenharmony_ci m[1] = two2word[(c >> 2) & 3]; 33662306a36Sopenharmony_ci#endif 33762306a36Sopenharmony_ci#if BPL > 4 33862306a36Sopenharmony_ci m[2] = two2word[(c >> 4) & 3]; 33962306a36Sopenharmony_ci m[3] = two2word[c >> 6]; 34062306a36Sopenharmony_ci#endif 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci bgm[0] = two2word[bg & 3]; 34662306a36Sopenharmony_ci fgm[0] = two2word[fg & 3] ^ bgm[0]; 34762306a36Sopenharmony_ci#if BPL > 2 34862306a36Sopenharmony_ci bgm[1] = two2word[(bg >> 2) & 3]; 34962306a36Sopenharmony_ci fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1]; 35062306a36Sopenharmony_ci#endif 35162306a36Sopenharmony_ci#if BPL > 4 35262306a36Sopenharmony_ci bgm[2] = two2word[(bg >> 4) & 3]; 35362306a36Sopenharmony_ci fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2]; 35462306a36Sopenharmony_ci bgm[3] = two2word[bg >> 6]; 35562306a36Sopenharmony_ci fgm[3] = two2word[fg >> 6] ^ bgm[3]; 35662306a36Sopenharmony_ci#endif 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_cistatic inline u32 *fill16_col(u32 *dst, int rows, u32 m[]) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci while (rows) { 36262306a36Sopenharmony_ci *dst++ = m[0]; 36362306a36Sopenharmony_ci#if BPL > 2 36462306a36Sopenharmony_ci *dst++ = m[1]; 36562306a36Sopenharmony_ci#endif 36662306a36Sopenharmony_ci#if BPL > 4 36762306a36Sopenharmony_ci *dst++ = m[2]; 36862306a36Sopenharmony_ci *dst++ = m[3]; 36962306a36Sopenharmony_ci#endif 37062306a36Sopenharmony_ci rows--; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci return dst; 37362306a36Sopenharmony_ci} 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_cistatic inline void memmove32_col(void *dst, void *src, u32 mask, u32 h, u32 bytes) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci u32 *s, *d, v; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci s = src; 38062306a36Sopenharmony_ci d = dst; 38162306a36Sopenharmony_ci do { 38262306a36Sopenharmony_ci v = (*s++ & mask) | (*d & ~mask); 38362306a36Sopenharmony_ci *d++ = v; 38462306a36Sopenharmony_ci#if BPL > 2 38562306a36Sopenharmony_ci v = (*s++ & mask) | (*d & ~mask); 38662306a36Sopenharmony_ci *d++ = v; 38762306a36Sopenharmony_ci#endif 38862306a36Sopenharmony_ci#if BPL > 4 38962306a36Sopenharmony_ci v = (*s++ & mask) | (*d & ~mask); 39062306a36Sopenharmony_ci *d++ = v; 39162306a36Sopenharmony_ci v = (*s++ & mask) | (*d & ~mask); 39262306a36Sopenharmony_ci *d++ = v; 39362306a36Sopenharmony_ci#endif 39462306a36Sopenharmony_ci d = (u32 *)((u8 *)d + bytes); 39562306a36Sopenharmony_ci s = (u32 *)((u8 *)s + bytes); 39662306a36Sopenharmony_ci } while (--h); 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci#endif 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci#endif /* _VIDEO_ATAFB_UTILS_H */ 402