162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Fast C2P (Chunky-to-Planar) Conversion 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2003-2008 Geert Uytterhoeven 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file is subject to the terms and conditions of the GNU General Public 762306a36Sopenharmony_ci * License. See the file COPYING in the main directory of this archive 862306a36Sopenharmony_ci * for more details. 962306a36Sopenharmony_ci */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/module.h> 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <asm/unaligned.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include "c2p.h" 1762306a36Sopenharmony_ci#include "c2p_core.h" 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci /* 2162306a36Sopenharmony_ci * Perform a full C2P step on 16 8-bit pixels, stored in 4 32-bit words 2262306a36Sopenharmony_ci * containing 2362306a36Sopenharmony_ci * - 16 8-bit chunky pixels on input 2462306a36Sopenharmony_ci * - permutated planar data (2 planes per 32-bit word) on output 2562306a36Sopenharmony_ci */ 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic void c2p_16x8(u32 d[4]) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci transp4(d, 8, 2); 3062306a36Sopenharmony_ci transp4(d, 1, 2); 3162306a36Sopenharmony_ci transp4x(d, 16, 2); 3262306a36Sopenharmony_ci transp4x(d, 2, 2); 3362306a36Sopenharmony_ci transp4(d, 4, 1); 3462306a36Sopenharmony_ci} 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci /* 3862306a36Sopenharmony_ci * Array containing the permutation indices of the planar data after c2p 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic const int perm_c2p_16x8[4] = { 1, 3, 0, 2 }; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci /* 4562306a36Sopenharmony_ci * Store a full block of iplan2 data after c2p conversion 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cistatic inline void store_iplan2(void *dst, u32 bpp, u32 d[4]) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci int i; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci for (i = 0; i < bpp/2; i++, dst += 4) 5362306a36Sopenharmony_ci put_unaligned_be32(d[perm_c2p_16x8[i]], dst); 5462306a36Sopenharmony_ci} 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci /* 5862306a36Sopenharmony_ci * Store a partial block of iplan2 data after c2p conversion 5962306a36Sopenharmony_ci */ 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic inline void store_iplan2_masked(void *dst, u32 bpp, u32 d[4], u32 mask) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci int i; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci for (i = 0; i < bpp/2; i++, dst += 4) 6662306a36Sopenharmony_ci put_unaligned_be32(comp(d[perm_c2p_16x8[i]], 6762306a36Sopenharmony_ci get_unaligned_be32(dst), mask), 6862306a36Sopenharmony_ci dst); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci /* 7362306a36Sopenharmony_ci * c2p_iplan2 - Copy 8-bit chunky image data to an interleaved planar 7462306a36Sopenharmony_ci * frame buffer with 2 bytes of interleave 7562306a36Sopenharmony_ci * @dst: Starting address of the planar frame buffer 7662306a36Sopenharmony_ci * @dx: Horizontal destination offset (in pixels) 7762306a36Sopenharmony_ci * @dy: Vertical destination offset (in pixels) 7862306a36Sopenharmony_ci * @width: Image width (in pixels) 7962306a36Sopenharmony_ci * @height: Image height (in pixels) 8062306a36Sopenharmony_ci * @dst_nextline: Frame buffer offset to the next line (in bytes) 8162306a36Sopenharmony_ci * @src_nextline: Image offset to the next line (in bytes) 8262306a36Sopenharmony_ci * @bpp: Bits per pixel of the planar frame buffer (2, 4, or 8) 8362306a36Sopenharmony_ci */ 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_civoid c2p_iplan2(void *dst, const void *src, u32 dx, u32 dy, u32 width, 8662306a36Sopenharmony_ci u32 height, u32 dst_nextline, u32 src_nextline, u32 bpp) 8762306a36Sopenharmony_ci{ 8862306a36Sopenharmony_ci union { 8962306a36Sopenharmony_ci u8 pixels[16]; 9062306a36Sopenharmony_ci u32 words[4]; 9162306a36Sopenharmony_ci } d; 9262306a36Sopenharmony_ci u32 dst_idx, first, last, w; 9362306a36Sopenharmony_ci const u8 *c; 9462306a36Sopenharmony_ci void *p; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci dst += dy*dst_nextline+(dx & ~15)*bpp; 9762306a36Sopenharmony_ci dst_idx = dx % 16; 9862306a36Sopenharmony_ci first = 0xffffU >> dst_idx; 9962306a36Sopenharmony_ci first |= first << 16; 10062306a36Sopenharmony_ci last = 0xffffU ^ (0xffffU >> ((dst_idx+width) % 16)); 10162306a36Sopenharmony_ci last |= last << 16; 10262306a36Sopenharmony_ci while (height--) { 10362306a36Sopenharmony_ci c = src; 10462306a36Sopenharmony_ci p = dst; 10562306a36Sopenharmony_ci w = width; 10662306a36Sopenharmony_ci if (dst_idx+width <= 16) { 10762306a36Sopenharmony_ci /* Single destination word */ 10862306a36Sopenharmony_ci first &= last; 10962306a36Sopenharmony_ci memset(d.pixels, 0, sizeof(d)); 11062306a36Sopenharmony_ci memcpy(d.pixels+dst_idx, c, width); 11162306a36Sopenharmony_ci c += width; 11262306a36Sopenharmony_ci c2p_16x8(d.words); 11362306a36Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, first); 11462306a36Sopenharmony_ci p += bpp*2; 11562306a36Sopenharmony_ci } else { 11662306a36Sopenharmony_ci /* Multiple destination words */ 11762306a36Sopenharmony_ci w = width; 11862306a36Sopenharmony_ci /* Leading bits */ 11962306a36Sopenharmony_ci if (dst_idx) { 12062306a36Sopenharmony_ci w = 16 - dst_idx; 12162306a36Sopenharmony_ci memset(d.pixels, 0, dst_idx); 12262306a36Sopenharmony_ci memcpy(d.pixels+dst_idx, c, w); 12362306a36Sopenharmony_ci c += w; 12462306a36Sopenharmony_ci c2p_16x8(d.words); 12562306a36Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, first); 12662306a36Sopenharmony_ci p += bpp*2; 12762306a36Sopenharmony_ci w = width-w; 12862306a36Sopenharmony_ci } 12962306a36Sopenharmony_ci /* Main chunk */ 13062306a36Sopenharmony_ci while (w >= 16) { 13162306a36Sopenharmony_ci memcpy(d.pixels, c, 16); 13262306a36Sopenharmony_ci c += 16; 13362306a36Sopenharmony_ci c2p_16x8(d.words); 13462306a36Sopenharmony_ci store_iplan2(p, bpp, d.words); 13562306a36Sopenharmony_ci p += bpp*2; 13662306a36Sopenharmony_ci w -= 16; 13762306a36Sopenharmony_ci } 13862306a36Sopenharmony_ci /* Trailing bits */ 13962306a36Sopenharmony_ci w %= 16; 14062306a36Sopenharmony_ci if (w > 0) { 14162306a36Sopenharmony_ci memcpy(d.pixels, c, w); 14262306a36Sopenharmony_ci memset(d.pixels+w, 0, 16-w); 14362306a36Sopenharmony_ci c2p_16x8(d.words); 14462306a36Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, last); 14562306a36Sopenharmony_ci } 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci src += src_nextline; 14862306a36Sopenharmony_ci dst += dst_nextline; 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci} 15162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(c2p_iplan2); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 154