18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Fast C2P (Chunky-to-Planar) Conversion 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Copyright (C) 2003-2008 Geert Uytterhoeven 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * This file is subject to the terms and conditions of the GNU General Public 78c2ecf20Sopenharmony_ci * License. See the file COPYING in the main directory of this archive 88c2ecf20Sopenharmony_ci * for more details. 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include <linux/module.h> 128c2ecf20Sopenharmony_ci#include <linux/string.h> 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <asm/unaligned.h> 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include "c2p.h" 178c2ecf20Sopenharmony_ci#include "c2p_core.h" 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci /* 218c2ecf20Sopenharmony_ci * Perform a full C2P step on 16 8-bit pixels, stored in 4 32-bit words 228c2ecf20Sopenharmony_ci * containing 238c2ecf20Sopenharmony_ci * - 16 8-bit chunky pixels on input 248c2ecf20Sopenharmony_ci * - permutated planar data (2 planes per 32-bit word) on output 258c2ecf20Sopenharmony_ci */ 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_cistatic void c2p_16x8(u32 d[4]) 288c2ecf20Sopenharmony_ci{ 298c2ecf20Sopenharmony_ci transp4(d, 8, 2); 308c2ecf20Sopenharmony_ci transp4(d, 1, 2); 318c2ecf20Sopenharmony_ci transp4x(d, 16, 2); 328c2ecf20Sopenharmony_ci transp4x(d, 2, 2); 338c2ecf20Sopenharmony_ci transp4(d, 4, 1); 348c2ecf20Sopenharmony_ci} 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci /* 388c2ecf20Sopenharmony_ci * Array containing the permutation indices of the planar data after c2p 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistatic const int perm_c2p_16x8[4] = { 1, 3, 0, 2 }; 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci /* 458c2ecf20Sopenharmony_ci * Store a full block of iplan2 data after c2p conversion 468c2ecf20Sopenharmony_ci */ 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_cistatic inline void store_iplan2(void *dst, u32 bpp, u32 d[4]) 498c2ecf20Sopenharmony_ci{ 508c2ecf20Sopenharmony_ci int i; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci for (i = 0; i < bpp/2; i++, dst += 4) 538c2ecf20Sopenharmony_ci put_unaligned_be32(d[perm_c2p_16x8[i]], dst); 548c2ecf20Sopenharmony_ci} 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci /* 588c2ecf20Sopenharmony_ci * Store a partial block of iplan2 data after c2p conversion 598c2ecf20Sopenharmony_ci */ 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_cistatic inline void store_iplan2_masked(void *dst, u32 bpp, u32 d[4], u32 mask) 628c2ecf20Sopenharmony_ci{ 638c2ecf20Sopenharmony_ci int i; 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci for (i = 0; i < bpp/2; i++, dst += 4) 668c2ecf20Sopenharmony_ci put_unaligned_be32(comp(d[perm_c2p_16x8[i]], 678c2ecf20Sopenharmony_ci get_unaligned_be32(dst), mask), 688c2ecf20Sopenharmony_ci dst); 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci /* 738c2ecf20Sopenharmony_ci * c2p_iplan2 - Copy 8-bit chunky image data to an interleaved planar 748c2ecf20Sopenharmony_ci * frame buffer with 2 bytes of interleave 758c2ecf20Sopenharmony_ci * @dst: Starting address of the planar frame buffer 768c2ecf20Sopenharmony_ci * @dx: Horizontal destination offset (in pixels) 778c2ecf20Sopenharmony_ci * @dy: Vertical destination offset (in pixels) 788c2ecf20Sopenharmony_ci * @width: Image width (in pixels) 798c2ecf20Sopenharmony_ci * @height: Image height (in pixels) 808c2ecf20Sopenharmony_ci * @dst_nextline: Frame buffer offset to the next line (in bytes) 818c2ecf20Sopenharmony_ci * @src_nextline: Image offset to the next line (in bytes) 828c2ecf20Sopenharmony_ci * @bpp: Bits per pixel of the planar frame buffer (2, 4, or 8) 838c2ecf20Sopenharmony_ci */ 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_civoid c2p_iplan2(void *dst, const void *src, u32 dx, u32 dy, u32 width, 868c2ecf20Sopenharmony_ci u32 height, u32 dst_nextline, u32 src_nextline, u32 bpp) 878c2ecf20Sopenharmony_ci{ 888c2ecf20Sopenharmony_ci union { 898c2ecf20Sopenharmony_ci u8 pixels[16]; 908c2ecf20Sopenharmony_ci u32 words[4]; 918c2ecf20Sopenharmony_ci } d; 928c2ecf20Sopenharmony_ci u32 dst_idx, first, last, w; 938c2ecf20Sopenharmony_ci const u8 *c; 948c2ecf20Sopenharmony_ci void *p; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci dst += dy*dst_nextline+(dx & ~15)*bpp; 978c2ecf20Sopenharmony_ci dst_idx = dx % 16; 988c2ecf20Sopenharmony_ci first = 0xffffU >> dst_idx; 998c2ecf20Sopenharmony_ci first |= first << 16; 1008c2ecf20Sopenharmony_ci last = 0xffffU ^ (0xffffU >> ((dst_idx+width) % 16)); 1018c2ecf20Sopenharmony_ci last |= last << 16; 1028c2ecf20Sopenharmony_ci while (height--) { 1038c2ecf20Sopenharmony_ci c = src; 1048c2ecf20Sopenharmony_ci p = dst; 1058c2ecf20Sopenharmony_ci w = width; 1068c2ecf20Sopenharmony_ci if (dst_idx+width <= 16) { 1078c2ecf20Sopenharmony_ci /* Single destination word */ 1088c2ecf20Sopenharmony_ci first &= last; 1098c2ecf20Sopenharmony_ci memset(d.pixels, 0, sizeof(d)); 1108c2ecf20Sopenharmony_ci memcpy(d.pixels+dst_idx, c, width); 1118c2ecf20Sopenharmony_ci c += width; 1128c2ecf20Sopenharmony_ci c2p_16x8(d.words); 1138c2ecf20Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, first); 1148c2ecf20Sopenharmony_ci p += bpp*2; 1158c2ecf20Sopenharmony_ci } else { 1168c2ecf20Sopenharmony_ci /* Multiple destination words */ 1178c2ecf20Sopenharmony_ci w = width; 1188c2ecf20Sopenharmony_ci /* Leading bits */ 1198c2ecf20Sopenharmony_ci if (dst_idx) { 1208c2ecf20Sopenharmony_ci w = 16 - dst_idx; 1218c2ecf20Sopenharmony_ci memset(d.pixels, 0, dst_idx); 1228c2ecf20Sopenharmony_ci memcpy(d.pixels+dst_idx, c, w); 1238c2ecf20Sopenharmony_ci c += w; 1248c2ecf20Sopenharmony_ci c2p_16x8(d.words); 1258c2ecf20Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, first); 1268c2ecf20Sopenharmony_ci p += bpp*2; 1278c2ecf20Sopenharmony_ci w = width-w; 1288c2ecf20Sopenharmony_ci } 1298c2ecf20Sopenharmony_ci /* Main chunk */ 1308c2ecf20Sopenharmony_ci while (w >= 16) { 1318c2ecf20Sopenharmony_ci memcpy(d.pixels, c, 16); 1328c2ecf20Sopenharmony_ci c += 16; 1338c2ecf20Sopenharmony_ci c2p_16x8(d.words); 1348c2ecf20Sopenharmony_ci store_iplan2(p, bpp, d.words); 1358c2ecf20Sopenharmony_ci p += bpp*2; 1368c2ecf20Sopenharmony_ci w -= 16; 1378c2ecf20Sopenharmony_ci } 1388c2ecf20Sopenharmony_ci /* Trailing bits */ 1398c2ecf20Sopenharmony_ci w %= 16; 1408c2ecf20Sopenharmony_ci if (w > 0) { 1418c2ecf20Sopenharmony_ci memcpy(d.pixels, c, w); 1428c2ecf20Sopenharmony_ci memset(d.pixels+w, 0, 16-w); 1438c2ecf20Sopenharmony_ci c2p_16x8(d.words); 1448c2ecf20Sopenharmony_ci store_iplan2_masked(p, bpp, d.words, last); 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci src += src_nextline; 1488c2ecf20Sopenharmony_ci dst += dst_nextline; 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(c2p_iplan2); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 154