1/*
2 * software RGB to RGB converter
3 * pluralize by software PAL8 to RGB converter
4 *              software YUV to YUV converter
5 *              software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
9 *
10 * This file is part of FFmpeg.
11 *
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27#include <stddef.h>
28
29#include "libavutil/attributes.h"
30
/**
 * Expand packed 3-byte pixels to 4-byte pixels with a constant 255 byte.
 *
 * Each iteration consumes 3 bytes of src and produces 4 bytes of dst,
 * until src_size input bytes have been consumed.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, 4 bytes per input pixel
 * @param src_size number of input bytes
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        /* Same component order, filler byte appended last. */
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
54
/**
 * Reduce packed 4-byte pixels to 3-byte pixels by dropping one byte.
 *
 * Each iteration consumes 4 bytes of src and produces 3 bytes of dst,
 * until src_size input bytes have been consumed.
 *
 * @param src      packed 4-byte pixels
 * @param dst      output buffer, 3 bytes per input pixel
 * @param src_size number of input bytes
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
        out[2] = in[1];
        out[1] = in[2];
        out[0] = in[3];
#else
        /* The fourth input byte is skipped. */
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
78
79/*
80 * original by Strepto/Astral
81 * ported to gcc & bugfixed: A'rpi
82 * MMXEXT, 3DNOW optimization by Nick Kurshev
83 * 32-bit C version, and and&add trick by Michael Niedermayer
84 */
/**
 * Convert 16-bit pixels by moving bits 5-14 one position up.
 *
 * Adding the masked upper ten bits (mask 0x7FE0) to the 15-bit value doubles
 * them, which shifts bits 5-14 to bits 6-15 while bits 0-4 stay in place.
 * Two pixels are handled per 32-bit access; a trailing pixel is handled
 * with a 16-bit access.
 *
 * @param src      input pixels, accessed as 32-bit and 16-bit words
 * @param dst      output pixels, same size as the input
 * @param src_size number of input bytes
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    /* Two pixels at a time while at least four input bytes remain. */
    for (; stop - in > 3; in += 4, out += 4) {
        const unsigned pair = *(const uint32_t *)in;
        *(uint32_t *)out    = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
    }

    /* Leftover pixel, if any. */
    if (in < stop) {
        const unsigned short px = *(const uint16_t *)in;
        *(uint16_t *)out        = (px & 0x7FFF) + (px & 0x7FE0);
    }
}
103
/**
 * Convert 16-bit pixels by moving bits 6-15 one position down.
 *
 * Bits 0-4 are kept, bits 6-15 are shifted to bits 5-14 and bit 5 of the
 * input is discarded. Two pixels are handled per 32-bit access; a trailing
 * pixel is handled with a 16-bit access.
 *
 * @param src      input pixels, accessed as 32-bit and 16-bit words
 * @param dst      output pixels, same size as the input
 * @param src_size number of input bytes
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    /* Two pixels at a time while at least four input bytes remain. */
    for (; stop - in > 3; in += 4, out += 4) {
        const uint32_t pair = *(const uint32_t *)in;
        *(uint32_t *)out    = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
    }

    /* Leftover pixel, if any. */
    if (in < stop) {
        const uint16_t px = *(const uint16_t *)in;
        *(uint16_t *)out  = ((px >> 1) & 0x7FE0) | (px & 0x001F);
    }
}
122
/**
 * Pack native 32-bit pixels into 16-bit 5-6-5 words.
 *
 * From each 32-bit word the top 5 bits of byte 0 go to bits 0-4, the top
 * 6 bits of byte 1 to bits 5-10 and the top 5 bits of byte 2 to bits 11-15.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xFC00)   >> 5;
        const int high = (px & 0xF80000) >> 8;

        *out++ = low + mid + high;
    }
}
137
/**
 * Pack native 32-bit pixels into 16-bit 5-6-5 words with the outer
 * components exchanged.
 *
 * From each 32-bit word the top 5 bits of byte 0 go to bits 11-15, the top
 * 6 bits of byte 1 to bits 5-10 and the top 5 bits of byte 2 to bits 0-4.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 8;
        const int mid  = (px & 0xFC00)   >> 5;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
153
/**
 * Pack native 32-bit pixels into 16-bit 5-5-5 words.
 *
 * From each 32-bit word the top 5 bits of byte 0 go to bits 0-4, the top
 * 5 bits of byte 1 to bits 5-9 and the top 5 bits of byte 2 to bits 10-14.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xF800)   >> 6;
        const int high = (px & 0xF80000) >> 9;

        *out++ = low + mid + high;
    }
}
168
/**
 * Pack native 32-bit pixels into 16-bit 5-5-5 words with the outer
 * components exchanged.
 *
 * From each 32-bit word the top 5 bits of byte 0 go to bits 10-14, the top
 * 5 bits of byte 1 to bits 5-9 and the top 5 bits of byte 2 to bits 0-4.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     <<  7;
        const int mid  = (px & 0xF800)   >>  6;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
184
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words.
 *
 * The first input byte supplies bits 0-4, the second bits 5-10 and the
 * third bits 11-15 of each output word.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
    }
}
199
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words.
 *
 * The first input byte supplies bits 11-15, the second bits 5-10 and the
 * third bits 0-4 of each output word.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
    }
}
213
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words.
 *
 * The first input byte supplies bits 0-4, the second bits 5-9 and the
 * third bits 10-14 of each output word.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
    }
}
228
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words.
 *
 * The first input byte supplies bits 10-14, the second bits 5-9 and the
 * third bits 0-4 of each output word.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output pixels, written as 16-bit words
 * @param src_size number of input bytes
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
    }
}
242
/**
 * Unpack 16-bit words holding three 5-bit fields into three bytes each.
 *
 * For every input word the fields at bits 0-4, 5-9 and 10-14 are written,
 * in that order, as one byte each. A 5-bit value v is widened to
 * (v << 3) | (v >> 2).
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer, 3 bytes per input word
 * @param src_size number of input bytes; src_size / 2 words are converted
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 3) {
        const unsigned px = *in;
        const unsigned lo = px         & 0x1F;
        const unsigned md = (px >>  5) & 0x1F;
        const unsigned hi = (px >> 10) & 0x1F;

        out[0] = (lo << 3) | (lo >> 2);
        out[1] = (md << 3) | (md >> 2);
        out[2] = (hi << 3) | (hi >> 2);
    }
}
257
/**
 * Unpack 16-bit 5-6-5 words into three bytes each.
 *
 * For every input word the fields at bits 0-4, 5-10 and 11-15 are written,
 * in that order, as one byte each. The 5-bit values are widened to
 * (v << 3) | (v >> 2), the 6-bit value to (v << 2) | (v >> 4).
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer, 3 bytes per input word
 * @param src_size number of input bytes; src_size / 2 words are converted
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 3) {
        const unsigned px = *in;
        const unsigned lo = px         & 0x1F;
        const unsigned md = (px >>  5) & 0x3F;
        const unsigned hi = (px >> 11) & 0x1F;

        out[0] = (lo << 3) | (lo >> 2);
        out[1] = (md << 2) | (md >> 4);
        out[2] = (hi << 3) | (hi >> 2);
    }
}
272
/**
 * Unpack 16-bit words holding three 5-bit fields into four bytes each.
 *
 * Each 5-bit field v (bits 0-4, 5-9, 10-14) is widened to
 * (v << 3) | (v >> 2) and a constant 255 byte is added. The byte order of
 * the four output bytes depends on HAVE_BIGENDIAN.
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer, 4 bytes per input word
 * @param src_size number of input bytes; src_size / 2 words are converted
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const unsigned px = *in;
        const unsigned lo = px         & 0x1F;
        const unsigned md = (px >>  5) & 0x1F;
        const unsigned hi = (px >> 10) & 0x1F;

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (hi << 3) | (hi >> 2);
        out[2] = (md << 3) | (md >> 2);
        out[3] = (lo << 3) | (lo >> 2);
#else
        out[0] = (lo << 3) | (lo >> 2);
        out[1] = (md << 3) | (md >> 2);
        out[2] = (hi << 3) | (hi >> 2);
        out[3] = 255;
#endif
    }
}
294
/**
 * Unpack 16-bit 5-6-5 words into four bytes each.
 *
 * The 5-bit fields (bits 0-4 and 11-15) are widened to (v << 3) | (v >> 2),
 * the 6-bit field (bits 5-10) to (v << 2) | (v >> 4), and a constant 255
 * byte is added. The byte order of the four output bytes depends on
 * HAVE_BIGENDIAN.
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer, 4 bytes per input word
 * @param src_size number of input bytes; src_size / 2 words are converted
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const unsigned px = *in;
        const unsigned lo = px         & 0x1F;
        const unsigned md = (px >>  5) & 0x3F;
        const unsigned hi = (px >> 11) & 0x1F;

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (hi << 3) | (hi >> 2);
        out[2] = (md << 2) | (md >> 4);
        out[3] = (lo << 3) | (lo >> 2);
#else
        out[0] = (lo << 3) | (lo >> 2);
        out[1] = (md << 2) | (md >> 4);
        out[2] = (hi << 3) | (hi >> 2);
        out[3] = 255;
#endif
    }
}
316
/**
 * Exchange bits 0-7 with bits 16-23 in every native 32-bit word.
 *
 * Bits 8-15 and 24-31 are kept in place. One 32-bit word is processed per
 * started group of four input bytes.
 *
 * @param src      input, read as native 32-bit words
 * @param dst      output, written as native 32-bit words
 * @param src_size number of input bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 4) {
        const unsigned word  = *(const uint32_t *)in;
        const unsigned keep  = word & 0xff00ff00;
        const unsigned moved = word & 0x00ff00ff;

        /* The store to a 32-bit word drops whatever was shifted past bit 31. */
        *(uint32_t *)out = (moved >> 16) + keep + (moved << 16);
    }
}
330
/**
 * Exchange bits 8-15 with bits 24-31 in every native 32-bit word.
 *
 * Bits 0-7 and 16-23 are kept in place. One 32-bit word is processed per
 * started group of four input bytes.
 *
 * @param src      input, read as native 32-bit words
 * @param dst      output, written as native 32-bit words
 * @param src_size number of input bytes
 */
static inline void shuffle_bytes_0321_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 4) {
        const unsigned word  = *(const uint32_t *)in;
        const unsigned keep  = word & 0x00ff00ff;
        const unsigned moved = word & 0xff00ff00;

        /* The store to a 32-bit word drops whatever was shifted past bit 31. */
        *(uint32_t *)out = (moved >> 16) + keep + (moved << 16);
    }
}
344
/*
 * Generate shuffle_bytes_<name>(): for every group of four bytes, output
 * byte 0..3 is taken from input byte a, b, c, d of the same group.
 * One group is processed per started four bytes of src_size.
 */
#define DEFINE_SHUFFLE_BYTES(name, a, b, c, d)                          \
static void shuffle_bytes_##name (const uint8_t *src,                   \
                                        uint8_t *dst, int src_size)     \
{                                                                       \
    int pos = 0;                                                        \
                                                                        \
    while (pos < src_size) {                                            \
        const uint8_t *in = src + pos;                                  \
        uint8_t *out      = dst + pos;                                  \
                                                                        \
        out[0] = in[a];                                                 \
        out[1] = in[b];                                                 \
        out[2] = in[c];                                                 \
        out[3] = in[d];                                                 \
        pos   += 4;                                                     \
    }                                                                   \
}

DEFINE_SHUFFLE_BYTES(1230_c, 1, 2, 3, 0)
DEFINE_SHUFFLE_BYTES(3012_c, 3, 0, 1, 2)
DEFINE_SHUFFLE_BYTES(3210_c, 3, 2, 1, 0)
362
/**
 * Exchange the first and third byte of every 3-byte pixel.
 *
 * One pixel is processed per started group of three input bytes.
 * A zero or negative src_size converts nothing.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer, same size as the input
 * @param src_size number of input bytes
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    /* Signed index: with an unsigned index the comparison against the signed
     * src_size converted a negative size into a huge unsigned bound. */
    int i;

    for (i = 0; i < src_size; i += 3) {
        /* Third byte is saved before any store, so the exchange is also
         * correct when dst and src are the same buffer. */
        const uint8_t x = src[i + 2];
        dst[i + 1]      = src[i + 1];
        dst[i + 2]      = src[i + 0];
        dst[i + 0]      = x;
    }
}
374
/**
 * Interleave planar Y, U and V lines into packed Y0 U Y1 V groups.
 *
 * For every output line, each pair of luma samples is combined with one U
 * and one V sample into one 32-bit group (two groups per 64-bit store when
 * HAVE_FAST_64BIT is set). The chroma pointers advance by chromStride only
 * after every vertLumPerChroma-th line; the test uses a bit mask, so
 * vertLumPerChroma is expected to be a power of two.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed output, written as 32-bit or 64-bit words
 * @param width            luma samples per line; width >> 1 groups are written
 * @param height           number of lines
 * @param lumStride        byte step between luma lines
 * @param chromStride      byte step between chroma lines
 * @param dstStride        byte step between output lines
 * @param vertLumPerChroma number of luma lines sharing one chroma line
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        /* Two groups per iteration, packed into one 64-bit store. */
        uint64_t *ldst = (uint64_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = yc[0] + (uc[0] << 8) +
                         (yc[1] << 16) + ((unsigned) vc[0] << 24);
            uint64_t l = yc[2] + (uc[1] << 8) +
                         (yc[3] << 16) + ((unsigned) vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }

#else
        uint32_t *idst = (uint32_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
            /* The byte that lands in bits 24-31 is widened to an unsigned
             * 32-bit type first: shifting the promoted int left by 24 would
             * overflow for sample values >= 128. */
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t) yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] <<  8) + (vc[0] <<  0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((uint32_t) vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* Step to the next chroma line after the last luma line sharing it. */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
424
/**
 * Pack planar Y, U, V into Y0 U Y1 V groups, with every chroma line shared
 * by two luma lines.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    /* Chroma lines are reused as-is for both luma lines. */
    const int lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, lumaLinesPerChromaLine);
}
438
/**
 * Interleave planar Y, U and V lines into packed U Y0 V Y1 groups.
 *
 * For every output line, each pair of luma samples is combined with one U
 * and one V sample into one 32-bit group (two groups per 64-bit store when
 * HAVE_FAST_64BIT is set). The chroma pointers advance by chromStride only
 * after every vertLumPerChroma-th line; the test uses a bit mask, so
 * vertLumPerChroma is expected to be a power of two.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed output, written as 32-bit or 64-bit words
 * @param width            luma samples per line; width >> 1 groups are written
 * @param height           number of lines
 * @param lumStride        byte step between luma lines
 * @param chromStride      byte step between chroma lines
 * @param dstStride        byte step between output lines
 * @param vertLumPerChroma number of luma lines sharing one chroma line
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        /* Two groups per iteration, packed into one 64-bit store. */
        uint64_t *ldst = (uint64_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = uc[0] + (yc[0] << 8) +
                         (vc[0] << 16) + ((unsigned) yc[1] << 24);
            uint64_t l = uc[1] + (yc[2] << 8) +
                         (vc[1] << 16) + ((unsigned) yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }

#else
        uint32_t *idst = (uint32_t *)dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
            /* The byte that lands in bits 24-31 is widened to an unsigned
             * 32-bit type first: shifting the promoted int left by 24 would
             * overflow for sample values >= 128. */
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t) uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] <<  8) + (yc[1] <<  0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((uint32_t) yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* Step to the next chroma line after the last luma line sharing it. */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
488
/**
 * Pack planar Y, U, V into U Y0 V Y1 groups, with every chroma line shared
 * by two luma lines.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    /* Chroma lines are reused as-is for both luma lines. */
    const int lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, lumaLinesPerChromaLine);
}
502
/**
 * Pack planar Y, U, V into U Y0 V Y1 groups, with one chroma line per
 * luma line.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* Every luma line has its own chroma line. */
    const int lumaLinesPerChromaLine = 1;

    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, lumaLinesPerChromaLine);
}
514
/**
 * Pack planar Y, U, V into Y0 U Y1 V groups, with one chroma line per
 * luma line.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* Every luma line has its own chroma line. */
    const int lumaLinesPerChromaLine = 1;

    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, lumaLinesPerChromaLine);
}
526
/**
 * Split packed Y0 U Y1 V groups into separate Y, U and V planes.
 *
 * Lines are processed in pairs: the first line of each pair supplies luma
 * and chroma, the second line supplies luma only (its chroma bytes are
 * ignored).
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        const uint8_t *group;
        int i;

        /* First line of the pair: luma and chroma. */
        for (i = 0, group = src; i < chromWidth; i++, group += 4) {
            ydst[2 * i + 0] = group[0];
            udst[i]         = group[1];
            ydst[2 * i + 1] = group[2];
            vdst[i]         = group[3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* Second line of the pair: luma only. */
        for (i = 0, group = src; i < chromWidth; i++, group += 4) {
            ydst[2 * i + 0] = group[0];
            ydst[2 * i + 1] = group[2];
        }
        ydst += lumStride;
        src  += srcStride;

        udst += chromStride;
        vdst += chromStride;
    }
}
560
/**
 * Double one line horizontally: the first and last output samples copy the
 * first and last input samples, every sample in between is a 3:1 / 1:3
 * blend of two neighbouring input samples.
 */
static inline void planar2x_edge_line_c(const uint8_t *line, uint8_t *out,
                                        int srcWidth)
{
    int x;

    out[0] = line[0];
    for (x = 0; x < srcWidth - 1; x++) {
        out[2 * x + 1] = (3 * line[x] + line[x + 1]) >> 2;
        out[2 * x + 2] = (line[x] + 3 * line[x + 1]) >> 2;
    }
    out[2 * srcWidth - 1] = line[srcWidth - 1];
}

/**
 * Upscale a plane by two in both directions.
 *
 * The first and last output lines are produced from the first and last
 * source line alone. Every pair of output lines in between is produced from
 * two adjacent source lines with 3:1 / 1:3 weights; the interior samples
 * blend diagonally neighbouring source samples.
 *
 * @param src       source plane
 * @param dst       destination plane, 2 * srcWidth by 2 * srcHeight samples
 * @param srcWidth  source samples per line
 * @param srcHeight number of source lines
 * @param srcStride byte step between source lines
 * @param dstStride byte step between destination lines
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    const int last = srcWidth - 1;
    int x, y;

    /* First output line. */
    planar2x_edge_line_c(src, dst, srcWidth);
    dst += dstStride;

    for (y = 1; y < srcHeight; y++) {
        const uint8_t *upper = src;
        const uint8_t *lower = src + srcStride;
        uint8_t *out0        = dst;
        uint8_t *out1        = dst + dstStride;

        /* Left edge: vertical blend only. */
        out0[0] = (upper[0] * 3 + lower[0]) >> 2;
        out1[0] = (upper[0] + 3 * lower[0]) >> 2;

        for (x = 0; x < last; x++) {
            out0[2 * x + 1] = (upper[x]     * 3 + lower[x + 1]) >> 2;
            out1[2 * x + 2] = (upper[x]     + 3 * lower[x + 1]) >> 2;
            out1[2 * x + 1] = (upper[x + 1] + 3 * lower[x])     >> 2;
            out0[2 * x + 2] = (upper[x + 1] * 3 + lower[x])     >> 2;
        }

        /* Right edge: vertical blend only. */
        out0[2 * last + 1] = (upper[last] * 3 + lower[last]) >> 2;
        out1[2 * last + 1] = (upper[last] + 3 * lower[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    /* Last output line. */
    planar2x_edge_line_c(src, dst, srcWidth);
}
605
/**
 * Split packed U Y0 V Y1 groups into separate Y, U and V planes.
 *
 * Lines are processed in pairs: the first line of each pair supplies luma
 * and chroma, the second line supplies luma only, so chrominance data is
 * only taken from every second line.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * FIXME: Write HQ version.
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        const uint8_t *group;
        int i;

        /* First line of the pair: luma and chroma. */
        for (i = 0, group = src; i < chromWidth; i++, group += 4) {
            udst[i]         = group[0];
            ydst[2 * i + 0] = group[1];
            vdst[i]         = group[2];
            ydst[2 * i + 1] = group[3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* Second line of the pair: luma only. */
        for (i = 0, group = src; i < chromWidth; i++, group += 4) {
            ydst[2 * i + 0] = group[1];
            ydst[2 * i + 1] = group[3];
        }
        ydst += lumStride;
        src  += srcStride;

        udst += chromStride;
        vdst += chromStride;
    }
}
641
642/**
643 * Height should be a multiple of 2 and width should be a multiple of 2.
644 * (If this is a problem for anyone then tell me, and I will fix it.)
645 * Chrominance data is only taken from every second line,
646 * others are ignored in the C version.
647 * FIXME: Write HQ version.
648 */
649void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
650                   uint8_t *vdst, int width, int height, int lumStride,
651                   int chromStride, int srcStride, int32_t *rgb2yuv)
652{
653    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
654    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
655    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
656    int y;
657    const int chromWidth = width >> 1;
658
659    for (y = 0; y < height; y += 2) {
660        int i;
661        for (i = 0; i < chromWidth; i++) {
662            unsigned int b = src[6 * i + 0];
663            unsigned int g = src[6 * i + 1];
664            unsigned int r = src[6 * i + 2];
665
666            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
667            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
668            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
669
670            udst[i]     = U;
671            vdst[i]     = V;
672            ydst[2 * i] = Y;
673
674            b = src[6 * i + 3];
675            g = src[6 * i + 4];
676            r = src[6 * i + 5];
677
678            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
679            ydst[2 * i + 1] = Y;
680        }
681        ydst += lumStride;
682        src  += srcStride;
683
684        if (y+1 == height)
685            break;
686
687        for (i = 0; i < chromWidth; i++) {
688            unsigned int b = src[6 * i + 0];
689            unsigned int g = src[6 * i + 1];
690            unsigned int r = src[6 * i + 2];
691
692            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
693
694            ydst[2 * i] = Y;
695
696            b = src[6 * i + 3];
697            g = src[6 * i + 4];
698            r = src[6 * i + 5];
699
700            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
701            ydst[2 * i + 1] = Y;
702        }
703        udst += chromStride;
704        vdst += chromStride;
705        ydst += lumStride;
706        src  += srcStride;
707    }
708}
709
/**
 * Interleave two byte planes line by line: output byte 2*w comes from src1
 * and output byte 2*w + 1 from src2.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dest       destination, 2 * width bytes per line
 * @param width      bytes per source line
 * @param height     number of lines
 * @param src1Stride byte step between lines of src1
 * @param src2Stride byte step between lines of src2
 * @param dstStride  byte step between destination lines
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int line;

    for (line = 0; line < height; line++) {
        uint8_t *out = dest;
        int col;

        for (col = 0; col < width; col++) {
            *out++ = src1[col];
            *out++ = src2[col];
        }

        src1 += src1Stride;
        src2 += src2Stride;
        dest += dstStride;
    }
}
727
/**
 * Split an interleaved byte plane line by line: source byte 2*w goes to
 * dst1 and source byte 2*w + 1 to dst2.
 *
 * @param src        interleaved source, 2 * width bytes per line
 * @param dst1       first destination plane
 * @param dst2       second destination plane
 * @param width      bytes per destination line
 * @param height     number of lines
 * @param srcStride  byte step between source lines
 * @param dst1Stride byte step between lines of dst1
 * @param dst2Stride byte step between lines of dst2
 */
static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                int width, int height, int srcStride,
                                int dst1Stride, int dst2Stride)
{
    int line;

    for (line = 0; line < height; line++) {
        const uint8_t *in = src;
        int col;

        for (col = 0; col < width; col++, in += 2) {
            dst1[col] = in[0];
            dst2[col] = in[1];
        }

        src  += srcStride;
        dst1 += dst1Stride;
        dst2 += dst2Stride;
    }
}
745
/**
 * Enlarge two planes by two in both directions using sample repetition.
 *
 * For each plane, height / 2 output lines of 2 * (width / 2) samples are
 * written; output line y reads source line y >> 1 and every source sample
 * is written twice in a row.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for src1
 * @param dst2       destination for src2
 * @param width      twice the number of source samples read per line
 * @param height     twice the number of output lines
 * @param srcStride1 byte step between lines of src1
 * @param srcStride2 byte step between lines of src2
 * @param dstStride1 byte step between lines of dst1
 * @param dstStride2 byte step between lines of dst2
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    const int half_w = width  / 2;
    const int half_h = height / 2;
    int x, y;

    /* First plane. */
    for (y = 0; y < half_h; y++) {
        const uint8_t *in = src1 + srcStride1 * (y >> 1);
        uint8_t *out      = dst1 + dstStride1 *  y;

        for (x = 0; x < half_w; x++) {
            const uint8_t sample = in[x];
            out[2 * x + 1] = sample;
            out[2 * x]     = sample;
        }
    }

    /* Second plane. */
    for (y = 0; y < half_h; y++) {
        const uint8_t *in = src2 + srcStride2 * (y >> 1);
        uint8_t *out      = dst2 + dstStride2 *  y;

        for (x = 0; x < half_w; x++) {
            const uint8_t sample = in[x];
            out[2 * x + 1] = sample;
            out[2 * x]     = sample;
        }
    }
}
769
/**
 * Pack three planes into Y U Y V byte groups, reusing each chroma sample
 * for four luma samples horizontally and each chroma line for four luma
 * lines vertically.
 *
 * Per iteration, four bytes of src1 and one byte each of src2 and src3 are
 * turned into eight output bytes; width / 2 iterations run per line.
 *
 * @param src1       luma plane
 * @param src2       plane written to bytes 1 and 5 of each group
 * @param src3       plane written to bytes 3 and 7 of each group
 * @param dst        packed output
 * @param width      twice the number of 8-byte groups written per line
 * @param height     number of lines
 * @param srcStride1 byte step between lines of src1
 * @param srcStride2 byte step between lines of src2
 * @param srcStride3 byte step between lines of src3
 * @param dstStride  byte step between output lines
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    const int groups = width / 2;
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *luma = src1 + srcStride1 *  y;
        const uint8_t *cb   = src2 + srcStride2 * (y >> 2);
        const uint8_t *cr   = src3 + srcStride3 * (y >> 2);
        uint8_t *out        = dst  + dstStride  *  y;

        for (x = 0; x < groups; x++, luma += 4, out += 8) {
            out[0] = luma[0];
            out[1] = cb[x];
            out[2] = luma[1];
            out[3] = cr[x];
            out[4] = luma[2];
            out[5] = cb[x];
            out[6] = luma[3];
            out[7] = cr[x];
        }
    }
}
798
/**
 * Copy every second byte of src, starting with byte 0, to dst.
 *
 * @param src   source, 2 * count bytes are spanned
 * @param dst   destination, count bytes are written
 * @param count number of bytes to produce; nothing is done if count <= 0
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}
809
/**
 * From every group of four source bytes, copy byte 0 to dst0 and byte 2
 * to dst1.
 *
 * @param src   source, 4 * count bytes are spanned
 * @param dst0  receives byte 0 of each group
 * @param dst1  receives byte 2 of each group
 * @param count number of groups; nothing is done if count <= 0
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *group = src + 4 * i;

        dst0[i] = group[0];
        dst1[i] = group[2];
    }
}
823
/**
 * From every group of four bytes, average byte 0 of src0 and src1 into
 * dst0 and byte 2 of src0 and src1 into dst1.
 *
 * The average is (a + b) >> 1, i.e. it rounds down.
 *
 * @param src0  first source, 4 * count bytes are spanned
 * @param src1  second source, 4 * count bytes are spanned
 * @param dst0  receives the averaged byte 0 of each group
 * @param dst1  receives the averaged byte 2 of each group
 * @param count number of groups; nothing is done if count <= 0
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *g0 = src0 + 4 * i;
        const uint8_t *g1 = src1 + 4 * i;

        dst0[i] = (g0[0] + g1[0]) >> 1;
        dst1[i] = (g0[2] + g1[2]) >> 1;
    }
}
838
/**
 * From every group of four source bytes, copy byte 1 to dst0 and byte 3
 * to dst1.
 *
 * @param src   source, 4 * count bytes are spanned
 * @param dst0  receives byte 1 of each group
 * @param dst1  receives byte 3 of each group
 * @param count number of groups; nothing is done if count <= 0
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *group = src + 4 * i;

        dst0[i] = group[1];
        dst1[i] = group[3];
    }
}
853
/**
 * From every group of four bytes, average byte 1 of src0 and src1 into
 * dst0 and byte 3 of src0 and src1 into dst1.
 *
 * The average is (a + b) >> 1, i.e. it rounds down.
 *
 * @param src0  first source, 4 * count bytes are spanned
 * @param src1  second source, 4 * count bytes are spanned
 * @param dst0  receives the averaged byte 1 of each group
 * @param dst1  receives the averaged byte 3 of each group
 * @param count number of groups; nothing is done if count <= 0
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *g0 = src0 + 4 * i;
        const uint8_t *g1 = src1 + 4 * i;

        dst0[i] = (g0[1] + g1[1]) >> 1;
        dst1[i] = (g0[3] + g1[3]) >> 1;
    }
}
870
/**
 * Split packed lines with luma at even byte positions into a luma plane
 * and two chroma planes with one chroma line per two source lines.
 *
 * Luma is extracted from every line. On every odd line, the chroma bytes
 * (odd byte positions) of that line and the line above are averaged into
 * one U and one V line.
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
    int y;

    for (y = 0; y < height; y++, src += srcStride, ydst += lumStride) {
        extract_even_c(src, ydst, width);

        if (!(y & 1))
            continue;

        /* Odd line: average chroma with the previous line. */
        extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
890
/**
 * Split packed lines with luma at even byte positions into a luma plane
 * and two chroma planes, one chroma line per source line.
 *
 * For every line, luma is taken from the even bytes and the two chroma
 * planes from bytes 1 and 3 of each four-byte group.
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
    int line = 0;

    while (line < height) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
        line++;
    }
}
908
/**
 * Split packed lines with luma at odd byte positions into a luma plane
 * and two chroma planes with one chroma line per two source lines.
 *
 * Luma is extracted from every line. On every odd line, the chroma bytes
 * (even byte positions) of that line and the line above are averaged into
 * one U and one V line.
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
    int y;

    for (y = 0; y < height; y++, src += srcStride, ydst += lumStride) {
        /* Luma sits one byte into each pair, hence src + 1. */
        extract_even_c(src + 1, ydst, width);

        if (!(y & 1))
            continue;

        /* Odd line: average chroma with the previous line. */
        extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
928
/**
 * Split packed lines with luma at odd byte positions into a luma plane
 * and two chroma planes, one chroma line per source line.
 *
 * For every line, luma is taken from the odd bytes and the two chroma
 * planes from bytes 0 and 2 of each four-byte group.
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
    int line = 0;

    while (line < height) {
        /* Luma sits one byte into each pair, hence src + 1. */
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
        line++;
    }
}
946
947static av_cold void rgb2rgb_init_c(void)
948{
949    rgb15to16          = rgb15to16_c;
950    rgb15tobgr24       = rgb15tobgr24_c;
951    rgb15to32          = rgb15to32_c;
952    rgb16tobgr24       = rgb16tobgr24_c;
953    rgb16to32          = rgb16to32_c;
954    rgb16to15          = rgb16to15_c;
955    rgb24tobgr16       = rgb24tobgr16_c;
956    rgb24tobgr15       = rgb24tobgr15_c;
957    rgb24tobgr32       = rgb24tobgr32_c;
958    rgb32to16          = rgb32to16_c;
959    rgb32to15          = rgb32to15_c;
960    rgb32tobgr24       = rgb32tobgr24_c;
961    rgb24to15          = rgb24to15_c;
962    rgb24to16          = rgb24to16_c;
963    rgb24tobgr24       = rgb24tobgr24_c;
964#if HAVE_BIGENDIAN
965    shuffle_bytes_0321 = shuffle_bytes_2103_c;
966    shuffle_bytes_2103 = shuffle_bytes_0321_c;
967#else
968    shuffle_bytes_0321 = shuffle_bytes_0321_c;
969    shuffle_bytes_2103 = shuffle_bytes_2103_c;
970#endif
971    shuffle_bytes_1230 = shuffle_bytes_1230_c;
972    shuffle_bytes_3012 = shuffle_bytes_3012_c;
973    shuffle_bytes_3210 = shuffle_bytes_3210_c;
974    rgb32tobgr16       = rgb32tobgr16_c;
975    rgb32tobgr15       = rgb32tobgr15_c;
976    yv12toyuy2         = yv12toyuy2_c;
977    yv12touyvy         = yv12touyvy_c;
978    yuv422ptoyuy2      = yuv422ptoyuy2_c;
979    yuv422ptouyvy      = yuv422ptouyvy_c;
980    yuy2toyv12         = yuy2toyv12_c;
981    planar2x           = planar2x_c;
982    ff_rgb24toyv12     = ff_rgb24toyv12_c;
983    interleaveBytes    = interleaveBytes_c;
984    deinterleaveBytes  = deinterleaveBytes_c;
985    vu9_to_vu12        = vu9_to_vu12_c;
986    yvu9_to_yuy2       = yvu9_to_yuy2_c;
987
988    uyvytoyuv420       = uyvytoyuv420_c;
989    uyvytoyuv422       = uyvytoyuv422_c;
990    yuyvtoyuv420       = yuyvtoyuv420_c;
991    yuyvtoyuv422       = yuyvtoyuv422_c;
992}
993