xref: /third_party/ffmpeg/libswscale/x86/rgb2rgb.c (revision cabdff1a)
1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * software RGB to RGB converter
3cabdff1aSopenharmony_ci * pluralize by software PAL8 to RGB converter
4cabdff1aSopenharmony_ci *              software YUV to YUV converter
5cabdff1aSopenharmony_ci *              software YUV to RGB converter
6cabdff1aSopenharmony_ci * Written by Nick Kurshev.
7cabdff1aSopenharmony_ci * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * This file is part of FFmpeg.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
12cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
13cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
14cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
17cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
18cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19cabdff1aSopenharmony_ci * Lesser General Public License for more details.
20cabdff1aSopenharmony_ci *
21cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
22cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
23cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24cabdff1aSopenharmony_ci */
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include <stdint.h>
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_ci#include "config.h"
29cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
30cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h"
31cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
32cabdff1aSopenharmony_ci#include "libavutil/bswap.h"
33cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
34cabdff1aSopenharmony_ci
35cabdff1aSopenharmony_ci#include "libswscale/rgb2rgb.h"
36cabdff1aSopenharmony_ci#include "libswscale/swscale.h"
37cabdff1aSopenharmony_ci#include "libswscale/swscale_internal.h"
38cabdff1aSopenharmony_ci
39cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
40cabdff1aSopenharmony_ci
41cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mmx_ff)       = 0x00000000000000FFULL;
42cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
43cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL;
44cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
45cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
46cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
47cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
48cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
49cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
50cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
51cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
52cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL;
53cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL;
54cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
55cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
56cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
57cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
58cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
59cabdff1aSopenharmony_ci#define mask16b mask15b
60cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
61cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
62cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
63cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
64cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
65cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
66cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
67cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
68cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mul15_mid)    = 0x4200420042004200ULL;
69cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mul15_hi)     = 0x0210021002100210ULL;
70cabdff1aSopenharmony_ciDECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL;
71cabdff1aSopenharmony_ci
72cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2YOffset);
73cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, extern const uint64_t, ff_w1111);
74cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
75cabdff1aSopenharmony_ci
/*
 * Fixed-point RGB -> YUV weights: each macro scales a floating-point
 * coefficient by (1 << RGB2YUV_SHIFT), adds 0.5 and converts to int.
 * The macro names pair a source channel letter (R/G/B) with a destination
 * letter (Y/U/V).
 * NOTE(review): the values look like the usual BT.601 limited-range
 * matrix -- confirm before relying on that.
 *
 * The int cast truncates toward zero, so the "+0.5" gives round-to-nearest
 * only for the positive coefficients; the negative ones are simply the
 * truncation of (x + 0.5). Left as is so the generated values do not change.
 */

/* Y row */
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))

/* U row */
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))

/* V row */
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
85cabdff1aSopenharmony_ci
86cabdff1aSopenharmony_ci// Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one.
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ci#define COMPILE_TEMPLATE_SSE2 0
89cabdff1aSopenharmony_ci#define COMPILE_TEMPLATE_AVX 0
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci// MMXEXT versions
92cabdff1aSopenharmony_ci#undef RENAME
93cabdff1aSopenharmony_ci#define RENAME(a) a ## _mmxext
94cabdff1aSopenharmony_ci#include "rgb2rgb_template.c"
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_ci//SSE2 versions
97cabdff1aSopenharmony_ci#undef RENAME
98cabdff1aSopenharmony_ci#undef COMPILE_TEMPLATE_SSE2
99cabdff1aSopenharmony_ci#define COMPILE_TEMPLATE_SSE2 1
100cabdff1aSopenharmony_ci#define RENAME(a) a ## _sse2
101cabdff1aSopenharmony_ci#include "rgb2rgb_template.c"
102cabdff1aSopenharmony_ci
103cabdff1aSopenharmony_ci//AVX versions
104cabdff1aSopenharmony_ci#undef RENAME
105cabdff1aSopenharmony_ci#undef COMPILE_TEMPLATE_AVX
106cabdff1aSopenharmony_ci#define COMPILE_TEMPLATE_AVX 1
107cabdff1aSopenharmony_ci#define RENAME(a) a ## _avx
108cabdff1aSopenharmony_ci#include "rgb2rgb_template.c"
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_ci/*
111cabdff1aSopenharmony_ci RGB15->RGB16 original by Strepto/Astral
112cabdff1aSopenharmony_ci ported to gcc & bugfixed : A'rpi
113cabdff1aSopenharmony_ci MMXEXT, 3DNOW optimization by Nick Kurshev
114cabdff1aSopenharmony_ci 32-bit C version, and and&add trick by Michael Niedermayer
115cabdff1aSopenharmony_ci*/
116cabdff1aSopenharmony_ci
117cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
118cabdff1aSopenharmony_ci
/*
 * Prototypes for routines that are only declared in this file; their
 * definitions live elsewhere (presumably in the external assembly objects).
 * rgb2rgb_init_x86() assigns them to the shuffle_bytes_* and uyvytoyuv422
 * function pointers.
 */

/* MMXEXT */
void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size);

/* SSSE3 */
void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);

#if ARCH_X86_64
/* AVX2 byte shuffles: declared for x86-64 builds only. */
void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size);
void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size);

/* UYVY -> planar 4:2:2 converters: declared for x86-64 builds only. */
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                          const uint8_t *src,
                          int width, int height,
                          int lumStride, int chromStride, int srcStride);
void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         const uint8_t *src,
                         int width, int height,
                         int lumStride, int chromStride, int srcStride);
#endif /* ARCH_X86_64 */
140cabdff1aSopenharmony_ci
141cabdff1aSopenharmony_ciav_cold void rgb2rgb_init_x86(void)
142cabdff1aSopenharmony_ci{
143cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
144cabdff1aSopenharmony_ci
145cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
146cabdff1aSopenharmony_ci    if (INLINE_MMXEXT(cpu_flags))
147cabdff1aSopenharmony_ci        rgb2rgb_init_mmxext();
148cabdff1aSopenharmony_ci    if (INLINE_SSE2(cpu_flags))
149cabdff1aSopenharmony_ci        rgb2rgb_init_sse2();
150cabdff1aSopenharmony_ci    if (INLINE_AVX(cpu_flags))
151cabdff1aSopenharmony_ci        rgb2rgb_init_avx();
152cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
153cabdff1aSopenharmony_ci
154cabdff1aSopenharmony_ci    if (EXTERNAL_MMXEXT(cpu_flags)) {
155cabdff1aSopenharmony_ci        shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
156cabdff1aSopenharmony_ci    }
157cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2(cpu_flags)) {
158cabdff1aSopenharmony_ci#if ARCH_X86_64
159cabdff1aSopenharmony_ci        uyvytoyuv422 = ff_uyvytoyuv422_sse2;
160cabdff1aSopenharmony_ci#endif
161cabdff1aSopenharmony_ci    }
162cabdff1aSopenharmony_ci    if (EXTERNAL_SSSE3(cpu_flags)) {
163cabdff1aSopenharmony_ci        shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3;
164cabdff1aSopenharmony_ci        shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3;
165cabdff1aSopenharmony_ci        shuffle_bytes_1230 = ff_shuffle_bytes_1230_ssse3;
166cabdff1aSopenharmony_ci        shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
167cabdff1aSopenharmony_ci        shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
168cabdff1aSopenharmony_ci    }
169cabdff1aSopenharmony_ci#if ARCH_X86_64
170cabdff1aSopenharmony_ci    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
171cabdff1aSopenharmony_ci        shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2;
172cabdff1aSopenharmony_ci        shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2;
173cabdff1aSopenharmony_ci        shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
174cabdff1aSopenharmony_ci        shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
175cabdff1aSopenharmony_ci        shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
176cabdff1aSopenharmony_ci    }
177cabdff1aSopenharmony_ci    if (EXTERNAL_AVX(cpu_flags)) {
178cabdff1aSopenharmony_ci        uyvytoyuv422 = ff_uyvytoyuv422_avx;
179cabdff1aSopenharmony_ci    }
180cabdff1aSopenharmony_ci#endif
181cabdff1aSopenharmony_ci}
182