1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * SIMD-optimized halfpel functions
3cabdff1aSopenharmony_ci * Copyright (c) 2000, 2001 Fabrice Bellard
4cabdff1aSopenharmony_ci * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * This file is part of FFmpeg.
7cabdff1aSopenharmony_ci *
8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
12cabdff1aSopenharmony_ci *
13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16cabdff1aSopenharmony_ci * Lesser General Public License for more details.
17cabdff1aSopenharmony_ci *
18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21cabdff1aSopenharmony_ci *
22cabdff1aSopenharmony_ci * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23cabdff1aSopenharmony_ci */
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci#include "config_components.h"
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
28cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
29cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h"
30cabdff1aSopenharmony_ci#include "libavcodec/avcodec.h"
31cabdff1aSopenharmony_ci#include "libavcodec/hpeldsp.h"
32cabdff1aSopenharmony_ci#include "libavcodec/pixels.h"
33cabdff1aSopenharmony_ci#include "fpel.h"
34cabdff1aSopenharmony_ci#include "hpeldsp.h"
35cabdff1aSopenharmony_ci
/*
 * Prototypes of half-pel kernels that are referenced but not defined in
 * this file. Every one takes the same four arguments:
 * (block, pixels, line_size, h).
 */

/* pixels8 kernels, MMXEXT */
void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);

/* pixels16 kernels, MMXEXT */
void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);

/* pixels16 kernels, SSE2 */
void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
60cabdff1aSopenharmony_ci
/*
 * Unprefixed aliases for ff_-prefixed MMX routines, so that names pasted
 * together as PFX_pixelsSIZE_CPU by the SET_HPEL_FUNCS* macros resolve.
 */
#define put_pixels8_mmx         ff_put_pixels8_mmx
#define put_pixels16_mmx        ff_put_pixels16_mmx
#define put_pixels8_xy2_mmx     ff_put_pixels8_xy2_mmx
#define avg_pixels16_mmx        ff_avg_pixels16_mmx

/* The no_rnd names map onto the same routines as the rounding names. */
#define put_no_rnd_pixels8_mmx  ff_put_pixels8_mmx
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
70cabdff1aSopenharmony_ci
71cabdff1aSopenharmony_ci/***********************************/
72cabdff1aSopenharmony_ci/* MMX no rounding */
73cabdff1aSopenharmony_ci#define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
74cabdff1aSopenharmony_ci#define SET_RND  MOVQ_WONE
75cabdff1aSopenharmony_ci#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
76cabdff1aSopenharmony_ci#define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
77cabdff1aSopenharmony_ci#define STATIC static
78cabdff1aSopenharmony_ci
79cabdff1aSopenharmony_ci#include "rnd_template.c"
80cabdff1aSopenharmony_ci#include "hpeldsp_rnd_template.c"
81cabdff1aSopenharmony_ci
82cabdff1aSopenharmony_ci#undef DEF
83cabdff1aSopenharmony_ci#undef SET_RND
84cabdff1aSopenharmony_ci#undef PAVGBP
85cabdff1aSopenharmony_ci#undef PAVGB
86cabdff1aSopenharmony_ci#undef STATIC
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ci#if HAVE_MMX
89cabdff1aSopenharmony_ciCALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8)
90cabdff1aSopenharmony_ciCALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8)
91cabdff1aSopenharmony_ci
92cabdff1aSopenharmony_ciCALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8)
93cabdff1aSopenharmony_ciCALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
94cabdff1aSopenharmony_ci#endif
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_ci/***********************************/
97cabdff1aSopenharmony_ci/* MMX rounding */
98cabdff1aSopenharmony_ci
99cabdff1aSopenharmony_ci#define SET_RND  MOVQ_WTWO
100cabdff1aSopenharmony_ci#define DEF(x, y) ff_ ## x ## _ ## y ## _mmx
101cabdff1aSopenharmony_ci#define STATIC
102cabdff1aSopenharmony_ci#define NO_AVG
103cabdff1aSopenharmony_ci
104cabdff1aSopenharmony_ci#include "rnd_template.c"
105cabdff1aSopenharmony_ci
106cabdff1aSopenharmony_ci#undef NO_AVG
107cabdff1aSopenharmony_ci#undef DEF
108cabdff1aSopenharmony_ci#undef SET_RND
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_ci#if HAVE_MMX
111cabdff1aSopenharmony_ciCALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
112cabdff1aSopenharmony_ci#endif
113cabdff1aSopenharmony_ci
114cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
115cabdff1aSopenharmony_ci
116cabdff1aSopenharmony_ci
#if HAVE_X86ASM

/*
 * Emit, for one CPU suffix, the pixels16 wrappers that are built from
 * the corresponding ff_*pixels8* routines via CALL_2X_PIXELS.
 */
#define HPELDSP_AVG_PIXELS16(CPUEXT)                      \
    /* put */                                             \
    CALL_2X_PIXELS(put_pixels16_y2        ## CPUEXT, ff_put_pixels8_y2        ## CPUEXT, 8) \
    CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \
    CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \
    /* avg */                                             \
    CALL_2X_PIXELS(avg_pixels16           ## CPUEXT, ff_avg_pixels8           ## CPUEXT, 8) \
    CALL_2X_PIXELS(avg_pixels16_x2        ## CPUEXT, ff_avg_pixels8_x2        ## CPUEXT, 8) \
    CALL_2X_PIXELS(avg_pixels16_y2        ## CPUEXT, ff_avg_pixels8_y2        ## CPUEXT, 8) \
    CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8) \
    CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)

/* Only the MMXEXT set is instantiated. */
HPELDSP_AVG_PIXELS16(_mmxext)

#endif /* HAVE_X86ASM */
132cabdff1aSopenharmony_ci
/*
 * Table-filling helpers. All of them write through a variable named `c`
 * that must be in scope at the point of use.
 *
 *   PFX  - table and function-name prefix (e.g. put, put_no_rnd)
 *   IDX  - optional leading table index such as [0]; may be empty
 *   SIZE - number pasted into the function name after "_pixels"
 *   CPU  - CPU suffix pasted at the end of the function name
 *
 * Each helper expands to a single do { } while (0) statement (or to
 * ((void)0)), so it is safe as the body of an unbraced if/else.
 */

/* Slot 0: assigned only when HAVE_MMX_EXTERNAL is nonzero. */
#define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
    do {                                                                        \
        if (HAVE_MMX_EXTERNAL)                                                  \
            c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
    } while (0)

#if HAVE_MMX_INLINE
/* Slots 0 and 3 (plain and xy2). */
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU)                                   \
    do {                                                                        \
        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU);                                \
        c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
    } while (0)
/* Slots 1 and 2 (x2 and y2). */
#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU)                                   \
    do {                                                                        \
        c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_  ## CPU; \
        c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_  ## CPU; \
    } while (0)
#else
/* Without inline MMX only slot 0 can be filled. */
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)
#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) ((void)0)
#endif
/* All four slots. */
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
    do {                                                                        \
        SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU);                                  \
        SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU);                                  \
    } while (0)
157cabdff1aSopenharmony_ci
158cabdff1aSopenharmony_cistatic void hpeldsp_init_mmx(HpelDSPContext *c, int flags)
159cabdff1aSopenharmony_ci{
160cabdff1aSopenharmony_ci    SET_HPEL_FUNCS03(put,      [0], 16, mmx);
161cabdff1aSopenharmony_ci    SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
162cabdff1aSopenharmony_ci    SET_HPEL_FUNCS(avg_no_rnd,    , 16, mmx);
163cabdff1aSopenharmony_ci    SET_HPEL_FUNCS03(put,      [1],  8, mmx);
164cabdff1aSopenharmony_ci    SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
165cabdff1aSopenharmony_ci}
166cabdff1aSopenharmony_ci
167cabdff1aSopenharmony_cistatic void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
168cabdff1aSopenharmony_ci{
169cabdff1aSopenharmony_ci#if HAVE_MMXEXT_EXTERNAL
170cabdff1aSopenharmony_ci    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
171cabdff1aSopenharmony_ci    c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
172cabdff1aSopenharmony_ci
173cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
174cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
175cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
176cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
177cabdff1aSopenharmony_ci
178cabdff1aSopenharmony_ci    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
179cabdff1aSopenharmony_ci    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
180cabdff1aSopenharmony_ci
181cabdff1aSopenharmony_ci    c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
182cabdff1aSopenharmony_ci    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
183cabdff1aSopenharmony_ci    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
184cabdff1aSopenharmony_ci    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
185cabdff1aSopenharmony_ci
186cabdff1aSopenharmony_ci    if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
187cabdff1aSopenharmony_ci        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
188cabdff1aSopenharmony_ci        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
189cabdff1aSopenharmony_ci        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
190cabdff1aSopenharmony_ci        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
191cabdff1aSopenharmony_ci
192cabdff1aSopenharmony_ci        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
193cabdff1aSopenharmony_ci        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
194cabdff1aSopenharmony_ci    }
195cabdff1aSopenharmony_ci#endif /* HAVE_MMXEXT_EXTERNAL */
196cabdff1aSopenharmony_ci}
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_cistatic void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags)
199cabdff1aSopenharmony_ci{
200cabdff1aSopenharmony_ci#if HAVE_SSE2_EXTERNAL
201cabdff1aSopenharmony_ci    c->put_pixels_tab[0][0]        = ff_put_pixels16_sse2;
202cabdff1aSopenharmony_ci    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
203cabdff1aSopenharmony_ci    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_sse2;
204cabdff1aSopenharmony_ci    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_sse2;
205cabdff1aSopenharmony_ci    c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_sse2;
206cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_sse2;
207cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
208cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
209cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
210cabdff1aSopenharmony_ci#endif /* HAVE_SSE2_EXTERNAL */
211cabdff1aSopenharmony_ci}
212cabdff1aSopenharmony_ci
213cabdff1aSopenharmony_cistatic void hpeldsp_init_ssse3(HpelDSPContext *c, int flags)
214cabdff1aSopenharmony_ci{
215cabdff1aSopenharmony_ci#if HAVE_SSSE3_EXTERNAL
216cabdff1aSopenharmony_ci    c->put_pixels_tab[0][3]            = ff_put_pixels16_xy2_ssse3;
217cabdff1aSopenharmony_ci    c->avg_pixels_tab[0][3]            = ff_avg_pixels16_xy2_ssse3;
218cabdff1aSopenharmony_ci    c->put_pixels_tab[1][3]            = ff_put_pixels8_xy2_ssse3;
219cabdff1aSopenharmony_ci    c->avg_pixels_tab[1][3]            = ff_avg_pixels8_xy2_ssse3;
220cabdff1aSopenharmony_ci#endif
221cabdff1aSopenharmony_ci}
222cabdff1aSopenharmony_ci
223cabdff1aSopenharmony_ciav_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
224cabdff1aSopenharmony_ci{
225cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
226cabdff1aSopenharmony_ci
227cabdff1aSopenharmony_ci    if (INLINE_MMX(cpu_flags))
228cabdff1aSopenharmony_ci        hpeldsp_init_mmx(c, flags);
229cabdff1aSopenharmony_ci
230cabdff1aSopenharmony_ci    if (EXTERNAL_MMXEXT(cpu_flags))
231cabdff1aSopenharmony_ci        hpeldsp_init_mmxext(c, flags);
232cabdff1aSopenharmony_ci
233cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2_FAST(cpu_flags))
234cabdff1aSopenharmony_ci        hpeldsp_init_sse2_fast(c, flags);
235cabdff1aSopenharmony_ci
236cabdff1aSopenharmony_ci    if (EXTERNAL_SSSE3(cpu_flags))
237cabdff1aSopenharmony_ci        hpeldsp_init_ssse3(c, flags);
238cabdff1aSopenharmony_ci
239cabdff1aSopenharmony_ci    if (CONFIG_VP3_DECODER)
240cabdff1aSopenharmony_ci        ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
241cabdff1aSopenharmony_ci}
242