1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * VC-1 and WMV3 - DSP functions MMX-optimized
3cabdff1aSopenharmony_ci * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr>
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * Permission is hereby granted, free of charge, to any person
6cabdff1aSopenharmony_ci * obtaining a copy of this software and associated documentation
7cabdff1aSopenharmony_ci * files (the "Software"), to deal in the Software without
8cabdff1aSopenharmony_ci * restriction, including without limitation the rights to use,
9cabdff1aSopenharmony_ci * copy, modify, merge, publish, distribute, sublicense, and/or sell
10cabdff1aSopenharmony_ci * copies of the Software, and to permit persons to whom the
11cabdff1aSopenharmony_ci * Software is furnished to do so, subject to the following
12cabdff1aSopenharmony_ci * conditions:
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * The above copyright notice and this permission notice shall be
15cabdff1aSopenharmony_ci * included in all copies or substantial portions of the Software.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18cabdff1aSopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19cabdff1aSopenharmony_ci * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20cabdff1aSopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21cabdff1aSopenharmony_ci * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22cabdff1aSopenharmony_ci * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23cabdff1aSopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24cabdff1aSopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
25cabdff1aSopenharmony_ci */
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
28cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
29cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h"
30cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h"
31cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h"
32cabdff1aSopenharmony_ci#include "fpel.h"
33cabdff1aSopenharmony_ci#include "vc1dsp.h"
34cabdff1aSopenharmony_ci#include "config.h"
35cabdff1aSopenharmony_ci
/*
 * LOOP_FILTER4(EXT): emit prototypes for ff_vc1_v_loop_filter4_<EXT>() and
 * ff_vc1_h_loop_filter4_<EXT>().
 *
 * The expansion already ends in ';', so the macro is invoked without a
 * trailing semicolon.
 */
#define LOOP_FILTER4(EXT)                                                    \
    void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride,       \
                                       int pq);                              \
    void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride,       \
                                       int pq);
/*
 * LOOP_FILTER816(EXT): declare ff_vc1_{v,h}_loop_filter8_<EXT>() and define
 * static vc1_{v,h}_loop_filter16_<EXT>() wrappers around them.
 *
 * Each wrapper calls the 8-variant twice: the vertical one on src and
 * src + 8, the horizontal one on src and src + 8 * stride.
 */
#define LOOP_FILTER816(EXT)                                                  \
    void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride,       \
                                       int pq);                              \
    void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride,       \
                                       int pq);                              \
                                                                             \
    static void vc1_v_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride,  \
                                            int pq)                          \
    {                                                                        \
        uint8_t *const second = src + 8;                                     \
                                                                             \
        ff_vc1_v_loop_filter8_ ## EXT(src,    stride, pq);                   \
        ff_vc1_v_loop_filter8_ ## EXT(second, stride, pq);                   \
    }                                                                        \
                                                                             \
    static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride,  \
                                            int pq)                          \
    {                                                                        \
        uint8_t *const second = src + 8 * stride;                            \
                                                                             \
        ff_vc1_h_loop_filter8_ ## EXT(src,    stride, pq);                   \
        ff_vc1_h_loop_filter8_ ## EXT(second, stride, pq);                   \
    }
54cabdff1aSopenharmony_ci
#if HAVE_X86ASM
/* Loop-filter prototypes; LOOP_FILTER816 also defines the 16-variant wrappers. */
LOOP_FILTER4(mmxext)
LOOP_FILTER4(ssse3)
LOOP_FILTER816(sse2)
LOOP_FILTER816(ssse3)

/*
 * Only a horizontal 8-variant is declared for SSE4 in this file, so its
 * 16-variant wrapper is written out by hand instead of via LOOP_FILTER816.
 */
void ff_vc1_h_loop_filter8_sse4(uint8_t *src, ptrdiff_t stride, int pq);

static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq)
{
    uint8_t *const second = src + 8 * stride;

    ff_vc1_h_loop_filter8_sse4(src,    stride, pq);
    ff_vc1_h_loop_filter8_sse4(second, stride, pq);
}

/*
 * DECLARE_FUNCTION(OP, DEPTH, INSN): define a static
 * <OP>vc1_mspel_mc00_<DEPTH><INSN>() that forwards dst, src and stride to
 * ff_<OP>pixels<DEPTH><INSN>() and passes DEPTH as the last argument.
 * The rnd argument is accepted but not used.
 */
#define DECLARE_FUNCTION(OP, DEPTH, INSN)                                    \
    static void OP ## vc1_mspel_mc00_ ## DEPTH ## INSN(uint8_t *dst,         \
                                                       const uint8_t *src,   \
                                                       ptrdiff_t stride,     \
                                                       int rnd)              \
    {                                                                        \
        ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH);       \
    }

DECLARE_FUNCTION(put_,  8, _mmx)
DECLARE_FUNCTION(avg_,  8, _mmxext)
DECLARE_FUNCTION(put_, 16, _sse2)
DECLARE_FUNCTION(avg_, 16, _sse2)

#endif /* HAVE_X86ASM */
82cabdff1aSopenharmony_ci
/*
 * Routines defined outside this file that ff_vc1dsp_init_x86() installs into
 * the {put,avg}_no_rnd_vc1_chroma_pixels_tab tables.
 */
void ff_put_vc1_chroma_mc8_nornd_mmx(uint8_t *dst, uint8_t *src,
                                     ptrdiff_t stride, int h, int x, int y);
void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
                                        ptrdiff_t stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride, int h, int x, int y);

/*
 * Routines defined outside this file that ff_vc1dsp_init_x86() installs into
 * the vc1_inv_trans_*_dc entries.
 */
void ff_vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, ptrdiff_t linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, ptrdiff_t linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, ptrdiff_t linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, ptrdiff_t linesize,
                                    int16_t *block);
99cabdff1aSopenharmony_ci
100cabdff1aSopenharmony_ci
101cabdff1aSopenharmony_ciav_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
102cabdff1aSopenharmony_ci{
103cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
104cabdff1aSopenharmony_ci
105cabdff1aSopenharmony_ci    if (HAVE_6REGS && INLINE_MMX(cpu_flags))
106cabdff1aSopenharmony_ci        if (EXTERNAL_MMX(cpu_flags))
107cabdff1aSopenharmony_ci        ff_vc1dsp_init_mmx(dsp);
108cabdff1aSopenharmony_ci
109cabdff1aSopenharmony_ci    if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags))
110cabdff1aSopenharmony_ci        if (EXTERNAL_MMXEXT(cpu_flags))
111cabdff1aSopenharmony_ci        ff_vc1dsp_init_mmxext(dsp);
112cabdff1aSopenharmony_ci
113cabdff1aSopenharmony_ci#define ASSIGN_LF4(EXT) \
114cabdff1aSopenharmony_ci        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
115cabdff1aSopenharmony_ci        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT
116cabdff1aSopenharmony_ci#define ASSIGN_LF816(EXT) \
117cabdff1aSopenharmony_ci        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
118cabdff1aSopenharmony_ci        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
119cabdff1aSopenharmony_ci        dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
120cabdff1aSopenharmony_ci        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
121cabdff1aSopenharmony_ci
122cabdff1aSopenharmony_ci#if HAVE_X86ASM
123cabdff1aSopenharmony_ci    if (EXTERNAL_MMX(cpu_flags)) {
124cabdff1aSopenharmony_ci        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
125cabdff1aSopenharmony_ci
126cabdff1aSopenharmony_ci        dsp->put_vc1_mspel_pixels_tab[1][0]      = put_vc1_mspel_mc00_8_mmx;
127cabdff1aSopenharmony_ci    }
128cabdff1aSopenharmony_ci    if (EXTERNAL_MMXEXT(cpu_flags)) {
129cabdff1aSopenharmony_ci        ASSIGN_LF4(mmxext);
130cabdff1aSopenharmony_ci        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
131cabdff1aSopenharmony_ci
132cabdff1aSopenharmony_ci        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
133cabdff1aSopenharmony_ci
134cabdff1aSopenharmony_ci        dsp->vc1_inv_trans_8x8_dc                = ff_vc1_inv_trans_8x8_dc_mmxext;
135cabdff1aSopenharmony_ci        dsp->vc1_inv_trans_4x8_dc                = ff_vc1_inv_trans_4x8_dc_mmxext;
136cabdff1aSopenharmony_ci        dsp->vc1_inv_trans_8x4_dc                = ff_vc1_inv_trans_8x4_dc_mmxext;
137cabdff1aSopenharmony_ci        dsp->vc1_inv_trans_4x4_dc                = ff_vc1_inv_trans_4x4_dc_mmxext;
138cabdff1aSopenharmony_ci    }
139cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2(cpu_flags)) {
140cabdff1aSopenharmony_ci        ASSIGN_LF816(sse2);
141cabdff1aSopenharmony_ci
142cabdff1aSopenharmony_ci        dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_sse2;
143cabdff1aSopenharmony_ci        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
144cabdff1aSopenharmony_ci    }
145cabdff1aSopenharmony_ci    if (EXTERNAL_SSSE3(cpu_flags)) {
146cabdff1aSopenharmony_ci        ASSIGN_LF4(ssse3);
147cabdff1aSopenharmony_ci        ASSIGN_LF816(ssse3);
148cabdff1aSopenharmony_ci        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
149cabdff1aSopenharmony_ci        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
150cabdff1aSopenharmony_ci    }
151cabdff1aSopenharmony_ci    if (EXTERNAL_SSE4(cpu_flags)) {
152cabdff1aSopenharmony_ci        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;
153cabdff1aSopenharmony_ci        dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
154cabdff1aSopenharmony_ci    }
155cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */
156cabdff1aSopenharmony_ci}
157