/*
 * Copyright (C) 2010 David Conrad
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h"
22cabdff1aSopenharmony_ci#include "libavcodec/diracdsp.h"
23cabdff1aSopenharmony_ci#include "fpel.h"
24cabdff1aSopenharmony_ci
/*
 * Prototypes of routines that are referenced but not defined in this file.
 * They are grouped by the instruction-set suffix in their name, which matches
 * the CPU-flag tier under which ff_diracdsp_init_x86() installs them.
 */

/* MMX */
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride,
                            const uint8_t *obmc_weight, int yblen);

/* SSE2 */
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int,
                              const int16_t *, int, int, int);

/* Declared here but not installed by ff_diracdsp_init_x86() in this file. */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);

/* SSE4 */
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_ci#if HAVE_X86ASM
39cabdff1aSopenharmony_ci
/*
 * HPEL_FILTER(MMSIZE, EXT) declares the external routines
 * ff_dirac_hpel_filter_v_EXT / ff_dirac_hpel_filter_h_EXT and defines
 * dirac_hpel_filter_EXT(), which drives them one row at a time.
 *
 * For each of the `height` rows the driver
 *   1. calls the v routine with dstv and src both moved back by MMSIZE and a
 *      width argument of width + MMSIZE + 5,
 *   2. calls the h routine from src into dsth,
 *   3. calls the h routine from the dstv row just written into dstc,
 * and then advances dsth, dstv, dstc and src by `stride`.
 *
 * NOTE(review): the widened v pass presumably produces the extra dstv columns
 * that the h pass over dstv reads around the row; if so, src and dstv need
 * padding on both sides -- confirm against the assembly.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                               \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *dst, const uint8_t *src,      \
                                        int stride, int width);                \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *dst, const uint8_t *src,      \
                                        int width);                            \
                                                                               \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,        \
                                          uint8_t *dstc, const uint8_t *src,   \
                                          int stride, int width, int height)   \
    {                                                                          \
        for (; height; height--) {                                             \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE,        \
                                           stride, width + MMSIZE + 5);        \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src,  width);                 \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                 \
                                                                               \
            src  += stride;                                                    \
            dsth += stride;                                                    \
            dstv += stride;                                                    \
            dstc += stride;                                                    \
        }                                                                      \
    }
59cabdff1aSopenharmony_ci
/*
 * PIXFUNC(PFX, IDX, EXT) stores PFX_dirac_pixels16_EXT and
 * PFX_dirac_pixels32_EXT into c->PFX_dirac_pixels_tab[1][IDX] and
 * c->PFX_dirac_pixels_tab[2][IDX] respectively.
 *
 * Row 0 of the table is intentionally not written: the assignment of
 * PFX_dirac_pixels8_EXT was previously present only as commented-out code
 * tagged "MMXDISABLED".
 *
 * A pointer named `c` must be in scope where the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement; without it, an unbraced `if (cond) PIXFUNC(...);` would guard only
 * the first assignment and always execute the second.
 */
#define PIXFUNC(PFX, IDX, EXT)                                                 \
    do {                                                                       \
        c->PFX ## _dirac_pixels_tab[1][IDX] = PFX ## _dirac_pixels16_ ## EXT;  \
        c->PFX ## _dirac_pixels_tab[2][IDX] = PFX ## _dirac_pixels32_ ## EXT;  \
    } while (0)
64cabdff1aSopenharmony_ci
/*
 * DIRAC_PIXOP(OPNAME, EXT) defines two static wrappers,
 * OPNAME_dirac_pixels16_EXT() and OPNAME_dirac_pixels32_EXT().
 *
 * When h is a multiple of 4 a wrapper passes only src[0] on to
 * ff_OPNAME_pixels16_EXT(); the 32 variant calls it twice, the second time
 * with dst and src[0] both offset by 16. For any other h the call is handed
 * unchanged (with the whole src array) to ff_OPNAME_dirac_pixels16_c() or
 * ff_OPNAME_dirac_pixels32_c().
 *
 * NOTE(review): the h & 3 test suggests the EXT routine only supports row
 * counts that are multiples of four -- confirm against its implementation.
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                               \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if (!(h & 3)) {                                                            \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);            \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);                   \
}                                                                              \
                                                                               \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if (!(h & 3)) {                                                            \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);  \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);  \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);                   \
}
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_ciDIRAC_PIXOP(put, sse2)
86cabdff1aSopenharmony_ciDIRAC_PIXOP(avg, sse2)
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ciHPEL_FILTER(16, sse2)
89cabdff1aSopenharmony_ci
90cabdff1aSopenharmony_ci#endif // HAVE_X86ASM
91cabdff1aSopenharmony_ci
92cabdff1aSopenharmony_civoid ff_diracdsp_init_x86(DiracDSPContext* c)
93cabdff1aSopenharmony_ci{
94cabdff1aSopenharmony_ci#if HAVE_X86ASM
95cabdff1aSopenharmony_ci    int mm_flags = av_get_cpu_flags();
96cabdff1aSopenharmony_ci
97cabdff1aSopenharmony_ci    if (EXTERNAL_MMX(mm_flags)) {
98cabdff1aSopenharmony_ci        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
99cabdff1aSopenharmony_ci    }
100cabdff1aSopenharmony_ci
101cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2(mm_flags)) {
102cabdff1aSopenharmony_ci        c->dirac_hpel_filter = dirac_hpel_filter_sse2;
103cabdff1aSopenharmony_ci        c->add_rect_clamped = ff_add_rect_clamped_sse2;
104cabdff1aSopenharmony_ci        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
105cabdff1aSopenharmony_ci
106cabdff1aSopenharmony_ci        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
107cabdff1aSopenharmony_ci        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
108cabdff1aSopenharmony_ci
109cabdff1aSopenharmony_ci        c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2;
110cabdff1aSopenharmony_ci        c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2;
111cabdff1aSopenharmony_ci        c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2;
112cabdff1aSopenharmony_ci        c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2;
113cabdff1aSopenharmony_ci    }
114cabdff1aSopenharmony_ci
115cabdff1aSopenharmony_ci    if (EXTERNAL_SSE4(mm_flags)) {
116cabdff1aSopenharmony_ci        c->dequant_subband[1]         = ff_dequant_subband_32_sse4;
117cabdff1aSopenharmony_ci        c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
118cabdff1aSopenharmony_ci    }
119cabdff1aSopenharmony_ci#endif
120cabdff1aSopenharmony_ci}
121