xref: /third_party/ffmpeg/libavcodec/x86/hevcdsp.h (revision cabdff1a)
/*
 * HEVC video decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
 *
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24
25#ifndef AVCODEC_X86_HEVCDSP_H
26#define AVCODEC_X86_HEVCDSP_H
27
28#include <stddef.h>
29#include <stdint.h>
30
31
/*
 * Store the five ff_hevc_put_hevc_* variants of one kernel (plain, bi, uni,
 * uni_w and bi_w) into slot [idx1][idx2][idx3] of dst, dst_bi, dst_uni,
 * dst_uni_w and dst_bi_w respectively.
 *
 * name, D and opt are pasted into the symbol name, e.g.
 *     PEL_LINK(t, i, j, k, qpel_h8, 8, sse4);
 * assigns ff_hevc_put_hevc_qpel_h8_8_sse4 to t[i][j][k],
 * ff_hevc_put_hevc_bi_qpel_h8_8_sse4 to t_bi[i][j][k], and so on.
 *
 * The assignments are wrapped in do { } while (0) so that the macro expands
 * to exactly one statement and stays correct under an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \
do { \
    dst[idx1][idx2][idx3]           = ff_hevc_put_hevc_ ## name ## _ ## D ## _ ## opt; \
    dst ## _bi[idx1][idx2][idx3]    = ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _ ## opt; \
    dst ## _uni[idx1][idx2][idx3]   = ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _ ## opt; \
    dst ## _uni_w[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _ ## opt; \
    dst ## _bi_w[idx1][idx2][idx3]  = ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _ ## opt; \
} while (0)
38
39
/*
 * Declare the five prototypes that belong to one kernel:
 *     ff_hevc_put_hevc_<name>_<D>_<opt>        (writes to an int16_t buffer)
 *     ff_hevc_put_hevc_bi_<name>_<D>_<opt>     (takes a second int16_t source)
 *     ff_hevc_put_hevc_uni_<name>_<D>_<opt>
 *     ff_hevc_put_hevc_uni_w_<name>_<D>_<opt>  (adds denom/wx/ox)
 *     ff_hevc_put_hevc_bi_w_<name>_<D>_<opt>   (adds denom/wx0/wx1/ox0/ox1)
 *
 * The last declaration carries no semicolon; the invocation supplies it.
 * These are the symbols PEL_LINK assigns.
 */
#define PEL_PROTOTYPE(name, D, opt) \
void ff_hevc_put_hevc_ ## name ## _ ## D ## _ ## opt( \
        int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, \
        int height, intptr_t mx, intptr_t my, int width); \
void ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _ ## opt( \
        uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, \
        int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \
void ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _ ## opt( \
        uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, \
        int height, intptr_t mx, intptr_t my, int width); \
void ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _ ## opt( \
        uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, \
        int height, int denom, int wx, int ox, \
        intptr_t mx, intptr_t my, int width); \
void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _ ## opt( \
        uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, \
        int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, \
        intptr_t mx, intptr_t my, int width)
46
47
///////////////////////////////////////////////////////////////////////////////
// MC functions
///////////////////////////////////////////////////////////////////////////////

/*
 * Invoke PEL_PROTOTYPE once per width suffix 4, 6, 8, 12, 16, 24, 32, 48 and
 * 64, appending the suffix to fname (e.g. epel_h -> epel_h4 ... epel_h64).
 * The final invocation carries no semicolon; the caller supplies it.
 */
#define EPEL_PROTOTYPES(fname, bitd, opt) \
        PEL_PROTOTYPE(fname ## 4,  bitd, opt); \
        PEL_PROTOTYPE(fname ## 6,  bitd, opt); \
        PEL_PROTOTYPE(fname ## 8,  bitd, opt); \
        PEL_PROTOTYPE(fname ## 12, bitd, opt); \
        PEL_PROTOTYPE(fname ## 16, bitd, opt); \
        PEL_PROTOTYPE(fname ## 24, bitd, opt); \
        PEL_PROTOTYPE(fname ## 32, bitd, opt); \
        PEL_PROTOTYPE(fname ## 48, bitd, opt); \
        PEL_PROTOTYPE(fname ## 64, bitd, opt)
62
/*
 * Invoke PEL_PROTOTYPE once per width suffix 4, 8, 12, 16, 24, 32, 48 and 64,
 * appending the suffix to fname. Unlike EPEL_PROTOTYPES there is no width 6.
 * The final invocation carries no semicolon; the caller supplies it.
 */
#define QPEL_PROTOTYPES(fname, bitd, opt) \
        PEL_PROTOTYPE(fname ## 4,  bitd, opt); \
        PEL_PROTOTYPE(fname ## 8,  bitd, opt); \
        PEL_PROTOTYPE(fname ## 12, bitd, opt); \
        PEL_PROTOTYPE(fname ## 16, bitd, opt); \
        PEL_PROTOTYPE(fname ## 24, bitd, opt); \
        PEL_PROTOTYPE(fname ## 32, bitd, opt); \
        PEL_PROTOTYPE(fname ## 48, bitd, opt); \
        PEL_PROTOTYPE(fname ## 64, bitd, opt)
72
/*
 * Declare the two weighting prototypes for one width:
 *     ff_hevc_put_hevc_uni_w<width>_<bitd>_<opt>  (one int16_t source)
 *     ff_hevc_put_hevc_bi_w<width>_<bitd>_<opt>   (two int16_t sources)
 *
 * Note there is no underscore between "w" and the width, so these names do
 * not collide with the uni_w_ / bi_w_ symbols declared by PEL_PROTOTYPE.
 * The last declaration carries no semicolon; the invocation supplies it.
 */
#define WEIGHTING_PROTOTYPE(width, bitd, opt) \
void ff_hevc_put_hevc_uni_w ## width ## _ ## bitd ## _ ## opt( \
        uint8_t *dst, ptrdiff_t dststride, int16_t *_src, \
        int height, int denom, int _wx, int _ox); \
void ff_hevc_put_hevc_bi_w ## width ## _ ## bitd ## _ ## opt( \
        uint8_t *dst, ptrdiff_t dststride, int16_t *_src, int16_t *_src2, \
        int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1)
76
/*
 * Invoke WEIGHTING_PROTOTYPE for widths 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64
 * with the given bitd and opt. The final invocation carries no semicolon; the
 * caller supplies it.
 */
#define WEIGHTING_PROTOTYPES(bitd, opt) \
        WEIGHTING_PROTOTYPE(2,  bitd, opt); \
        WEIGHTING_PROTOTYPE(4,  bitd, opt); \
        WEIGHTING_PROTOTYPE(6,  bitd, opt); \
        WEIGHTING_PROTOTYPE(8,  bitd, opt); \
        WEIGHTING_PROTOTYPE(12, bitd, opt); \
        WEIGHTING_PROTOTYPE(16, bitd, opt); \
        WEIGHTING_PROTOTYPE(24, bitd, opt); \
        WEIGHTING_PROTOTYPE(32, bitd, opt); \
        WEIGHTING_PROTOTYPE(48, bitd, opt); \
        WEIGHTING_PROTOTYPE(64, bitd, opt)
88
89
90///////////////////////////////////////////////////////////////////////////////
91// QPEL_PIXELS EPEL_PIXELS
92///////////////////////////////////////////////////////////////////////////////
93EPEL_PROTOTYPES(pel_pixels ,  8, sse4);
94EPEL_PROTOTYPES(pel_pixels , 10, sse4);
95EPEL_PROTOTYPES(pel_pixels , 12, sse4);
96
// pel_pixels kernels with the avx2 suffix, widths 16..64, 8-bit variants.
// Only the plain (int16_t destination) form is declared here.
void ff_hevc_put_hevc_pel_pixels16_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels24_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);

// Same widths, 10-bit variants.
void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
108
109
110
// uni_pel_pixels kernels with the avx2 suffix (uint8_t destination with its
// own stride).
void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride,
                                              int height, intptr_t mx, intptr_t my, int width);
// The 96 and 128 variants are used for 10bit.
void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride,
                                               uint8_t *_src, ptrdiff_t _srcstride,
                                               int height, intptr_t mx, intptr_t my, int width);
116
117
// bi_pel_pixels kernels with the avx2 suffix, widths 16..64, 8-bit variants.
// Each takes a second int16_t source (src2) in addition to _src.
void ff_hevc_put_hevc_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                             uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                             int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                             uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                             int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                             uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                             int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                             uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                             int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                             uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                             int height, intptr_t mx, intptr_t my, int width);

// Same widths, 10-bit variants.
void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                              int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride,
                                              uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2,
                                              int height, intptr_t mx, intptr_t my, int width);
129
130///////////////////////////////////////////////////////////////////////////////
131// EPEL
132///////////////////////////////////////////////////////////////////////////////
133EPEL_PROTOTYPES(epel_h ,  8, sse4);
134EPEL_PROTOTYPES(epel_h , 10, sse4);
135EPEL_PROTOTYPES(epel_h , 12, sse4);
136
137EPEL_PROTOTYPES(epel_v ,  8, sse4);
138EPEL_PROTOTYPES(epel_v , 10, sse4);
139EPEL_PROTOTYPES(epel_v , 12, sse4);
140
141EPEL_PROTOTYPES(epel_hv ,  8, sse4);
142EPEL_PROTOTYPES(epel_hv , 10, sse4);
143EPEL_PROTOTYPES(epel_hv , 12, sse4);
144
145PEL_PROTOTYPE(epel_h16, 8, avx2);
146PEL_PROTOTYPE(epel_h24, 8, avx2);
147PEL_PROTOTYPE(epel_h32, 8, avx2);
148PEL_PROTOTYPE(epel_h48, 8, avx2);
149PEL_PROTOTYPE(epel_h64, 8, avx2);
150
151PEL_PROTOTYPE(epel_h16,10, avx2);
152PEL_PROTOTYPE(epel_h24,10, avx2);
153PEL_PROTOTYPE(epel_h32,10, avx2);
154PEL_PROTOTYPE(epel_h48,10, avx2);
155PEL_PROTOTYPE(epel_h64,10, avx2);
156
157PEL_PROTOTYPE(epel_v16, 8, avx2);
158PEL_PROTOTYPE(epel_v24, 8, avx2);
159PEL_PROTOTYPE(epel_v32, 8, avx2);
160PEL_PROTOTYPE(epel_v48, 8, avx2);
161PEL_PROTOTYPE(epel_v64, 8, avx2);
162
163PEL_PROTOTYPE(epel_v16,10, avx2);
164PEL_PROTOTYPE(epel_v24,10, avx2);
165PEL_PROTOTYPE(epel_v32,10, avx2);
166PEL_PROTOTYPE(epel_v48,10, avx2);
167PEL_PROTOTYPE(epel_v64,10, avx2);
168
169PEL_PROTOTYPE(epel_hv16, 8, avx2);
170PEL_PROTOTYPE(epel_hv24, 8, avx2);
171PEL_PROTOTYPE(epel_hv32, 8, avx2);
172PEL_PROTOTYPE(epel_hv48, 8, avx2);
173PEL_PROTOTYPE(epel_hv64, 8, avx2);
174
175PEL_PROTOTYPE(epel_hv16,10, avx2);
176PEL_PROTOTYPE(epel_hv24,10, avx2);
177PEL_PROTOTYPE(epel_hv32,10, avx2);
178PEL_PROTOTYPE(epel_hv48,10, avx2);
179PEL_PROTOTYPE(epel_hv64,10, avx2);
180
181///////////////////////////////////////////////////////////////////////////////
182// QPEL
183///////////////////////////////////////////////////////////////////////////////
184QPEL_PROTOTYPES(qpel_h ,  8, sse4);
185QPEL_PROTOTYPES(qpel_h , 10, sse4);
186QPEL_PROTOTYPES(qpel_h , 12, sse4);
187
188QPEL_PROTOTYPES(qpel_v,  8, sse4);
189QPEL_PROTOTYPES(qpel_v, 10, sse4);
190QPEL_PROTOTYPES(qpel_v, 12, sse4);
191
192QPEL_PROTOTYPES(qpel_hv,  8, sse4);
193QPEL_PROTOTYPES(qpel_hv, 10, sse4);
194QPEL_PROTOTYPES(qpel_hv, 12, sse4);
195
196PEL_PROTOTYPE(qpel_h16, 8, avx2);
197PEL_PROTOTYPE(qpel_h24, 8, avx2);
198PEL_PROTOTYPE(qpel_h32, 8, avx2);
199PEL_PROTOTYPE(qpel_h48, 8, avx2);
200PEL_PROTOTYPE(qpel_h64, 8, avx2);
201
202PEL_PROTOTYPE(qpel_h16,10, avx2);
203PEL_PROTOTYPE(qpel_h24,10, avx2);
204PEL_PROTOTYPE(qpel_h32,10, avx2);
205PEL_PROTOTYPE(qpel_h48,10, avx2);
206PEL_PROTOTYPE(qpel_h64,10, avx2);
207
208PEL_PROTOTYPE(qpel_v16, 8, avx2);
209PEL_PROTOTYPE(qpel_v24, 8, avx2);
210PEL_PROTOTYPE(qpel_v32, 8, avx2);
211PEL_PROTOTYPE(qpel_v48, 8, avx2);
212PEL_PROTOTYPE(qpel_v64, 8, avx2);
213
214PEL_PROTOTYPE(qpel_v16,10, avx2);
215PEL_PROTOTYPE(qpel_v24,10, avx2);
216PEL_PROTOTYPE(qpel_v32,10, avx2);
217PEL_PROTOTYPE(qpel_v48,10, avx2);
218PEL_PROTOTYPE(qpel_v64,10, avx2);
219
220PEL_PROTOTYPE(qpel_hv16, 8, avx2);
221PEL_PROTOTYPE(qpel_hv24, 8, avx2);
222PEL_PROTOTYPE(qpel_hv32, 8, avx2);
223PEL_PROTOTYPE(qpel_hv48, 8, avx2);
224PEL_PROTOTYPE(qpel_hv64, 8, avx2);
225
226PEL_PROTOTYPE(qpel_hv16,10, avx2);
227PEL_PROTOTYPE(qpel_hv24,10, avx2);
228PEL_PROTOTYPE(qpel_hv32,10, avx2);
229PEL_PROTOTYPE(qpel_hv48,10, avx2);
230PEL_PROTOTYPE(qpel_hv64,10, avx2);
231
232WEIGHTING_PROTOTYPES(8, sse4);
233WEIGHTING_PROTOTYPES(10, sse4);
234WEIGHTING_PROTOTYPES(12, sse4);
235
// qpel kernels with the avx512icl suffix, 8-bit only. Just the plain
// (int16_t destination) form is declared: qpel_h for widths 4, 8, 16, 32 and
// 64, plus qpel_hv for width 8.
void ff_hevc_put_hevc_qpel_h4_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h8_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h16_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h32_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h64_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_hv8_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride,
                                           int height, intptr_t mx, intptr_t my, int width);
242
///////////////////////////////////////////////////////////////////////////////
// TRANSFORM_ADD
///////////////////////////////////////////////////////////////////////////////

// add_residual prototypes, 8-bit variants. The symbol name encodes
// <size>_<bitd>_<instruction set>; all share the (dst, res, stride) signature.
void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
257
// add_residual prototypes, 10-bit variants. dst is still declared as
// uint8_t *, matching the 8-bit prototypes.
void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
265
266#endif // AVCODEC_X86_HEVCDSP_H
267