1/*
2 * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include <stddef.h>
22#include <stdint.h>
23#include "config.h"
24#include "libavutil/attributes.h"
25#include "libavutil/cpu.h"
26#include "libavutil/x86/cpu.h"
27#include "libavcodec/codec_id.h"
28#include "libavcodec/h264pred.h"
29
/*
 * Emits the prototype of a single 4x4 intra predictor whose symbol is
 * ff_pred4x4_<MODE>_<BITS>_<ISA>. Only the declaration is produced here;
 * the trailing semicolon is part of the macro, so uses carry none.
 */
#define PRED4x4(MODE, BITS, ISA) \
void ff_pred4x4_ ## MODE ## _ ## BITS ## _ ## ISA (uint8_t *src, \
                                                   const uint8_t *topright, \
                                                   ptrdiff_t stride);

/* 10-bit 4x4 predictors, grouped by instruction set */
PRED4x4(dc,              10, mmxext)
PRED4x4(horizontal_up,   10, mmxext)

PRED4x4(down_left,       10, sse2)
PRED4x4(down_right,      10, sse2)
PRED4x4(vertical_left,   10, sse2)
PRED4x4(vertical_right,  10, sse2)
PRED4x4(horizontal_down, 10, sse2)

PRED4x4(down_right,      10, ssse3)
PRED4x4(vertical_right,  10, ssse3)
PRED4x4(horizontal_down, 10, ssse3)

PRED4x4(down_left,       10, avx)
PRED4x4(down_right,      10, avx)
PRED4x4(vertical_left,   10, avx)
PRED4x4(vertical_right,  10, avx)
PRED4x4(horizontal_down, 10, avx)
50
/*
 * Emits the prototype of a single 8x8 intra predictor whose symbol is
 * ff_pred8x8_<MODE>_<BITS>_<ISA>. The macro supplies the terminating
 * semicolon itself.
 */
#define PRED8x8(MODE, BITS, ISA) \
void ff_pred8x8_ ## MODE ## _ ## BITS ## _ ## ISA (uint8_t *src, \
                                                   ptrdiff_t stride);

/* 10-bit 8x8 predictors (SSE2 only) */
PRED8x8(dc,         10, sse2)
PRED8x8(top_dc,     10, sse2)
PRED8x8(plane,      10, sse2)
PRED8x8(vertical,   10, sse2)
PRED8x8(horizontal, 10, sse2)
60
/*
 * Emits the prototype of a single 8x8L intra predictor whose symbol is
 * ff_pred8x8l_<MODE>_<BITS>_<ISA>. Unlike the other predictor families
 * declared in this file, these take has_topleft / has_topright flags.
 * The macro supplies the terminating semicolon itself.
 */
#define PRED8x8L(MODE, BITS, ISA) \
void ff_pred8x8l_ ## MODE ## _ ## BITS ## _ ## ISA (uint8_t *src, \
                                                    int has_topleft, \
                                                    int has_topright, \
                                                    ptrdiff_t stride);

/* 10-bit 8x8L predictors, grouped by instruction set */
PRED8x8L(dc,             10, sse2)
PRED8x8L(128_dc,         10, sse2)
PRED8x8L(top_dc,         10, sse2)
PRED8x8L(vertical,       10, sse2)
PRED8x8L(horizontal,     10, sse2)
PRED8x8L(down_left,      10, sse2)
PRED8x8L(down_right,     10, sse2)
PRED8x8L(vertical_right, 10, sse2)
PRED8x8L(horizontal_up,  10, sse2)

PRED8x8L(horizontal,     10, ssse3)
PRED8x8L(down_left,      10, ssse3)
PRED8x8L(down_right,     10, ssse3)
PRED8x8L(vertical_right, 10, ssse3)
PRED8x8L(horizontal_up,  10, ssse3)

PRED8x8L(dc,             10, avx)
PRED8x8L(top_dc,         10, avx)
PRED8x8L(vertical,       10, avx)
PRED8x8L(horizontal,     10, avx)
PRED8x8L(down_left,      10, avx)
PRED8x8L(down_right,     10, avx)
PRED8x8L(vertical_right, 10, avx)
PRED8x8L(horizontal_up,  10, avx)
89
/*
 * Emits the prototype of a single 16x16 intra predictor whose symbol is
 * ff_pred16x16_<MODE>_<BITS>_<ISA>. The macro supplies the terminating
 * semicolon itself.
 */
#define PRED16x16(MODE, BITS, ISA) \
void ff_pred16x16_ ## MODE ## _ ## BITS ## _ ## ISA (uint8_t *src, \
                                                     ptrdiff_t stride);

/* 10-bit 16x16 predictors (SSE2 only) */
PRED16x16(dc,         10, sse2)
PRED16x16(top_dc,     10, sse2)
PRED16x16(128_dc,     10, sse2)
PRED16x16(left_dc,    10, sse2)
PRED16x16(vertical,   10, sse2)
PRED16x16(horizontal, 10, sse2)
100
101/* 8-bit versions */
102PRED16x16(vertical, 8, sse)
103PRED16x16(horizontal, 8, mmxext)
104PRED16x16(horizontal, 8, ssse3)
105PRED16x16(dc, 8, sse2)
106PRED16x16(dc, 8, ssse3)
107PRED16x16(plane_h264, 8, sse2)
108PRED16x16(plane_h264, 8, ssse3)
109PRED16x16(plane_rv40, 8, sse2)
110PRED16x16(plane_rv40, 8, ssse3)
111PRED16x16(plane_svq3, 8, sse2)
112PRED16x16(plane_svq3, 8, ssse3)
113PRED16x16(tm_vp8, 8, sse2)
114PRED16x16(tm_vp8, 8, avx2)
115
116PRED8x8(top_dc, 8, mmxext)
117PRED8x8(dc_rv40, 8, mmxext)
118PRED8x8(dc, 8, mmxext)
119PRED8x8(vertical, 8, mmx)
120PRED8x8(horizontal, 8, mmxext)
121PRED8x8(horizontal, 8, ssse3)
122PRED8x8(plane, 8, sse2)
123PRED8x8(plane, 8, ssse3)
124PRED8x8(tm_vp8, 8, sse2)
125PRED8x8(tm_vp8, 8, ssse3)
126
127PRED8x8L(top_dc, 8, mmxext)
128PRED8x8L(top_dc, 8, ssse3)
129PRED8x8L(dc, 8, mmxext)
130PRED8x8L(dc, 8, ssse3)
131PRED8x8L(horizontal, 8, mmxext)
132PRED8x8L(horizontal, 8, ssse3)
133PRED8x8L(vertical, 8, mmxext)
134PRED8x8L(vertical, 8, ssse3)
135PRED8x8L(down_left, 8, sse2)
136PRED8x8L(down_left, 8, ssse3)
137PRED8x8L(down_right, 8, sse2)
138PRED8x8L(down_right, 8, ssse3)
139PRED8x8L(vertical_right, 8, sse2)
140PRED8x8L(vertical_right, 8, ssse3)
141PRED8x8L(vertical_left, 8, sse2)
142PRED8x8L(vertical_left, 8, ssse3)
143PRED8x8L(horizontal_up, 8, mmxext)
144PRED8x8L(horizontal_up, 8, ssse3)
145PRED8x8L(horizontal_down, 8, sse2)
146PRED8x8L(horizontal_down, 8, ssse3)
147
148PRED4x4(dc, 8, mmxext)
149PRED4x4(down_left, 8, mmxext)
150PRED4x4(down_right, 8, mmxext)
151PRED4x4(vertical_left, 8, mmxext)
152PRED4x4(vertical_right, 8, mmxext)
153PRED4x4(horizontal_up, 8, mmxext)
154PRED4x4(horizontal_down, 8, mmxext)
155PRED4x4(tm_vp8, 8, mmxext)
156PRED4x4(tm_vp8, 8, ssse3)
157PRED4x4(vertical_vp8, 8, mmxext)
158
159av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
160                                   const int bit_depth,
161                                   const int chroma_format_idc)
162{
163    int cpu_flags = av_get_cpu_flags();
164
165    if (bit_depth == 8) {
166        if (EXTERNAL_MMX(cpu_flags)) {
167            if (chroma_format_idc <= 1) {
168                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
169            }
170        }
171
172        if (EXTERNAL_MMXEXT(cpu_flags)) {
173            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
174            if (chroma_format_idc <= 1)
175                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
176            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
177            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
178            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
179            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
180            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
181            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
182            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
183            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
184            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
185            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 ||
186                codec_id == AV_CODEC_ID_H264) {
187                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
188            }
189            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
190                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
191            }
192            if (codec_id != AV_CODEC_ID_RV40) {
193                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
194            }
195            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
196                if (chroma_format_idc <= 1) {
197                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
198                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
199                }
200            }
201            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
202                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
203                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
204                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
205            }
206        }
207
208        if (EXTERNAL_SSE(cpu_flags)) {
209            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
210        }
211
212        if (EXTERNAL_SSE2(cpu_flags)) {
213            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
214            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
215            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
216            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
217            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
218            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
219            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
220                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
221                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
222            } else {
223                if (chroma_format_idc <= 1)
224                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
225                if (codec_id == AV_CODEC_ID_SVQ3) {
226                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
227                } else if (codec_id == AV_CODEC_ID_RV40) {
228                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
229                } else {
230                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
231                }
232            }
233        }
234
235        if (EXTERNAL_SSSE3(cpu_flags)) {
236            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
237            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
238            if (chroma_format_idc <= 1)
239                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
240            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
241            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
242            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
243            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
244            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
245            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
246            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
247            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
248            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
249            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
250            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
251                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
252                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
253            } else {
254                if (chroma_format_idc <= 1)
255                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
256                if (codec_id == AV_CODEC_ID_SVQ3) {
257                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
258                } else if (codec_id == AV_CODEC_ID_RV40) {
259                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
260                } else {
261                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
262                }
263            }
264        }
265
266        if(EXTERNAL_AVX2(cpu_flags)){
267            if (codec_id == AV_CODEC_ID_VP8) {
268                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_avx2;
269            }
270        }
271    } else if (bit_depth == 10) {
272        if (EXTERNAL_MMXEXT(cpu_flags)) {
273            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
274            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
275        }
276        if (EXTERNAL_SSE2(cpu_flags)) {
277            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
278            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
279            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
280            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
281            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
282
283            if (chroma_format_idc <= 1) {
284                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
285                h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
286                h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
287                h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
288                h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
289            }
290
291            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
292            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
293            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
294            h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
295            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
296            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
297            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
298            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
299            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;
300
301            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
302            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
303            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
304            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
305            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
306            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
307        }
308        if (EXTERNAL_SSSE3(cpu_flags)) {
309            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
310            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
311            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;
312
313            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
314            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
315            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
316            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
317            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
318        }
319        if (EXTERNAL_AVX(cpu_flags)) {
320            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
321            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
322            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
323            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
324            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;
325
326            h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
327            h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
328            h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
329            h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
330            h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
331            h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
332            h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
333            h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
334        }
335    }
336}
337