/*
 * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 
21 #include <stddef.h>
22 #include <stdint.h>
23 #include "config.h"
24 #include "libavutil/attributes.h"
25 #include "libavutil/cpu.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/codec_id.h"
28 #include "libavcodec/h264pred.h"
29 
/**
 * Declare the prototype of the 4x4 predictor
 * ff_pred4x4_<TYPE>_<DEPTH>_<OPT>(src, topright, stride).
 *
 * Only the prototype is produced; the function itself is defined outside
 * this file (presumably in the x86 assembly sources -- confirm).
 * The expansion already ends in ';', so invocations take no trailing
 * semicolon.
 */
#define PRED4x4(TYPE, DEPTH, OPT) \
void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                    const uint8_t *topright, \
                                                    ptrdiff_t stride);

/* 10-bit versions */
PRED4x4(dc, 10, mmxext)
PRED4x4(down_left, 10, sse2)
PRED4x4(down_left, 10, avx)
PRED4x4(down_right, 10, sse2)
PRED4x4(down_right, 10, ssse3)
PRED4x4(down_right, 10, avx)
PRED4x4(vertical_left, 10, sse2)
PRED4x4(vertical_left, 10, avx)
PRED4x4(vertical_right, 10, sse2)
PRED4x4(vertical_right, 10, ssse3)
PRED4x4(vertical_right, 10, avx)
PRED4x4(horizontal_up, 10, mmxext)
PRED4x4(horizontal_down, 10, sse2)
PRED4x4(horizontal_down, 10, ssse3)
PRED4x4(horizontal_down, 10, avx)
50 
/**
 * Declare the prototype of the 8x8 predictor
 * ff_pred8x8_<TYPE>_<DEPTH>_<OPT>(src, stride).
 *
 * Only the prototype is produced; the function itself is defined outside
 * this file (presumably in the x86 assembly sources -- confirm).
 * The expansion already ends in ';', so invocations take no trailing
 * semicolon.
 */
#define PRED8x8(TYPE, DEPTH, OPT) \
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                    ptrdiff_t stride);

/* 10-bit versions */
PRED8x8(dc, 10, sse2)
PRED8x8(top_dc, 10, sse2)
PRED8x8(plane, 10, sse2)
PRED8x8(vertical, 10, sse2)
PRED8x8(horizontal, 10, sse2)
60 
/**
 * Declare the prototype of the 8x8 "l" predictor
 * ff_pred8x8l_<TYPE>_<DEPTH>_<OPT>(src, has_topleft, has_topright, stride).
 *
 * Only the prototype is produced; the function itself is defined outside
 * this file (presumably in the x86 assembly sources -- confirm).
 * The expansion already ends in ';', so invocations take no trailing
 * semicolon.
 */
#define PRED8x8L(TYPE, DEPTH, OPT)\
void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                     int has_topleft, \
                                                     int has_topright, \
                                                     ptrdiff_t stride);

/* 10-bit versions */
PRED8x8L(dc, 10, sse2)
PRED8x8L(dc, 10, avx)
PRED8x8L(128_dc, 10, sse2)
PRED8x8L(top_dc, 10, sse2)
PRED8x8L(top_dc, 10, avx)
PRED8x8L(vertical, 10, sse2)
PRED8x8L(vertical, 10, avx)
PRED8x8L(horizontal, 10, sse2)
PRED8x8L(horizontal, 10, ssse3)
PRED8x8L(horizontal, 10, avx)
PRED8x8L(down_left, 10, sse2)
PRED8x8L(down_left, 10, ssse3)
PRED8x8L(down_left, 10, avx)
PRED8x8L(down_right, 10, sse2)
PRED8x8L(down_right, 10, ssse3)
PRED8x8L(down_right, 10, avx)
PRED8x8L(vertical_right, 10, sse2)
PRED8x8L(vertical_right, 10, ssse3)
PRED8x8L(vertical_right, 10, avx)
PRED8x8L(horizontal_up, 10, sse2)
PRED8x8L(horizontal_up, 10, ssse3)
PRED8x8L(horizontal_up, 10, avx)
89 
/**
 * Declare the prototype of the 16x16 predictor
 * ff_pred16x16_<TYPE>_<DEPTH>_<OPT>(src, stride).
 *
 * Only the prototype is produced; the function itself is defined outside
 * this file (presumably in the x86 assembly sources -- confirm).
 * The expansion already ends in ';', so invocations take no trailing
 * semicolon.
 */
#define PRED16x16(TYPE, DEPTH, OPT)\
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                      ptrdiff_t stride);

/* 10-bit versions */
PRED16x16(dc, 10, sse2)
PRED16x16(top_dc, 10, sse2)
PRED16x16(128_dc, 10, sse2)
PRED16x16(left_dc, 10, sse2)
PRED16x16(vertical, 10, sse2)
PRED16x16(horizontal, 10, sse2)
100 
101 /* 8-bit versions */
102 PRED16x16(vertical, 8, sse)
103 PRED16x16(horizontal, 8, mmxext)
104 PRED16x16(horizontal, 8, ssse3)
105 PRED16x16(dc, 8, sse2)
106 PRED16x16(dc, 8, ssse3)
107 PRED16x16(plane_h264, 8, sse2)
108 PRED16x16(plane_h264, 8, ssse3)
109 PRED16x16(plane_rv40, 8, sse2)
110 PRED16x16(plane_rv40, 8, ssse3)
111 PRED16x16(plane_svq3, 8, sse2)
112 PRED16x16(plane_svq3, 8, ssse3)
113 PRED16x16(tm_vp8, 8, sse2)
114 PRED16x16(tm_vp8, 8, avx2)
115 
116 PRED8x8(top_dc, 8, mmxext)
117 PRED8x8(dc_rv40, 8, mmxext)
118 PRED8x8(dc, 8, mmxext)
119 PRED8x8(vertical, 8, mmx)
120 PRED8x8(horizontal, 8, mmxext)
121 PRED8x8(horizontal, 8, ssse3)
122 PRED8x8(plane, 8, sse2)
123 PRED8x8(plane, 8, ssse3)
124 PRED8x8(tm_vp8, 8, sse2)
125 PRED8x8(tm_vp8, 8, ssse3)
126 
127 PRED8x8L(top_dc, 8, mmxext)
128 PRED8x8L(top_dc, 8, ssse3)
129 PRED8x8L(dc, 8, mmxext)
130 PRED8x8L(dc, 8, ssse3)
131 PRED8x8L(horizontal, 8, mmxext)
132 PRED8x8L(horizontal, 8, ssse3)
133 PRED8x8L(vertical, 8, mmxext)
134 PRED8x8L(vertical, 8, ssse3)
135 PRED8x8L(down_left, 8, sse2)
136 PRED8x8L(down_left, 8, ssse3)
137 PRED8x8L(down_right, 8, sse2)
138 PRED8x8L(down_right, 8, ssse3)
139 PRED8x8L(vertical_right, 8, sse2)
140 PRED8x8L(vertical_right, 8, ssse3)
141 PRED8x8L(vertical_left, 8, sse2)
142 PRED8x8L(vertical_left, 8, ssse3)
143 PRED8x8L(horizontal_up, 8, mmxext)
144 PRED8x8L(horizontal_up, 8, ssse3)
145 PRED8x8L(horizontal_down, 8, sse2)
146 PRED8x8L(horizontal_down, 8, ssse3)
147 
148 PRED4x4(dc, 8, mmxext)
149 PRED4x4(down_left, 8, mmxext)
150 PRED4x4(down_right, 8, mmxext)
151 PRED4x4(vertical_left, 8, mmxext)
152 PRED4x4(vertical_right, 8, mmxext)
153 PRED4x4(horizontal_up, 8, mmxext)
154 PRED4x4(horizontal_down, 8, mmxext)
155 PRED4x4(tm_vp8, 8, mmxext)
156 PRED4x4(tm_vp8, 8, ssse3)
157 PRED4x4(vertical_vp8, 8, mmxext)
158 
ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)159 av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
160                                    const int bit_depth,
161                                    const int chroma_format_idc)
162 {
163     int cpu_flags = av_get_cpu_flags();
164 
165     if (bit_depth == 8) {
166         if (EXTERNAL_MMX(cpu_flags)) {
167             if (chroma_format_idc <= 1) {
168                 h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
169             }
170         }
171 
172         if (EXTERNAL_MMXEXT(cpu_flags)) {
173             h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
174             if (chroma_format_idc <= 1)
175                 h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
176             h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
177             h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
178             h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
179             h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
180             h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
181             h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
182             h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
183             h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
184             h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
185             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 ||
186                 codec_id == AV_CODEC_ID_H264) {
187                 h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
188             }
189             if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
190                 h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
191             }
192             if (codec_id != AV_CODEC_ID_RV40) {
193                 h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
194             }
195             if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
196                 if (chroma_format_idc <= 1) {
197                     h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
198                     h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
199                 }
200             }
201             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
202                 h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
203                 h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
204                 h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
205             }
206         }
207 
208         if (EXTERNAL_SSE(cpu_flags)) {
209             h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
210         }
211 
212         if (EXTERNAL_SSE2(cpu_flags)) {
213             h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
214             h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
215             h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
216             h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
217             h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
218             h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
219             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
220                 h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
221                 h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
222             } else {
223                 if (chroma_format_idc <= 1)
224                     h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
225                 if (codec_id == AV_CODEC_ID_SVQ3) {
226                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
227                 } else if (codec_id == AV_CODEC_ID_RV40) {
228                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
229                 } else {
230                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
231                 }
232             }
233         }
234 
235         if (EXTERNAL_SSSE3(cpu_flags)) {
236             h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
237             h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
238             if (chroma_format_idc <= 1)
239                 h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
240             h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
241             h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
242             h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
243             h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
244             h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
245             h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
246             h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
247             h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
248             h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
249             h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
250             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
251                 h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
252                 h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
253             } else {
254                 if (chroma_format_idc <= 1)
255                     h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
256                 if (codec_id == AV_CODEC_ID_SVQ3) {
257                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
258                 } else if (codec_id == AV_CODEC_ID_RV40) {
259                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
260                 } else {
261                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
262                 }
263             }
264         }
265 
266         if(EXTERNAL_AVX2(cpu_flags)){
267             if (codec_id == AV_CODEC_ID_VP8) {
268                 h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_avx2;
269             }
270         }
271     } else if (bit_depth == 10) {
272         if (EXTERNAL_MMXEXT(cpu_flags)) {
273             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
274             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
275         }
276         if (EXTERNAL_SSE2(cpu_flags)) {
277             h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
278             h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
279             h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
280             h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
281             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
282 
283             if (chroma_format_idc <= 1) {
284                 h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_sse2;
285                 h->pred8x8[TOP_DC_PRED8x8  ] = ff_pred8x8_top_dc_10_sse2;
286                 h->pred8x8[PLANE_PRED8x8   ] = ff_pred8x8_plane_10_sse2;
287                 h->pred8x8[VERT_PRED8x8    ] = ff_pred8x8_vertical_10_sse2;
288                 h->pred8x8[HOR_PRED8x8     ] = ff_pred8x8_horizontal_10_sse2;
289             }
290 
291             h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_sse2;
292             h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_sse2;
293             h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_sse2;
294             h->pred8x8l[DC_128_PRED         ] = ff_pred8x8l_128_dc_10_sse2;
295             h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_sse2;
296             h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
297             h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
298             h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_sse2;
299             h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_sse2;
300 
301             h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_sse2;
302             h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_sse2;
303             h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_sse2;
304             h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_sse2;
305             h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_sse2;
306             h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_sse2;
307         }
308         if (EXTERNAL_SSSE3(cpu_flags)) {
309             h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
310             h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
311             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;
312 
313             h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_ssse3;
314             h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
315             h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
316             h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_ssse3;
317             h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_ssse3;
318         }
319         if (EXTERNAL_AVX(cpu_flags)) {
320             h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
321             h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
322             h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_avx;
323             h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
324             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;
325 
326             h->pred8x8l[VERT_PRED           ] = ff_pred8x8l_vertical_10_avx;
327             h->pred8x8l[HOR_PRED            ] = ff_pred8x8l_horizontal_10_avx;
328             h->pred8x8l[DC_PRED             ] = ff_pred8x8l_dc_10_avx;
329             h->pred8x8l[TOP_DC_PRED         ] = ff_pred8x8l_top_dc_10_avx;
330             h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
331             h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
332             h->pred8x8l[VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_10_avx;
333             h->pred8x8l[HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_10_avx;
334         }
335     }
336 }
337