1 /*
2 * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <stddef.h>
22 #include <stdint.h>
23 #include "config.h"
24 #include "libavutil/attributes.h"
25 #include "libavutil/cpu.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/codec_id.h"
28 #include "libavcodec/h264pred.h"
29
/*
 * PRED4x4(MODE, BITS, ISA) emits the prototype of the 4x4 intra predictor
 * ff_pred4x4_<MODE>_<BITS>_<ISA>(src, topright, stride).
 * The functions are only declared in this file; their definitions live
 * elsewhere (presumably the x86 assembly sources -- confirm).
 * The expansion already ends in ';', so the uses below take no semicolon.
 */
#define PRED4x4(MODE, BITS, ISA)                                   \
    void ff_pred4x4_ ## MODE ## _ ## BITS ## _ ## ISA(             \
        uint8_t *src, const uint8_t *topright, ptrdiff_t stride);

/* 10-bit 4x4 predictors, grouped by instruction set */

/* mmxext */
PRED4x4(dc,              10, mmxext)
PRED4x4(horizontal_up,   10, mmxext)

/* sse2 */
PRED4x4(down_left,       10, sse2)
PRED4x4(down_right,      10, sse2)
PRED4x4(vertical_left,   10, sse2)
PRED4x4(vertical_right,  10, sse2)
PRED4x4(horizontal_down, 10, sse2)

/* ssse3 */
PRED4x4(down_right,      10, ssse3)
PRED4x4(vertical_right,  10, ssse3)
PRED4x4(horizontal_down, 10, ssse3)

/* avx */
PRED4x4(down_left,       10, avx)
PRED4x4(down_right,      10, avx)
PRED4x4(vertical_left,   10, avx)
PRED4x4(vertical_right,  10, avx)
PRED4x4(horizontal_down, 10, avx)
50
/*
 * PRED8x8(MODE, BITS, ISA) emits the prototype of the 8x8 intra predictor
 * ff_pred8x8_<MODE>_<BITS>_<ISA>(src, stride).
 * The expansion already ends in ';', so the uses below take no semicolon.
 */
#define PRED8x8(MODE, BITS, ISA)                                   \
    void ff_pred8x8_ ## MODE ## _ ## BITS ## _ ## ISA(             \
        uint8_t *src, ptrdiff_t stride);

/* 10-bit 8x8 predictors (sse2 only) */
PRED8x8(vertical,   10, sse2)
PRED8x8(horizontal, 10, sse2)
PRED8x8(dc,         10, sse2)
PRED8x8(top_dc,     10, sse2)
PRED8x8(plane,      10, sse2)
60
/*
 * PRED8x8L(MODE, BITS, ISA) emits the prototype of the 8x8l intra predictor
 * ff_pred8x8l_<MODE>_<BITS>_<ISA>(src, has_topleft, has_topright, stride).
 * The expansion already ends in ';', so the uses below take no semicolon.
 */
#define PRED8x8L(MODE, BITS, ISA)                                  \
    void ff_pred8x8l_ ## MODE ## _ ## BITS ## _ ## ISA(            \
        uint8_t *src, int has_topleft, int has_topright,           \
        ptrdiff_t stride);

/* 10-bit 8x8l predictors, grouped by instruction set */

/* sse2 */
PRED8x8L(dc,             10, sse2)
PRED8x8L(128_dc,         10, sse2)
PRED8x8L(top_dc,         10, sse2)
PRED8x8L(vertical,       10, sse2)
PRED8x8L(horizontal,     10, sse2)
PRED8x8L(down_left,      10, sse2)
PRED8x8L(down_right,     10, sse2)
PRED8x8L(vertical_right, 10, sse2)
PRED8x8L(horizontal_up,  10, sse2)

/* ssse3 */
PRED8x8L(horizontal,     10, ssse3)
PRED8x8L(down_left,      10, ssse3)
PRED8x8L(down_right,     10, ssse3)
PRED8x8L(vertical_right, 10, ssse3)
PRED8x8L(horizontal_up,  10, ssse3)

/* avx */
PRED8x8L(dc,             10, avx)
PRED8x8L(top_dc,         10, avx)
PRED8x8L(vertical,       10, avx)
PRED8x8L(horizontal,     10, avx)
PRED8x8L(down_left,      10, avx)
PRED8x8L(down_right,     10, avx)
PRED8x8L(vertical_right, 10, avx)
PRED8x8L(horizontal_up,  10, avx)
89
/*
 * PRED16x16(MODE, BITS, ISA) emits the prototype of the 16x16 intra
 * predictor ff_pred16x16_<MODE>_<BITS>_<ISA>(src, stride).
 * The expansion already ends in ';', so the uses below take no semicolon.
 */
#define PRED16x16(MODE, BITS, ISA)                                 \
    void ff_pred16x16_ ## MODE ## _ ## BITS ## _ ## ISA(           \
        uint8_t *src, ptrdiff_t stride);

/* 10-bit 16x16 predictors (sse2 only) */
PRED16x16(vertical,   10, sse2)
PRED16x16(horizontal, 10, sse2)
PRED16x16(dc,         10, sse2)
PRED16x16(top_dc,     10, sse2)
PRED16x16(left_dc,    10, sse2)
PRED16x16(128_dc,     10, sse2)
100
101 /* 8-bit versions */
102 PRED16x16(vertical, 8, sse)
103 PRED16x16(horizontal, 8, mmxext)
104 PRED16x16(horizontal, 8, ssse3)
105 PRED16x16(dc, 8, sse2)
106 PRED16x16(dc, 8, ssse3)
107 PRED16x16(plane_h264, 8, sse2)
108 PRED16x16(plane_h264, 8, ssse3)
109 PRED16x16(plane_rv40, 8, sse2)
110 PRED16x16(plane_rv40, 8, ssse3)
111 PRED16x16(plane_svq3, 8, sse2)
112 PRED16x16(plane_svq3, 8, ssse3)
113 PRED16x16(tm_vp8, 8, sse2)
114 PRED16x16(tm_vp8, 8, avx2)
115
116 PRED8x8(top_dc, 8, mmxext)
117 PRED8x8(dc_rv40, 8, mmxext)
118 PRED8x8(dc, 8, mmxext)
119 PRED8x8(vertical, 8, mmx)
120 PRED8x8(horizontal, 8, mmxext)
121 PRED8x8(horizontal, 8, ssse3)
122 PRED8x8(plane, 8, sse2)
123 PRED8x8(plane, 8, ssse3)
124 PRED8x8(tm_vp8, 8, sse2)
125 PRED8x8(tm_vp8, 8, ssse3)
126
127 PRED8x8L(top_dc, 8, mmxext)
128 PRED8x8L(top_dc, 8, ssse3)
129 PRED8x8L(dc, 8, mmxext)
130 PRED8x8L(dc, 8, ssse3)
131 PRED8x8L(horizontal, 8, mmxext)
132 PRED8x8L(horizontal, 8, ssse3)
133 PRED8x8L(vertical, 8, mmxext)
134 PRED8x8L(vertical, 8, ssse3)
135 PRED8x8L(down_left, 8, sse2)
136 PRED8x8L(down_left, 8, ssse3)
137 PRED8x8L(down_right, 8, sse2)
138 PRED8x8L(down_right, 8, ssse3)
139 PRED8x8L(vertical_right, 8, sse2)
140 PRED8x8L(vertical_right, 8, ssse3)
141 PRED8x8L(vertical_left, 8, sse2)
142 PRED8x8L(vertical_left, 8, ssse3)
143 PRED8x8L(horizontal_up, 8, mmxext)
144 PRED8x8L(horizontal_up, 8, ssse3)
145 PRED8x8L(horizontal_down, 8, sse2)
146 PRED8x8L(horizontal_down, 8, ssse3)
147
148 PRED4x4(dc, 8, mmxext)
149 PRED4x4(down_left, 8, mmxext)
150 PRED4x4(down_right, 8, mmxext)
151 PRED4x4(vertical_left, 8, mmxext)
152 PRED4x4(vertical_right, 8, mmxext)
153 PRED4x4(horizontal_up, 8, mmxext)
154 PRED4x4(horizontal_down, 8, mmxext)
155 PRED4x4(tm_vp8, 8, mmxext)
156 PRED4x4(tm_vp8, 8, ssse3)
157 PRED4x4(vertical_vp8, 8, mmxext)
158
ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)159 av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
160 const int bit_depth,
161 const int chroma_format_idc)
162 {
163 int cpu_flags = av_get_cpu_flags();
164
165 if (bit_depth == 8) {
166 if (EXTERNAL_MMX(cpu_flags)) {
167 if (chroma_format_idc <= 1) {
168 h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx;
169 }
170 }
171
172 if (EXTERNAL_MMXEXT(cpu_flags)) {
173 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
174 if (chroma_format_idc <= 1)
175 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext;
176 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext;
177 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext;
178 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_mmxext;
179 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_mmxext;
180 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_mmxext;
181 h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_8_mmxext;
182 h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_8_mmxext;
183 h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_8_mmxext;
184 h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_8_mmxext;
185 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 ||
186 codec_id == AV_CODEC_ID_H264) {
187 h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
188 }
189 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
190 h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_8_mmxext;
191 }
192 if (codec_id != AV_CODEC_ID_RV40) {
193 h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_8_mmxext;
194 }
195 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
196 if (chroma_format_idc <= 1) {
197 h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_8_mmxext;
198 h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_8_mmxext;
199 }
200 }
201 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
202 h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_8_mmxext;
203 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmxext;
204 h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_8_mmxext;
205 }
206 }
207
208 if (EXTERNAL_SSE(cpu_flags)) {
209 h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
210 }
211
212 if (EXTERNAL_SSE2(cpu_flags)) {
213 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
214 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
215 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
216 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2;
217 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2;
218 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2;
219 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
220 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2;
221 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2;
222 } else {
223 if (chroma_format_idc <= 1)
224 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
225 if (codec_id == AV_CODEC_ID_SVQ3) {
226 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
227 } else if (codec_id == AV_CODEC_ID_RV40) {
228 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
229 } else {
230 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
231 }
232 }
233 }
234
235 if (EXTERNAL_SSSE3(cpu_flags)) {
236 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3;
237 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3;
238 if (chroma_format_idc <= 1)
239 h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_ssse3;
240 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_ssse3;
241 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_ssse3;
242 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_ssse3;
243 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_ssse3;
244 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_ssse3;
245 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
246 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_ssse3;
247 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_ssse3;
248 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_ssse3;
249 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_ssse3;
250 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
251 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_ssse3;
252 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_ssse3;
253 } else {
254 if (chroma_format_idc <= 1)
255 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
256 if (codec_id == AV_CODEC_ID_SVQ3) {
257 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
258 } else if (codec_id == AV_CODEC_ID_RV40) {
259 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
260 } else {
261 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
262 }
263 }
264 }
265
266 if(EXTERNAL_AVX2(cpu_flags)){
267 if (codec_id == AV_CODEC_ID_VP8) {
268 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_avx2;
269 }
270 }
271 } else if (bit_depth == 10) {
272 if (EXTERNAL_MMXEXT(cpu_flags)) {
273 h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
274 h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
275 }
276 if (EXTERNAL_SSE2(cpu_flags)) {
277 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
278 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
279 h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
280 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2;
281 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2;
282
283 if (chroma_format_idc <= 1) {
284 h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2;
285 h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2;
286 h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2;
287 h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2;
288 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2;
289 }
290
291 h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2;
292 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2;
293 h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2;
294 h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_sse2;
295 h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2;
296 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
297 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
298 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2;
299 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2;
300
301 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_sse2;
302 h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_sse2;
303 h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_sse2;
304 h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_sse2;
305 h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
306 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
307 }
308 if (EXTERNAL_SSSE3(cpu_flags)) {
309 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
310 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
311 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
312
313 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3;
314 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
315 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
316 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
317 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
318 }
319 if (EXTERNAL_AVX(cpu_flags)) {
320 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
321 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
322 h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
323 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx;
324 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx;
325
326 h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_avx;
327 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_avx;
328 h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_avx;
329 h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_avx;
330 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
331 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
332 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx;
333 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx;
334 }
335 }
336 }
337