1/* 2 * Copyright (c) 2010 Fiona Glaser <fiona@x264.com> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include <stddef.h> 22#include <stdint.h> 23#include "config.h" 24#include "libavutil/attributes.h" 25#include "libavutil/cpu.h" 26#include "libavutil/x86/cpu.h" 27#include "libavcodec/codec_id.h" 28#include "libavcodec/h264pred.h" 29 30#define PRED4x4(TYPE, DEPTH, OPT) \ 31void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ 32 const uint8_t *topright, \ 33 ptrdiff_t stride); 34 35PRED4x4(dc, 10, mmxext) 36PRED4x4(down_left, 10, sse2) 37PRED4x4(down_left, 10, avx) 38PRED4x4(down_right, 10, sse2) 39PRED4x4(down_right, 10, ssse3) 40PRED4x4(down_right, 10, avx) 41PRED4x4(vertical_left, 10, sse2) 42PRED4x4(vertical_left, 10, avx) 43PRED4x4(vertical_right, 10, sse2) 44PRED4x4(vertical_right, 10, ssse3) 45PRED4x4(vertical_right, 10, avx) 46PRED4x4(horizontal_up, 10, mmxext) 47PRED4x4(horizontal_down, 10, sse2) 48PRED4x4(horizontal_down, 10, ssse3) 49PRED4x4(horizontal_down, 10, avx) 50 51#define PRED8x8(TYPE, DEPTH, OPT) \ 52void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ 53 ptrdiff_t stride); 54 55PRED8x8(dc, 10, sse2) 56PRED8x8(top_dc, 10, sse2) 57PRED8x8(plane, 10, sse2) 58PRED8x8(vertical, 10, sse2) 59PRED8x8(horizontal, 10, sse2) 60 61#define PRED8x8L(TYPE, DEPTH, OPT)\ 62void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ 63 int has_topleft, \ 64 int has_topright, \ 65 ptrdiff_t stride); 66 67PRED8x8L(dc, 10, sse2) 68PRED8x8L(dc, 10, avx) 69PRED8x8L(128_dc, 10, sse2) 70PRED8x8L(top_dc, 10, sse2) 71PRED8x8L(top_dc, 10, avx) 72PRED8x8L(vertical, 10, sse2) 73PRED8x8L(vertical, 10, avx) 74PRED8x8L(horizontal, 10, sse2) 75PRED8x8L(horizontal, 10, ssse3) 76PRED8x8L(horizontal, 10, avx) 77PRED8x8L(down_left, 10, sse2) 78PRED8x8L(down_left, 10, ssse3) 79PRED8x8L(down_left, 10, avx) 80PRED8x8L(down_right, 10, sse2) 81PRED8x8L(down_right, 10, ssse3) 82PRED8x8L(down_right, 10, avx) 83PRED8x8L(vertical_right, 10, sse2) 84PRED8x8L(vertical_right, 10, ssse3) 85PRED8x8L(vertical_right, 10, avx) 86PRED8x8L(horizontal_up, 10, sse2) 87PRED8x8L(horizontal_up, 10, ssse3) 88PRED8x8L(horizontal_up, 10, avx) 89 90#define PRED16x16(TYPE, DEPTH, OPT)\ 91void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ 92 ptrdiff_t stride); 93 94PRED16x16(dc, 10, sse2) 95PRED16x16(top_dc, 10, sse2) 96PRED16x16(128_dc, 10, sse2) 97PRED16x16(left_dc, 10, sse2) 98PRED16x16(vertical, 10, sse2) 99PRED16x16(horizontal, 10, sse2) 100 101/* 8-bit versions */ 102PRED16x16(vertical, 8, sse) 103PRED16x16(horizontal, 8, mmxext) 104PRED16x16(horizontal, 8, ssse3) 105PRED16x16(dc, 8, sse2) 106PRED16x16(dc, 8, ssse3) 107PRED16x16(plane_h264, 8, sse2) 108PRED16x16(plane_h264, 8, ssse3) 109PRED16x16(plane_rv40, 8, sse2) 110PRED16x16(plane_rv40, 8, ssse3) 111PRED16x16(plane_svq3, 8, sse2) 112PRED16x16(plane_svq3, 8, ssse3) 113PRED16x16(tm_vp8, 8, sse2) 114PRED16x16(tm_vp8, 8, avx2) 115 116PRED8x8(top_dc, 8, mmxext) 117PRED8x8(dc_rv40, 8, mmxext) 118PRED8x8(dc, 8, mmxext) 119PRED8x8(vertical, 8, mmx) 120PRED8x8(horizontal, 8, mmxext) 121PRED8x8(horizontal, 8, ssse3) 122PRED8x8(plane, 8, sse2) 123PRED8x8(plane, 8, ssse3) 124PRED8x8(tm_vp8, 8, sse2) 125PRED8x8(tm_vp8, 8, ssse3) 126 127PRED8x8L(top_dc, 8, mmxext) 128PRED8x8L(top_dc, 8, ssse3) 129PRED8x8L(dc, 8, mmxext) 130PRED8x8L(dc, 8, ssse3) 131PRED8x8L(horizontal, 8, mmxext) 132PRED8x8L(horizontal, 8, ssse3) 133PRED8x8L(vertical, 8, mmxext) 134PRED8x8L(vertical, 8, ssse3) 135PRED8x8L(down_left, 8, sse2) 136PRED8x8L(down_left, 8, ssse3) 137PRED8x8L(down_right, 8, sse2) 138PRED8x8L(down_right, 8, ssse3) 139PRED8x8L(vertical_right, 8, sse2) 140PRED8x8L(vertical_right, 8, ssse3) 141PRED8x8L(vertical_left, 8, sse2) 142PRED8x8L(vertical_left, 8, ssse3) 143PRED8x8L(horizontal_up, 8, mmxext) 144PRED8x8L(horizontal_up, 8, ssse3) 145PRED8x8L(horizontal_down, 8, sse2) 146PRED8x8L(horizontal_down, 8, ssse3) 147 148PRED4x4(dc, 8, mmxext) 149PRED4x4(down_left, 8, mmxext) 150PRED4x4(down_right, 8, mmxext) 151PRED4x4(vertical_left, 8, mmxext) 152PRED4x4(vertical_right, 8, mmxext) 153PRED4x4(horizontal_up, 8, mmxext) 154PRED4x4(horizontal_down, 8, mmxext) 155PRED4x4(tm_vp8, 8, mmxext) 156PRED4x4(tm_vp8, 8, ssse3) 157PRED4x4(vertical_vp8, 8, mmxext) 158 159av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, 160 const int bit_depth, 161 const int chroma_format_idc) 162{ 163 int cpu_flags = av_get_cpu_flags(); 164 165 if (bit_depth == 8) { 166 if (EXTERNAL_MMX(cpu_flags)) { 167 if (chroma_format_idc <= 1) { 168 h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx; 169 } 170 } 171 172 if (EXTERNAL_MMXEXT(cpu_flags)) { 173 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext; 174 if (chroma_format_idc <= 1) 175 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext; 176 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext; 177 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext; 178 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_mmxext; 179 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_mmxext; 180 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_mmxext; 181 h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_8_mmxext; 182 h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_8_mmxext; 183 h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_8_mmxext; 184 h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_8_mmxext; 185 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 || 186 codec_id == AV_CODEC_ID_H264) { 187 h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext; 188 } 189 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) { 190 h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_8_mmxext; 191 } 192 if (codec_id != AV_CODEC_ID_RV40) { 193 h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_8_mmxext; 194 } 195 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) { 196 if (chroma_format_idc <= 1) { 197 h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_8_mmxext; 198 h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_8_mmxext; 199 } 200 } 201 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { 202 h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_8_mmxext; 203 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmxext; 204 h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_8_mmxext; 205 } 206 } 207 208 if (EXTERNAL_SSE(cpu_flags)) { 209 h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse; 210 } 211 212 if (EXTERNAL_SSE2(cpu_flags)) { 213 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2; 214 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2; 215 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2; 216 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2; 217 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2; 218 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2; 219 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { 220 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2; 221 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2; 222 } else { 223 if (chroma_format_idc <= 1) 224 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2; 225 if (codec_id == AV_CODEC_ID_SVQ3) { 226 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2; 227 } else if (codec_id == AV_CODEC_ID_RV40) { 228 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2; 229 } else { 230 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2; 231 } 232 } 233 } 234 235 if (EXTERNAL_SSSE3(cpu_flags)) { 236 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3; 237 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3; 238 if (chroma_format_idc <= 1) 239 h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_ssse3; 240 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_ssse3; 241 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_ssse3; 242 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_ssse3; 243 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_ssse3; 244 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_ssse3; 245 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3; 246 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_ssse3; 247 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_ssse3; 248 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_ssse3; 249 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_ssse3; 250 if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { 251 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_ssse3; 252 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_ssse3; 253 } else { 254 if (chroma_format_idc <= 1) 255 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3; 256 if (codec_id == AV_CODEC_ID_SVQ3) { 257 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3; 258 } else if (codec_id == AV_CODEC_ID_RV40) { 259 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3; 260 } else { 261 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3; 262 } 263 } 264 } 265 266 if(EXTERNAL_AVX2(cpu_flags)){ 267 if (codec_id == AV_CODEC_ID_VP8) { 268 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_avx2; 269 } 270 } 271 } else if (bit_depth == 10) { 272 if (EXTERNAL_MMXEXT(cpu_flags)) { 273 h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; 274 h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; 275 } 276 if (EXTERNAL_SSE2(cpu_flags)) { 277 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; 278 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; 279 h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; 280 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; 281 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; 282 283 if (chroma_format_idc <= 1) { 284 h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; 285 h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; 286 h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; 287 h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; 288 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; 289 } 290 291 h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; 292 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; 293 h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2; 294 h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_sse2; 295 h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2; 296 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2; 297 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2; 298 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2; 299 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2; 300 301 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_sse2; 302 h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_sse2; 303 h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_sse2; 304 h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_sse2; 305 h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; 306 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; 307 } 308 if (EXTERNAL_SSSE3(cpu_flags)) { 309 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; 310 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; 311 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; 312 313 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3; 314 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3; 315 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3; 316 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; 317 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; 318 } 319 if (EXTERNAL_AVX(cpu_flags)) { 320 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; 321 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; 322 h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; 323 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx; 324 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx; 325 326 h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_avx; 327 h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_avx; 328 h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_avx; 329 h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_avx; 330 h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx; 331 h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx; 332 h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx; 333 h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; 334 } 335 } 336} 337