1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "config.h" 20#include "config_components.h" 21 22#include "libavutil/cpu.h" 23#include "libavutil/mem_internal.h" 24 25#include "libavcodec/x86/fdct.h" 26#include "libavcodec/x86/xvididct.h" 27#include "libavcodec/x86/simple_idct.h" 28 29#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM 30void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, 31 int16_t *block, int16_t *qmat); 32 33#define PR_WRAP(INSN) \ 34static void ff_prores_idct_put_10_##INSN##_wrap(int16_t *dst){ \ 35 LOCAL_ALIGNED(16, int16_t, qmat, [64]); \ 36 LOCAL_ALIGNED(16, int16_t, tmp, [64]); \ 37 int i; \ 38 \ 39 for(i=0; i<64; i++){ \ 40 qmat[i]=4; \ 41 tmp[i]= dst[i]; \ 42 } \ 43 ff_prores_idct_put_10_##INSN (dst, 16, tmp, qmat); \ 44 \ 45 for(i=0; i<64; i++) { \ 46 dst[i] -= 512; \ 47 } \ 48} 49 50PR_WRAP(sse2) 51 52# if HAVE_AVX_EXTERNAL 53void ff_prores_idct_put_10_avx(uint16_t *dst, int linesize, 54 int16_t *block, int16_t *qmat); 55PR_WRAP(avx) 56# endif 57 58#endif 59 60static const struct algo fdct_tab_arch[] = { 61#if HAVE_SSE2_INLINE 62 { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 }, 63#endif 64 { 0 } 65}; 66 67static const struct algo idct_tab_arch[] = { 68#if CONFIG_MPEG4_DECODER && HAVE_X86ASM 69#if HAVE_SSE2_EXTERNAL 70 { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, 71#endif 72#endif /* CONFIG_MPEG4_DECODER && HAVE_X86ASM */ 73#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM 74 { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 }, 75# if HAVE_AVX_EXTERNAL 76 { "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 }, 77# endif 78#endif 79#if HAVE_X86ASM 80#if ARCH_X86_64 81#if HAVE_SSE2_EXTERNAL 82 { "SIMPLE8-SSE2", ff_simple_idct8_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2}, 83 { "SIMPLE10-SSE2", ff_simple_idct10_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2}, 84 { "SIMPLE12-SSE2", ff_simple_idct12_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 }, 85#endif 86#if HAVE_AVX_EXTERNAL 87 { "SIMPLE8-AVX", ff_simple_idct8_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX}, 88 { "SIMPLE10-AVX", ff_simple_idct10_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX}, 89 { "SIMPLE12-AVX", ff_simple_idct12_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 }, 90#endif 91#endif 92#endif 93 { 0 } 94}; 95 96static const uint8_t idct_simple_mmx_perm[64] = { 97 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 98 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 99 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 100 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 101 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 102 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 103 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 104 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, 105}; 106 107static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; 108 109static int permute_x86(int16_t dst[64], const int16_t src[64], 110 enum idct_permutation_type perm_type) 111{ 112 int i; 113 114 switch (perm_type) { 115 case FF_IDCT_PERM_SIMPLE: 116 for (i = 0; i < 64; i++) 117 dst[idct_simple_mmx_perm[i]] = src[i]; 118 return 1; 119 case FF_IDCT_PERM_SSE2: 120 for (i = 0; i < 64; i++) 121 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; 122 return 1; 123 } 124 125 return 0; 126} 127