1/* 2 * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at) 3 * 4 * This file is part of libswresample 5 * 6 * libswresample is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * libswresample is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with libswresample; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavutil/attributes.h" 22#include "libavutil/x86/cpu.h" 23#include "libswresample/swresample_internal.h" 24 25#define D(type, simd) \ 26mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\ 27mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd; 28 29D(float, sse) 30D(float, avx) 31D(int16, sse2) 32 33av_cold int swri_rematrix_init_x86(struct SwrContext *s){ 34#if HAVE_X86ASM 35 int mm_flags = av_get_cpu_flags(); 36 int nb_in = s->used_ch_count; 37 int nb_out = s->out.ch_count; 38 int num = nb_in * nb_out; 39 int i,j; 40 41 s->mix_1_1_simd = NULL; 42 s->mix_2_1_simd = NULL; 43 44 if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ 45 if(EXTERNAL_SSE2(mm_flags)) { 46 s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2; 47 s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2; 48 } 49 s->native_simd_matrix = av_calloc(num, 2 * sizeof(int16_t)); 50 s->native_simd_one = av_mallocz(2 * sizeof(int16_t)); 51 if (!s->native_simd_matrix || !s->native_simd_one) 52 return AVERROR(ENOMEM); 53 54 for(i=0; i<nb_out; i++){ 55 int sh = 0; 56 for(j=0; j<nb_in; j++) 57 sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j])); 58 sh = FFMAX(av_log2(sh) - 14, 0); 59 for(j=0; j<nb_in; j++) { 60 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh; 61 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] = 62 ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; 63 } 64 } 65 ((int16_t*)s->native_simd_one)[1] = 14; 66 ((int16_t*)s->native_simd_one)[0] = 16384; 67 } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ 68 if(EXTERNAL_SSE(mm_flags)) { 69 s->mix_1_1_simd = ff_mix_1_1_a_float_sse; 70 s->mix_2_1_simd = ff_mix_2_1_a_float_sse; 71 } 72 if(EXTERNAL_AVX_FAST(mm_flags)) { 73 s->mix_1_1_simd = ff_mix_1_1_a_float_avx; 74 s->mix_2_1_simd = ff_mix_2_1_a_float_avx; 75 } 76 s->native_simd_matrix = av_calloc(num, sizeof(float)); 77 s->native_simd_one = av_mallocz(sizeof(float)); 78 if (!s->native_simd_matrix || !s->native_simd_one) 79 return AVERROR(ENOMEM); 80 memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); 81 memcpy(s->native_simd_one, s->native_one, sizeof(float)); 82 } 83#endif 84 85 return 0; 86} 87