;******************************************************************************
;* x86 optimized dithering format conversion
;* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32

; 1.0f / (2.0f * INT32_MAX)
; Stored eight times (32 bytes) so one aligned load fills an XMM or a YMM
; register, whichever the current INIT_* mode selects.
pf_dither_scale: times 8 dd 2.32830643762e-10

; Gain applied to every float sample in quantize before the dither is added.
pf_s16_scale:    times 4 dd 32753.0

SECTION .text

;------------------------------------------------------------------------------
; void ff_quantize(int16_t *dst, float *src, float *dither, int len);
;
; For each sample: dst[i] = pack_s16(cvt_s32(src[i] * 32753.0 + dither[i]))
;   - cvtps2dq converts to int32 using the current MXCSR rounding mode
;   - packssdw narrows to int16 with signed saturation
;
; Each pass consumes 8 floats from src/dither and writes 8 int16 (16 bytes).
; The loop is bottom-tested, so it always runs at least once; len is expected
; to be a positive multiple of 8. All three buffers are touched with aligned
; SSE accesses (16-byte alignment required).
;------------------------------------------------------------------------------

INIT_XMM sse2
cglobal quantize, 4,4,3, dst, src, dither, len
    mova          m2, [pf_s16_scale]        ; loop-invariant gain
    lea         lenq, [2*lend]              ; lenq = len * sizeof(int16_t)
    lea         srcq, [srcq+2*lenq]         ; src    += len * sizeof(float)
    lea      ditherq, [ditherq+2*lenq]      ; dither += len * sizeof(float)
    add         dstq, lenq                  ; dst    += len * sizeof(int16_t)
    neg         lenq                        ; count up from -bytes towards 0
.quantize8:
    ; float offsets are twice the int16 offset, hence the 2*lenq index
    mulps         m0, m2, [srcq+2*lenq]
    mulps         m1, m2, [srcq+2*lenq+mmsize]
    addps         m0, [ditherq+2*lenq]
    addps         m1, [ditherq+2*lenq+mmsize]
    cvtps2dq      m0, m0
    cvtps2dq      m1, m1
    packssdw      m0, m1                    ; 2 x 4 int32 -> 8 int16, saturated
    mova [dstq+lenq], m0
    add         lenq, mmsize
    jl .quantize8
    REP_RET

;------------------------------------------------------------------------------
; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
;
; dst[i] = (float)src[i] * pf_dither_scale
;
; Two vector registers are handled per pass, i.e. 2*mmsize bytes of input and
; output (8 ints for the SSE2 build, 16 for the AVX build). The loop is
; bottom-tested, so len is expected to be a positive multiple of that count.
; dst is written with aligned stores; keep both buffers aligned to mmsize.
;------------------------------------------------------------------------------

%macro DITHER_INT_TO_FLOAT_RECTANGULAR 0
cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
    mova          m0, [pf_dither_scale]     ; loop-invariant scale
    lea         lenq, [4*lend]              ; lenq = len * 4 bytes
    add         dstq, lenq                  ; point both buffers at their end
    add         srcq, lenq
    neg         lenq                        ; negative byte index, runs up to 0
.convert:
    cvtdq2ps      m1, [srcq+lenq]
    cvtdq2ps      m2, [srcq+lenq+mmsize]
    mulps         m1, m0
    mulps         m2, m0
    mova [dstq+lenq], m1
    mova [dstq+lenq+mmsize], m2
    add         lenq, 2*mmsize
    jl .convert
    REP_RET
%endmacro

INIT_XMM sse2
DITHER_INT_TO_FLOAT_RECTANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_RECTANGULAR

;------------------------------------------------------------------------------
; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len)
;
; dst[i] = ((float)src0[i] + (float)src0[len + i]) * pf_dither_scale
;
; The second addend is read from the len ints that directly follow the first
; len ints of src0, so src0 must provide 2*len readable ints. As in the
; rectangular variant, each pass covers 2*mmsize bytes of output and the loop
; is bottom-tested (len expected to be a positive multiple of that count).
;------------------------------------------------------------------------------

%macro DITHER_INT_TO_FLOAT_TRIANGULAR 0
cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
    mova          m0, [pf_dither_scale]     ; loop-invariant scale
    lea         lenq, [4*lend]              ; lenq = len * 4 bytes
    add        src0q, lenq                  ; end of first half
    add         dstq, lenq
    lea        src1q, [src0q+lenq]          ; end of second half (src0 + 2*len ints)
    neg         lenq                        ; negative byte index, runs up to 0
.convert:
    cvtdq2ps      m1, [src0q+lenq]
    cvtdq2ps      m3, [src1q+lenq]
    cvtdq2ps      m2, [src0q+lenq+mmsize]
    cvtdq2ps      m4, [src1q+lenq+mmsize]
    addps         m1, m3                    ; sum the two halves
    addps         m2, m4
    mulps         m1, m0
    mulps         m2, m0
    mova [dstq+lenq], m1
    mova [dstq+lenq+mmsize], m2
    add         lenq, 2*mmsize
    jl .convert
    REP_RET
%endmacro

INIT_XMM sse2
DITHER_INT_TO_FLOAT_TRIANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_TRIANGULAR