;******************************************************************************
;* SIMD optimized SBC encoder DSP functions
;*
;* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
;* Copyright (C) 2008-2010 Nokia Corporation
;* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
;* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
;* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Two dwords of 0x8000. Used as the initial value of the analysis
; accumulators (which are later shifted right by 16, so this acts as a
; rounding bias) and as the floor value OR-ed into the scale factor search.
scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED_SCALE - 1)

SECTION .text

;-----------------------------------------------------------------------
; NIDN op, dst, src
; Emit "op dst, src" unless dst and src are textually identical operands,
; in which case nothing is emitted.
;-----------------------------------------------------------------------
%macro NIDN 3
%ifnidn %2, %3
    %1            %2, %3
%endif
%endmacro

;-----------------------------------------------------------------------
; ANALYZE_MAC out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
; One multiply-accumulate step on two 8-byte groups:
;   tmp1 = pmaddwd(in1, [constsq+offset  ])
;   tmp2 = pmaddwd(in2, [constsq+offset+8])
;   out1 += add1 ; out2 += add2
; Every move/add whose two operands are the same text is dropped by NIDN,
; so passing tmp == in avoids a copy and passing add == out leaves the
; product itself as the result.
; Clobbers: tmp1, tmp2 (and out1/out2 as results).
;-----------------------------------------------------------------------
%macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
    NIDN movq,    %5, %3
    NIDN movq,    %6, %4
    pmaddwd       %5, [constsq+%9]
    pmaddwd       %6, [constsq+%9+8]
    NIDN paddd,   %1, %7
    NIDN paddd,   %2, %8
%endmacro

;-----------------------------------------------------------------------
; ANALYZE_MAC_IN out1, out2, tmp1, tmp2, add1, add2, offset
; ANALYZE_MAC with the inputs read from memory at [inq+offset] and
; [inq+offset+8]; the same byte offset indexes both the input and the
; constants table.
;-----------------------------------------------------------------------
%macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
    ANALYZE_MAC   %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
%endmacro

;-----------------------------------------------------------------------
; ANALYZE_MAC_REG out1, out2, in, tmp1, tmp2, offset, pack
; ANALYZE_MAC with a single register feeding both multiplies and with the
; products themselves (tmp1/tmp2) used as the addends.
; When the last argument is the literal "pack", the register is first
; shifted right arithmetically by 16 and narrowed with signed saturation
; to 16-bit words (packssdw with itself, so both halves hold the same two
; words). Any other value skips that step and uses the register as is.
;-----------------------------------------------------------------------
%macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
%ifidn %7, pack
    psrad         %3, 16    ; SBC_PROTO_FIXED_SCALE
    packssdw      %3, %3
%endif
    ANALYZE_MAC   %1, %2, %3, %3, %4, %5, %4, %5, %6
%endmacro

;*******************************************************************
;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Stage 1: five multiply-accumulate steps over input bytes 0..79 against
;          consts bytes 0..79, accumulated in m0/m1 starting from
;          scale_mask.
; Stage 2: m0 and m1 are scaled down and packed to 16 bit, multiplied by
;          consts bytes 80..111 and summed into m0/m2.
; Writes 16 bytes to out. Uses m0-m3.
; No emms is executed here.
; NOTE(review): presumably the caller clears the MMX state - confirm.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
    ; first step seeds the accumulators: product + rounding bias
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 16
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 32
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 48
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 64

    ; m0 is consumed in place: after this line m0/m2 hold its two products
    ANALYZE_MAC_REG  m0, m2, m0, m0, m2, 80, pack
    ANALYZE_MAC_REG  m0, m2, m1, m1, m3, 96, pack

    movq          [outq  ], m0
    movq          [outq+8], m2

    RET


;*******************************************************************
;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Stage 1: ten multiply-accumulate steps over input bytes 0..159 against
;          consts bytes 0..159. Offsets 0,32,..,128 accumulate in m0/m1,
;          offsets 16,48,..,144 accumulate in m2/m3; both pairs start
;          from scale_mask.
; Stage 2: m0-m3 are scaled down and packed to 16 bit, then multiplied
;          twice: by consts at 160/192/224/256 for out[0..15 bytes] and
;          by consts at 176/208/240/272 for out[16..31 bytes].
; Writes 32 bytes to out. Uses m0-m7.
; No emms is executed here.
; NOTE(review): presumably the caller clears the MMX state - confirm.
; NOTE(review): the prologue declares 4 vector registers while the body
; uses m0-m7 - confirm that cglobal's count is irrelevant for MMX.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
    ; first step of each pair seeds the accumulators: product + rounding bias
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask],  0
    ANALYZE_MAC_IN   m2, m3, m2, m3, [scale_mask], [scale_mask], 16
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  32
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  48
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  64
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  80
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  96
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 112
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 128
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 144

    ; First output half. The packed values are copied to temporaries
    ; before multiplying, so m0-m3 stay intact for the second half.
    ANALYZE_MAC_REG  m4, m5, m0, m4, m5, 160, pack
    ANALYZE_MAC_REG  m4, m5, m1, m6, m7, 192, pack
    ANALYZE_MAC_REG  m4, m5, m2, m6, m7, 224, pack
    ANALYZE_MAC_REG  m4, m5, m3, m6, m7, 256, pack

    movq          [outq   ], m4
    movq          [outq+8 ], m5

    ; Second output half. m0-m3 are already packed ("no" skips the
    ; shift/pack) and are overwritten by their own products here.
    ANALYZE_MAC_REG  m0, m5, m0, m0, m5, 176, no
    ANALYZE_MAC_REG  m0, m5, m1, m1, m7, 208, no
    ANALYZE_MAC_REG  m0, m5, m2, m2, m7, 240, no
    ANALYZE_MAC_REG  m0, m5, m3, m3, m7, 272, no

    movq          [outq+16], m0
    movq          [outq+24], m5

    RET


;*******************************************************************
;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
;                              uint32_t scale_factor[2][8],
;                              int blocks, int channels, int subbands)
;
; Handles two adjacent int32 columns per outer iteration, walking the
; byte offset "subbands" from the end of the row down to 0 in steps of 8.
; For each column pair the inner loop visits every block (64-byte stride,
; last block first), maps each sample x to |x| - 1 (0 stays 0) and ORs the
; results together on top of scale_mask. The stored scale factor is the
; index of the highest set bit of that OR minus 15; since bit 15 is always
; set by scale_mask, bsr never sees zero and the result is never negative.
;
; Register roles:
;   m3        = scale_mask (loop invariant)
;   m0        = running OR for the current column pair
;   subbandsq = byte offset of the current column pair
;   ptrq      = sb_sample_f + subbandsq
;   blkq      = byte offset of the current block, then scratch for bsr
; Executes emms before returning.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
    ; subbands = 4 * subbands * channels
    movq          m3, [scale_mask]
    shl           subbandsd, 2
    cmp           channelsd, 2
    jl            .loop_1               ; fewer than 2 channels: no doubling
    shl           subbandsd, 1

.loop_1:
    sub           subbandsq, 8          ; step back one pair of int32 columns
    lea           ptrq, [sb_sample_fq + subbandsq]

    ; blk = (blocks - 1) * 64;
    lea           blkq, [blocksq - 1]
    shl           blkd, 6               ; 32-bit write also clears the upper half of blkq

    movq          m0, m3                ; restart the OR from scale_mask
.loop_2:
    movq          m1, [ptrq+blkq]
    pxor          m2, m2
    pcmpgtd       m1, m2                ; m1 = (x > 0) ? -1 : 0
    paddd         m1, [ptrq+blkq]       ; m1 = x - 1 for x > 0, else x
    pcmpgtd       m2, m1                ; m2 = (m1 < 0) ? -1 : 0
    pxor          m1, m2                ; negative lanes become ~m1 = -x - 1

    por           m0, m1

    sub           blkq, 64
    jns           .loop_2               ; continue until the offset goes below 0

    ; low dword -> first column of the pair
    movd          blkd, m0
    psrlq         m0,   32
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq], blkd

    ; high dword -> second column of the pair
    movd          blkd, m0
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq + 4], blkd

    cmp           subbandsq, 0
    jg            .loop_1

    emms
    RET