/*
 * ARM NEON optimised Format Conversion Utils
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/aarch64/asm.S"

/*
 * ff_int32_to_float_fmul_scalar_neon
 *
 * Converts signed 32-bit integers to single-precision floats and multiplies
 * every converted value by one scalar.
 *
 * Register roles, as used by the code below:
 *   x0       destination pointer; 8 floats are stored per step
 *   x1       source pointer; 8 int32 values are loaded per step
 *   v0.s[0]  multiplier applied to every element
 *   w2       element count, consumed 8 at a time
 * NOTE(review): presumably called from C as
 *   (float *dst, const int32_t *src, float mul, int len) - confirm against
 *   the C declaration.
 *
 * A positive count that is not a multiple of 8 is effectively rounded up to
 * the next multiple of 8, because whole 8-element blocks are always processed.
 * A count <= 0 returns without touching memory.
 *
 * Modifies: x0, x1, w2, v1-v4, condition flags.
 */
function ff_int32_to_float_fmul_scalar_neon, export=1
        cmp             w2,  #0
        b.le            3f                              // empty input: no loads, no stores
        // Prime the pipeline: fetch and convert the first 8 values.
        ld1             {v1.4s,v2.4s}, [x1], #32
        scvtf           v1.4s,  v1.4s
        scvtf           v2.4s,  v2.4s
1:
        subs            w2,  w2,  #8                    // flags consumed by b.le below
        fmul            v3.4s,  v1.4s,  v0.s[0]
        fmul            v4.4s,  v2.4s,  v0.s[0]
        b.le            2f                              // that was the last block
        // Fetch and convert the next block before the current result is
        // stored; v1/v2 are free again once the fmuls above have read them.
        ld1             {v1.4s,v2.4s}, [x1], #32
        st1             {v3.4s,v4.4s}, [x0], #32
        scvtf           v1.4s,  v1.4s
        scvtf           v2.4s,  v2.4s
        b               1b
2:
        st1             {v3.4s,v4.4s}, [x0]             // final block, no pointer update needed
3:
        ret
endfunc

/*
 * ff_int32_to_float_fmul_array8_neon
 *
 * Converts signed 32-bit integers to single-precision floats in groups of
 * 8 elements, multiplying each group by its own factor read from an array.
 *
 * Register roles, as used by the code below:
 *   x0       not referenced by this routine
 *   x1       destination pointer; 8 floats are stored per group
 *   x2       source pointer; 8 int32 values are loaded per group
 *   x3       pointer to the multipliers, one float per group of 8
 *   w4       element count; only len >> 3 whole groups are processed
 * NOTE(review): presumably called from C as
 *   (context *c, float *dst, const int32_t *src, const float *mul, int len)
 *   with the context pointer unused here - confirm against the C declaration.
 *
 * The main loop handles two groups (16 elements, two multipliers) per
 * iteration; an odd trailing group is handled by the tail at label 1.
 * If len >> 3 is zero the routine returns without touching memory.
 *
 * Modifies: x1, x2, x3, w4, w5, v0-v7, v16, condition flags.
 */
function ff_int32_to_float_fmul_array8_neon, export=1
        lsr             w4,  w4,  #3                    // w4 = number of 8-element groups
        cbz             w4,  3f                         // no whole group: nothing to do
        subs            w5,  w4,  #1                    // w5 = groups - 1
        b.eq            1f                              // exactly one group: tail only
2:
        // Two groups per iteration, each with its own multiplier.
        ld1             {v0.4s,v1.4s}, [x2], #32
        ld1             {v2.4s,v3.4s}, [x2], #32
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        ld1             {v16.2s}, [x3], #8              // two multipliers: s[0], s[1]
        scvtf           v2.4s,  v2.4s
        scvtf           v3.4s,  v3.4s
        fmul            v4.4s,  v0.4s,  v16.s[0]
        fmul            v5.4s,  v1.4s,  v16.s[0]
        fmul            v6.4s,  v2.4s,  v16.s[1]
        fmul            v7.4s,  v3.4s,  v16.s[1]
        st1             {v4.4s,v5.4s}, [x1], #32
        st1             {v6.4s,v7.4s}, [x1], #32
        subs            w5,  w5,  #2
        b.gt            2b                              // at least two groups remain
        b.eq            1f                              // exactly one group remains
3:
        ret                                             // group count was even (or zero)
1:
        // Single remaining group with a single multiplier.
        ld1             {v0.4s,v1.4s}, [x2]
        ld1             {v16.s}[0], [x3]
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        fmul            v4.4s,  v0.4s,  v16.s[0]
        fmul            v5.4s,  v1.4s,  v16.s[0]
        st1             {v4.4s,v5.4s}, [x1]
        ret
endfunc