/*
 * Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * Dot product of 32-bit float samples with a filter, 4 taps per iteration.
 *
 * In:       r0 = address that receives the single 32-bit float result
 *           r1 = src    (post-incremented by 16 bytes per iteration)
 *           r2 = filter (post-incremented by 16 bytes per iteration)
 *           r3 = filter_length; the loop body always runs at least once and
 *                consumes 4 taps per pass, so a positive multiple of 4 is
 *                expected (not checked here)
 * Clobbers: r1-r3, q0-q2, condition flags
 */
function ff_resample_common_apply_filter_x4_float_neon, export=1
        vmov.f32        q0, #0.0                @ accumulator = {0, 0, 0, 0}
1:      vld1.32         {q1}, [r1]!             @ src[0..3]
        vld1.32         {q2}, [r2]!             @ filter[0..3]
        vmla.f32        q0, q1, q2              @ accumulator += src[0..3] * filter[0..3]
        subs            r3, #4                  @ filter_length -= 4
        bgt             1b                      @ loop while filter_length > 0
        vpadd.f32       d0, d0, d1              @ d0 = {acc0 + acc1, acc2 + acc3}
        vpadd.f32       d0, d0, d0              @ d0[0] = sum of the four accumulator lanes
        vst1.32         {d0[0]}, [r0]           @ write accumulator
        bx              lr                      @ return (interworking-safe)
endfunc

/*
 * Dot product of 32-bit float samples with a filter, 8 taps per iteration.
 *
 * In:       r0 = address that receives the single 32-bit float result
 *           r1 = src    (post-incremented by 32 bytes per iteration)
 *           r2 = filter (post-incremented by 32 bytes per iteration)
 *           r3 = filter_length; the loop body always runs at least once and
 *                consumes 8 taps per pass, so a positive multiple of 8 is
 *                expected (not checked here)
 * Clobbers: r1-r3, q0-q2, q8, q9, condition flags
 */
function ff_resample_common_apply_filter_x8_float_neon, export=1
        vmov.f32        q0, #0.0                @ accumulator = {0, 0, 0, 0}
1:      vld1.32         {q1}, [r1]!             @ src[0..3]
        vld1.32         {q2}, [r2]!             @ filter[0..3]
        vld1.32         {q8}, [r1]!             @ src[4..7]
        vld1.32         {q9}, [r2]!             @ filter[4..7]
        vmla.f32        q0, q1, q2              @ accumulator += src[0..3] * filter[0..3]
        vmla.f32        q0, q8, q9              @ accumulator += src[4..7] * filter[4..7]
        subs            r3, #8                  @ filter_length -= 8
        bgt             1b                      @ loop while filter_length > 0
        vpadd.f32       d0, d0, d1              @ d0 = {acc0 + acc1, acc2 + acc3}
        vpadd.f32       d0, d0, d0              @ d0[0] = sum of the four accumulator lanes
        vst1.32         {d0[0]}, [r0]           @ write accumulator
        bx              lr                      @ return (interworking-safe)
endfunc

/*
 * Dot product of signed 16-bit samples with a filter, 4 taps per iteration.
 * Products are widened to 32 bits and summed in four 32-bit lanes; the
 * 32-bit total is stored without any rounding, shifting or saturation.
 *
 * In:       r0 = address that receives the single 32-bit integer result
 *           r1 = src    (post-incremented by 8 bytes per iteration)
 *           r2 = filter (post-incremented by 8 bytes per iteration)
 *           r3 = filter_length; the loop body always runs at least once and
 *                consumes 4 taps per pass, so a positive multiple of 4 is
 *                expected (not checked here)
 * Clobbers: r1-r3, q0, d2, d4, condition flags
 */
function ff_resample_common_apply_filter_x4_s16_neon, export=1
        vmov.s32        q0, #0                  @ accumulator = {0, 0, 0, 0}
1:      vld1.16         {d2}, [r1]!             @ src[0..3]
        vld1.16         {d4}, [r2]!             @ filter[0..3]
        vmlal.s16       q0, d2, d4              @ accumulator += src[0..3] * filter[0..3]
        subs            r3, #4                  @ filter_length -= 4
        bgt             1b                      @ loop while filter_length > 0
        vpadd.s32       d0, d0, d1              @ d0 = {acc0 + acc1, acc2 + acc3}
        vpadd.s32       d0, d0, d0              @ d0[0] = sum of the four accumulator lanes
        vst1.32         {d0[0]}, [r0]           @ write accumulator
        bx              lr                      @ return (interworking-safe)
endfunc

/*
 * Dot product of signed 16-bit samples with a filter, 8 taps per iteration.
 * Products are widened to 32 bits and summed in four 32-bit lanes; the
 * 32-bit total is stored without any rounding, shifting or saturation.
 *
 * In:       r0 = address that receives the single 32-bit integer result
 *           r1 = src    (post-incremented by 16 bytes per iteration)
 *           r2 = filter (post-incremented by 16 bytes per iteration)
 *           r3 = filter_length; the loop body always runs at least once and
 *                consumes 8 taps per pass, so a positive multiple of 8 is
 *                expected (not checked here)
 * Clobbers: r1-r3, q0-q2, condition flags
 */
function ff_resample_common_apply_filter_x8_s16_neon, export=1
        vmov.s32        q0, #0                  @ accumulator = {0, 0, 0, 0}
1:      vld1.16         {q1}, [r1]!             @ src[0..7]
        vld1.16         {q2}, [r2]!             @ filter[0..7]
        vmlal.s16       q0, d2, d4              @ accumulator += src[0..3] * filter[0..3]
        vmlal.s16       q0, d3, d5              @ accumulator += src[4..7] * filter[4..7]
        subs            r3, #8                  @ filter_length -= 8
        bgt             1b                      @ loop while filter_length > 0
        vpadd.s32       d0, d0, d1              @ d0 = {acc0 + acc1, acc2 + acc3}
        vpadd.s32       d0, d0, d0              @ d0[0] = sum of the four accumulator lanes
        vst1.32         {d0[0]}, [r0]           @ write accumulator
        bx              lr                      @ return (interworking-safe)
endfunc