/*
 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"
#include "asm-offsets.h"

/*
 * resample_one fmt, es=2
 *
 * Expands to the exported function ff_resample_one_<fmt>_neon, which
 * multiplies filter_length source elements with filter_length filter
 * coefficients, accumulates the products and stores one reduced result.
 *
 *   fmt  format suffix used in the symbol name (dbl, flt, s16, s32)
 *   es   log2 of the element size in bytes
 *
 * The per-format helper macros LOAD1, LOAD2, LOAD4, LOAD8, M_MUL, M_MLA
 * and STORE_ONE must be defined before each expansion.  M_MUL2 and M_MLA2
 * must additionally be defined for dbl; for every other format they are
 * defined here as no-ops.  All helpers are purged at the end of the
 * expansion so that the next format can define its own set.
 *
 * Register use of the generated function:
 *   x0  context; FILTER_BANK, FILTER_LENGTH, PHASE_SHIFT and the word
 *       following PHASE_SHIFT (phase_mask) are read from it
 *   x1  destination base, the result is stored to element x2
 *   w2  destination index, sign-extended to x2
 *   x3  source base
 *   w4  index: index >> phase_shift is the first source element,
 *       index & phase_mask selects the filter
 *   clobbers x2-x4, x6-x11, v0-v7, v16-v27 and the condition flags
 *
 * NOTE(review): index is consumed as a 32-bit unsigned argument (w4);
 * confirm against the C prototype of the callers.
 */
.macro resample_one     fmt, es=2
.ifnc \fmt, dbl
    .macro M_MUL2 x:vararg
    .endm
    .macro M_MLA2 x:vararg
    .endm
.endif
function ff_resample_one_\fmt\()_neon, export=1
        sxtw            x2,  w2                         // dst index is a signed 32-bit argument
        ldr             x9,  [x0, #FILTER_BANK]
        ldr             w6,  [x0, #FILTER_LENGTH]
        ldp             w7,  w8,  [x0, #PHASE_SHIFT]    // phase_shift and phase_mask
        // Shift the 32-bit view only: AAPCS64 leaves the upper half of x4
        // unspecified for a 32-bit argument, and writing w10 zero-extends
        // the result into x10.
        lsr             w10, w4,  w7                    // sample_index
        and             x4,  x4,  x8                    // filter number; x8 is zero-extended,
                                                        // so the upper half of x4 is cleared
        lsl             x11, x6,  #\es                  // filter_length * elem_size
        add             x3,  x3,  x10, lsl #\es         // src[sample_index]
        madd            x9,  x11, x4,  x9               // filter
        cmp             w6,  #16
        b.lt            5f
8:      // remaining filter_length at least 16
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        M_MUL           v0,  v4,  v16, v1               // first products initialise the accumulators
        M_MUL2          v1,  v6,  v18
7:
        LOAD8           v20, v21, v22, v23, x3
        M_MLA           v0,  v5,  v17, v1
        M_MLA2          v1,  v7,  v19
        LOAD8           v24, v25, v26, v27, x9
        M_MLA           v0,  v20, v24, v1
        M_MLA2          v1,  v22, v26
        b.eq            6f                              // flags from the last subs: nothing left
        cmp             w6,  #16
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        b.lt            4f                              // fewer than 16 left: finish in the tail
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        M_MLA           v0,  v4,  v16, v1
        M_MLA2          v1,  v6,  v18
        b               7b
6:
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        STORE_ONE       0,   x1,  x2,  v1
        ret
5:      // filter_length below 16: start from cleared accumulators
        movi            v0.16b, #0
        movi            v1.16b, #0
4:      // remaining filter_length 1-15
        cmp             w6,  #4
        b.lt            2f
        subs            w6,  w6,  #4
        LOAD4           v4,  v5,  x3
        LOAD4           v6,  v7,  x9
        M_MLA           v0,  v4,  v6,  v1
        M_MLA2          v1,  v5,  v7
        b.eq            0f
        b               4b
2:      // remaining filter_length 1-3
        cmp             w6,  #2
        b.lt            1f
        LOAD2           2,   x3
        LOAD2           3,   x9
        subs            w6,  w6,  #2
        M_MLA           v0,  v2,  v3
        b.eq            0f
1:      // remaining filter_length 1
        LOAD1           6,   x3
        LOAD1           7,   x9
        M_MLA           v0,  v6,  v7
0:
        STORE_ONE       0,   x1,  x2,  v1
        ret
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem LOAD8
.purgem M_MLA
.purgem M_MLA2
.purgem M_MUL
.purgem M_MUL2
.purgem STORE_ONE
.endm


// dbl loads: 8-byte elements, two per vector register, post-incremented
.macro LOAD1 reg, ptr
        ldr             d\reg, [\ptr], #8
.endm
.macro LOAD2 reg, ptr
        ld1             {v\reg\().2d}, [\ptr], #16
.endm
.macro LOAD4 first, second, ptr
        ld1             {\first\().2d, \second\().2d}, [\ptr], #32
.endm
.macro LOAD8 first, second, third, fourth, ptr
        ld1             {\first\().2d, \second\().2d, \third\().2d, \fourth\().2d}, [\ptr], #64
.endm
// dbl arithmetic: two lanes per register; any operand after the third is ignored
.macro M_MLA acc, lhs, rhs, unused:vararg
        fmla            \acc\().2d, \lhs\().2d, \rhs\().2d
.endm
// second accumulator chain, forwards its operands to M_MLA
.macro M_MLA2 args:vararg
        M_MLA           \args
.endm
.macro M_MUL acc, lhs, rhs, unused:vararg
        fmul            \acc\().2d, \lhs\().2d, \rhs\().2d
.endm
// second accumulator chain, forwards its operands to M_MUL
.macro M_MUL2 args:vararg
        M_MUL           \args
.endm
// add the second accumulator, sum both lanes and store one double
.macro STORE_ONE acc, base, slot, hi
        fadd            v\acc\().2d, v\acc\().2d, \hi\().2d
        faddp           d\acc\(), v\acc\().2d
        str             d\acc\(), [\base, \slot, lsl #3]
.endm

resample_one dbl, 3


// flt helpers: 4-byte elements, four per vector register
.macro LOAD1 reg, ptr
        ldr             s\reg, [\ptr], #4
.endm
.macro LOAD2 reg, ptr
        ld1             {v\reg\().2s}, [\ptr], #8
.endm
// only the first register operand is loaded
.macro LOAD4 first, second, ptr
        ld1             {\first\().4s}, [\ptr], #16
.endm
// only the first two register operands are loaded
.macro LOAD8 first, second, third, fourth, ptr
        ld1             {\first\().4s, \second\().4s}, [\ptr], #32
.endm
.macro M_MLA acc, lhs, rhs, unused:vararg
        fmla            \acc\().4s, \lhs\().4s, \rhs\().4s
.endm
.macro M_MUL acc, lhs, rhs, unused:vararg
        fmul            \acc\().4s, \lhs\().4s, \rhs\().4s
.endm
// sum the four lanes pairwise and store one float; the last operand is not used
.macro STORE_ONE acc, base, slot, hi
        faddp           v\acc\().4s, v\acc\().4s, v\acc\().4s
        faddp           s\acc\(), v\acc\().2s
        str             s\acc\(), [\base, \slot, lsl #2]
.endm

resample_one flt


// s16 helpers: 2-byte elements, widened to 32-bit products
.macro LOAD1 reg, ptr
        ldr             h\reg, [\ptr], #2
.endm
.macro LOAD2 reg, ptr
        ldr             s\reg, [\ptr], #4
.endm
// only the first register operand is loaded
.macro LOAD4 first, second, ptr
        ld1             {\first\().4h}, [\ptr], #8
.endm
// only the first two register operands are loaded
.macro LOAD8 first, second, third, fourth, ptr
        ld1             {\first\().4h, \second\().4h}, [\ptr], #16
.endm
.macro M_MLA acc, lhs, rhs, unused:vararg
        smlal           \acc\().4s, \lhs\().4h, \rhs\().4h
.endm
.macro M_MUL acc, lhs, rhs, unused:vararg
        smull           \acc\().4s, \lhs\().4h, \rhs\().4h
.endm
// sum the four lanes, narrow with rounding and saturation (shift by 15)
// and store one halfword; the last operand is not used
.macro STORE_ONE acc, base, slot, hi
        addp            v\acc\().4s, v\acc\().4s, v\acc\().4s
        addp            v\acc\().4s, v\acc\().4s, v\acc\().4s
        sqrshrn         v\acc\().4h, v\acc\().4s, #15
        str             h\acc\(), [\base, \slot, lsl #1]
.endm

resample_one s16, 1


// s32 loads: 4-byte elements, four per vector register
.macro LOAD1 reg, ptr
        ldr             s\reg, [\ptr], #4
.endm
.macro LOAD2 reg, ptr
        ld1             {v\reg\().2s}, [\ptr], #8
.endm
// only the first register operand is loaded
.macro LOAD4 first, second, ptr
        ld1             {\first\().4s}, [\ptr], #16
.endm
// s32: eight 4-byte elements; only the first two register operands are loaded
.macro LOAD8 first, second, third, fourth, ptr
        ld1             {\first\().4s, \second\().4s}, [\ptr], #32
.endm
// 32x32 -> 64-bit multiply-accumulate: the low two lanes go to the first
// accumulator; when a fourth operand is given the high two lanes go to it
.macro M_MLA lo, lhs, rhs, hi:vararg
        smlal           \lo\().2d, \lhs\().2s, \rhs\().2s
.ifnb \hi
        smlal2          \hi\().2d, \lhs\().4s, \rhs\().4s
.endif
.endm
// same split as M_MLA, but the products overwrite the accumulators
.macro M_MUL lo, lhs, rhs, hi:vararg
        smull           \lo\().2d, \lhs\().2s, \rhs\().2s
.ifnb \hi
        smull2          \hi\().2d, \lhs\().4s, \rhs\().4s
.endif
.endm
// add the second accumulator, sum both lanes, narrow with rounding and
// saturation (shift by 30) and store one word
.macro STORE_ONE acc, base, slot, hi
        add             v\acc\().2d, v\acc\().2d, \hi\().2d
        addp            d\acc\(), v\acc\().2d
        sqrshrn         v\acc\().2s, v\acc\().2d, #30
        str             s\acc\(), [\base, \slot, lsl #2]
.endm

resample_one s32