/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"

// General notes for every routine in this file:
//  - AArch64 NEON, GNU as syntax; function/endfunc/const/endconst/movrel/X()
//    come from the included asm.S.
//  - All routines are leaf code: no stack use, no calls.  Only x0-x9,
//    v0-v7 and v16-v23 are written, plus the condition flags.
//  - Every counted loop is bottom-tested (subs + b.gt), so the loop body
//    always executes at least once.
//  - NOTE(review): argument meanings are described at register level; the
//    matching C prototypes are not visible here and should be confirmed
//    against the DSP context declaration.

// Alternating sign pattern, used to negate every second float lane.
const factors, align=4
        .float 1.0, -1.0, 1.0, -1.0
endconst

// Per-variant multiplier vectors loaded into v1 before apply_noise_common.
// phi_noise_1 and phi_noise_3 hold two rows; the row is picked by bit 0
// of x4 in the corresponding function.
const phi_noise_0, align=4
        .float 1.0, 0.0, 1.0, 0.0
endconst

const phi_noise_1, align=4
        .float 0.0, 1.0, 0.0, -1.0
        .float 0.0, -1.0, 0.0, 1.0
endconst

const phi_noise_2, align=4
        .float -1.0, 0.0, -1.0, 0.0
endconst

const phi_noise_3, align=4
        .float 0.0, -1.0, 0.0, 1.0
        .float 0.0, 1.0, 0.0, -1.0
endconst

// ff_sbr_sum64x5_neon
// In:    x0 = float buffer of at least 320 elements, updated in place
// Does:  for k in 0..63: buf[k] += buf[k+64] + buf[k+128] + buf[k+192]
//        + buf[k+256]
// Clobb: x0-x5, v0-v4, flags
function ff_sbr_sum64x5_neon, export=1
        add             x1, x0, #64*4           // x1..x4 = the four rows that
        add             x2, x0, #128*4          // get folded into row 0
        add             x3, x0, #192*4
        add             x4, x0, #256*4
        mov             x5, #64                 // floats left to produce
1:      ld1             {v0.4S}, [x0]
        ld1             {v1.4S}, [x1], #16
        fadd            v0.4S, v0.4S, v1.4S
        ld1             {v2.4S}, [x2], #16
        fadd            v0.4S, v0.4S, v2.4S
        ld1             {v3.4S}, [x3], #16
        fadd            v0.4S, v0.4S, v3.4S
        ld1             {v4.4S}, [x4], #16
        fadd            v0.4S, v0.4S, v4.4S
        st1             {v0.4S}, [x0], #16
        subs            x5, x5, #4
        b.gt            1b
        ret
endfunc

// ff_sbr_sum_square_neon
// In:    x0 = float data, w1 = element count where one element is two
//        floats (the counter drops by 2 for every 4 floats consumed)
// Out:   sum of squares of all consumed floats, replicated in every lane
//        of v0 (so it is available as s0)
// Clobb: x0, w1, v0, v1, flags
function ff_sbr_sum_square_neon, export=1
        movi            v0.4S, #0               // four partial sums
1:      ld1             {v1.4S}, [x0], #16
        fmla            v0.4S, v1.4S, v1.4S
        subs            w1, w1, #2
        b.gt            1b
        faddp           v0.4S, v0.4S, v0.4S     // two pairwise adds fold the
        faddp           v0.4S, v0.4S, v0.4S     // four partial sums into one
        ret
endfunc

// ff_sbr_neg_odd_64_neon
// In:    x0 = 64 floats, updated in place
// Does:  flips the sign bit of every odd-indexed float (1, 3, ..., 63)
// Clobb: x0, x1, v0-v3, v5
function ff_sbr_neg_odd_64_neon, export=1
        mov             x1, x0                  // x1 = store cursor, x0 = load cursor
        movi            v5.4S, #1<<7, lsl #24   // 0x80000000 = float sign bit
        ld2             {v0.4S, v1.4S}, [x0], #32   // v0 = even, v1 = odd elements
        eor             v1.16B, v1.16B, v5.16B
        ld2             {v2.4S, v3.4S}, [x0], #32
.rept 3
        st2             {v0.4S, v1.4S}, [x1], #32
        eor             v3.16B, v3.16B, v5.16B
        ld2             {v0.4S, v1.4S}, [x0], #32
        st2             {v2.4S, v3.4S}, [x1], #32
        eor             v1.16B, v1.16B, v5.16B
        ld2             {v2.4S, v3.4S}, [x0], #32
.endr
        eor             v3.16B, v3.16B, v5.16B
        st2             {v0.4S, v1.4S}, [x1], #32
        st2             {v2.4S, v3.4S}, [x1], #32
        ret
endfunc

// ff_sbr_qmf_pre_shuffle_neon
// In:    x0 = float buffer z of at least 128 elements; z[0..63] is read,
//        z[64..127] is written
// Does:  z[64] = z[0], z[65] = z[1]
//        for k in 1..31: z[64+2k] = -z[64-k], z[65+2k] = z[k+1]
// Clobb: x0-x4, v0-v2, v6
function ff_sbr_qmf_pre_shuffle_neon, export=1
        add             x1, x0, #60*4           // descending read cursor
        add             x2, x0, #64*4           // write cursor
        mov             x3, #-16
        mov             x4, #-4
        movi            v6.4S, #1<<7, lsl #24   // sign-bit mask
        ld1             {v0.2S}, [x0], #8
        st1             {v0.2S}, [x2], #8
.rept 7                                         // k = 1..28, four per pass
        ld1             {v1.4S}, [x1], x3
        ld1             {v2.4S}, [x0], #16
        eor             v1.16B, v1.16B, v6.16B
        rev64           v1.4S, v1.4S            // rev64 + ext #8 = full
        ext             v1.16B, v1.16B, v1.16B, #8  // 4-lane reversal
        st2             {v1.4S, v2.4S}, [x2], #32
.endr
        // Tail, k = 29..31: only three elements remain on each side.
        add             x1, x1, #8
        ld1             {v1.2S}, [x1], x4
        ld1             {v2.2S}, [x0], #8
        ld1             {v1.S}[3], [x1]
        ld1             {v2.S}[2], [x0]
        eor             v1.16B, v1.16B, v6.16B
        rev64           v1.4S, v1.4S
        st2             {v1.2S, v2.2S}, [x2], #16
        st2             {v1.S, v2.S}[2], [x2]
        ret
endfunc

// ff_sbr_qmf_post_shuffle_neon
// In:    x0 = output of 32 float pairs, x1 = input of 64 floats
// Does:  for k in 0..31: out[k] = { -in[63-k], in[k] }
// Clobb: x0-x4, v0, v1, v6, flags
function ff_sbr_qmf_post_shuffle_neon, export=1
        add             x2, x1, #60*4           // descending read cursor
        mov             x3, #-16
        mov             x4, #32                 // pairs left to write
        movi            v6.4S, #1<<7, lsl #24   // sign-bit mask
1:      ld1             {v0.4S}, [x2], x3
        ld1             {v1.4S}, [x1], #16
        eor             v0.16B, v0.16B, v6.16B
        rev64           v0.4S, v0.4S            // full 4-lane reversal
        ext             v0.16B, v0.16B, v0.16B, #8
        st2             {v0.4S, v1.4S}, [x0], #32
        subs            x4, x4, #4
        b.gt            1b
        ret
endfunc

// ff_sbr_qmf_deint_neg_neon
// In:    x0 = output of 64 floats, x1 = input of 64 floats
// Does:  for i in 0..31: out[i] = in[63-2i], out[63-i] = -in[62-2i]
// Clobb: x0-x4, v0-v2, flags
function ff_sbr_qmf_deint_neg_neon, export=1
        add             x1, x1, #56*4           // read input back to front
        add             x2, x0, #60*4           // descending write cursor
        mov             x3, #-32
        mov             x4, #32
        movi            v2.4S, #1<<7, lsl #24   // sign-bit mask
1:      ld2             {v0.4S, v1.4S}, [x1], x3    // v0 = even, v1 = odd inputs
        eor             v0.16B, v0.16B, v2.16B
        rev64           v1.4S, v1.4S            // full 4-lane reversal
        ext             v1.16B, v1.16B, v1.16B, #8
        st1             {v0.4S}, [x2]
        st1             {v1.4S}, [x0], #16
        sub             x2, x2, #16
        subs            x4, x4, #4
        b.gt            1b
        ret
endfunc

// ff_sbr_qmf_deint_bfly_neon
// In:    x0 = output of 128 floats, x1 = src0 (64 floats), x2 = src1 (64 floats)
// Does:  for i in 0..63: out[i]     = src0[i] - src1[63-i]
//                        out[127-i] = src0[i] + src1[63-i]
// Clobb: x0-x5, v0-v3, flags
function ff_sbr_qmf_deint_bfly_neon, export=1
        add             x2, x2, #60*4           // src1 is read back to front
        add             x3, x0, #124*4          // descending write cursor
        mov             x4, #64
        mov             x5, #-16
1:      ld1             {v0.4S}, [x1], #16
        ld1             {v1.4S}, [x2], x5
        rev64           v2.4S, v0.4S            // v2 = lane-reversed src0
        ext             v2.16B, v2.16B, v2.16B, #8
        rev64           v3.4S, v1.4S            // v3 = lane-reversed src1
        ext             v3.16B, v3.16B, v3.16B, #8
        fadd            v1.4S, v1.4S, v2.4S
        fsub            v0.4S, v0.4S, v3.4S
        st1             {v0.4S}, [x0], #16
        st1             {v1.4S}, [x3], x5
        subs            x4, x4, #4
        b.gt            1b
        ret
endfunc

// ff_sbr_hf_gen_neon
// In:    x0 = output float pairs, x1 = input float pairs,
//        x2, x3 = pointers to one float pair each, v0 lane 0 = float scalar,
//        w4 = first index, w5 = end index (both sign-extended here)
// Does:  with b = v0 lane 0, c2 = pair at x2 scaled by b, c3 = pair at x3
//        scaled by b*b, and treating each pair as a complex number, for
//        i in w4..w5-1:
//            out[i] = in[i-2] * c3 + in[i-1] * c2 + in[i]
// NOTE(review): presumably x2/x3 are alpha0/alpha1 and the scalar is bw;
//        confirm against the C prototype.
// Clobb: x0, x1, x4-x7, v0-v5, v7, flags
function ff_sbr_hf_gen_neon, export=1
        sxtw            x4, w4
        sxtw            x5, w5
        movrel          x6, factors
        ld1             {v7.4S}, [x6]           // { 1, -1, 1, -1 }
        dup             v1.4S, v0.S[0]          // { b, b, b, b }
        mov             v2.8B, v1.8B
        mov             v2.S[2], v7.S[0]
        mov             v2.S[3], v7.S[0]        // v2 = { b, b, 1, 1 }
        fmul            v1.4S, v1.4S, v2.4S     // v1 = { b*b, b*b, b, b }
        ld1             {v0.D}[0], [x3]
        ld1             {v0.D}[1], [x2]
        fmul            v0.4S, v0.4S, v1.4S     // v0 = { c3.re, c3.im, c2.re, c2.im }
        fmul            v1.4S, v0.4S, v7.4S     // imaginary parts negated: real-part weights
        rev64           v0.4S, v0.4S            // re/im swapped: imaginary-part weights
        sub             x7, x5, x4              // iteration count
        add             x0, x0, x4, lsl #3
        add             x1, x1, x4, lsl #3
        sub             x1, x1, #16             // start two pairs before the first index
1:      ld1             {v2.4S}, [x1], #16      // in[i-2], in[i-1]
        ld1             {v3.2S}, [x1]           // in[i]
        fmul            v4.4S, v2.4S, v1.4S
        fmul            v5.4S, v2.4S, v0.4S
        faddp           v4.4S, v4.4S, v4.4S     // horizontal sums: lane 0 of v4
        faddp           v5.4S, v5.4S, v5.4S     // = real part, lane 0 of v5
        faddp           v4.4S, v4.4S, v4.4S     // = imaginary part
        faddp           v5.4S, v5.4S, v5.4S
        mov             v4.S[1], v5.S[0]
        fadd            v4.2S, v4.2S, v3.2S
        st1             {v4.2S}, [x0], #8
        sub             x1, x1, #8              // net advance: one pair per pass
        subs            x7, x7, #1
        b.gt            1b
        ret
endfunc

// ff_sbr_hf_g_filt_neon
// In:    x0 = output float pairs, x1 = input base, x2 = float gains,
//        w3 = count, w4 = pair index within each input row
// Does:  for m in 0..w3-1: out[m] = (pair at x1 + 8*w4 + m*320 bytes) * gain[m]
//        The 320-byte stride is 40 pairs of two floats.
// NOTE(review): w4 is sign-extended from 32 bits; if the C argument is a
//        64-bit type this only matters for values outside int range.
// Clobb: x0-x5, v0-v2, flags
function ff_sbr_hf_g_filt_neon, export=1
        sxtw            x3, w3
        sxtw            x4, w4
        mov             x5, #40*2*4             // row stride in bytes
        add             x1, x1, x4, lsl #3
1:      ld1             {v0.2S}, [x1], x5
        ld1             {v1.S}[0], [x2], #4
        fmul            v2.4S, v0.4S, v1.S[0]
        st1             {v2.2S}, [x0], #8
        subs            x3, x3, #1
        b.gt            1b
        ret
endfunc

// ff_sbr_autocorrelate_neon
// In:    x0 = input float pairs (40 pairs are read), x1 = output floats
// Does:  accumulates, over a fixed 38 iterations, products of each pair
//        with itself and with the following two pairs, then applies
//        first/last-term corrections.  Results are stored at byte offsets
//        0 (4 floats), 16 (1 float), 24 (2 floats) and 40 (1 float) from x1.
// NOTE(review): presumably these are the lag-0/1/2 autocorrelation sums
//        laid out to match the C phi[][][] array; confirm with the caller.
// Clobb: x0-x3, v0-v5, v16-v23, flags
function ff_sbr_autocorrelate_neon, export=1
        mov             x2, #38                 // fixed iteration count
        movrel          x3, factors
        ld1             {v0.4S}, [x3]           // { 1, -1, 1, -1 }
        movi            v1.4S, #0               // accumulators
        movi            v2.4S, #0
        movi            v3.4S, #0
        ld1             {v4.2S}, [x0], #8       // v4 = current pair
        ld1             {v5.2S}, [x0], #8       // v5 low half = next pair
        fmul            v16.2S, v4.2S, v4.2S    // first-pair terms, subtracted
        fmul            v17.2S, v5.2S, v4.S[0]  // again after the loop
        fmul            v18.2S, v5.2S, v4.S[1]
1:      ld1             {v5.D}[1], [x0], #8     // v5 high half = pair after next
        fmla            v1.2S, v4.2S, v4.2S
        fmla            v2.4S, v5.4S, v4.S[0]
        fmla            v3.4S, v5.4S, v4.S[1]
        mov             v4.D[0], v5.D[0]        // slide the three-pair window
        mov             v5.D[0], v5.D[1]
        subs            x2, x2, #1
        b.gt            1b
        fmul            v19.2S, v4.2S, v4.2S    // last-pair terms, added below
        fmul            v20.2S, v5.2S, v4.S[0]
        fmul            v21.2S, v5.2S, v4.S[1]
        fadd            v22.4S, v2.4S, v20.4S
        fsub            v22.4S, v22.4S, v17.4S
        fadd            v23.4S, v3.4S, v21.4S
        fsub            v23.4S, v23.4S, v18.4S
        rev64           v23.4S, v23.4S
        fmul            v23.4S, v23.4S, v0.4S
        fadd            v22.4S, v22.4S, v23.4S
        st1             {v22.4S}, [x1], #16     // output bytes 0..15
        fadd            v23.2S, v1.2S, v19.2S
        fsub            v23.2S, v23.2S, v16.2S
        faddp           v23.2S, v23.2S, v23.2S
        st1             {v23.S}[0], [x1]        // output bytes 16..19
        add             x1, x1, #8
        rev64           v3.2S, v3.2S
        fmul            v3.2S, v3.2S, v0.2S
        fadd            v2.2S, v2.2S, v3.2S
        st1             {v2.2S}, [x1]           // output bytes 24..31
        add             x1, x1, #16
        faddp           v1.2S, v1.2S, v1.2S
        st1             {v1.S}[0], [x1]         // output bytes 40..43
        ret
endfunc

// apply_noise_common: shared loop body of the ff_sbr_hf_apply_noise_N_neon
// entry points.
// In:    x0 = float pairs, updated in place
//        x1 = float array s, x2 = float array q (one float per pair)
//        w3 = starting table index, w5 = pair count
//        v1 = four multipliers preloaded by the entry point
// Does:  two pairs per pass.  For each pair m: if s[m] != 0 the pair gets
//        s[m] * (its two lanes of v1) added, otherwise it gets
//        q[m] * (next entry of ff_sbr_noise_table) added.  The table index
//        is (w3 + 1 + m) & 0x1ff.
// NOTE(review): each pass loads two consecutive table entries, so a pass
//        starting at index 0x1ff reads entry 0x200; this assumes the table
//        is padded past 512 entries - confirm where the table is defined.
// Clobb: x0-x3, x5, x7, x8, v2-v7, flags
.macro apply_noise_common
        sxtw            x3, w3
        sxtw            x5, w5
        movrel          x7, X(ff_sbr_noise_table)
        add             x3, x3, #1
1:      and             x3, x3, #0x1ff          // wrap the index to 9 bits
        add             x8, x7, x3, lsl #3      // 8 bytes per table entry
        add             x3, x3, #2
        ld1             {v2.4S}, [x0]
        ld1             {v3.2S}, [x1], #8
        ld1             {v4.2S}, [x2], #8
        ld1             {v5.4S}, [x8]
        mov             v6.16B, v2.16B
        zip1            v3.4S, v3.4S, v3.4S     // { s0, s0, s1, s1 }
        zip1            v4.4S, v4.4S, v4.4S     // { q0, q0, q1, q1 }
        fmla            v6.4S, v1.4S, v3.4S     // candidate for s != 0
        fmla            v2.4S, v5.4S, v4.4S     // candidate for s == 0
        fcmeq           v7.4S, v3.4S, #0
        bif             v2.16B, v6.16B, v7.16B  // take v6 wherever s != 0
        st1             {v2.4S}, [x0], #16
        subs            x5, x5, #2
        b.gt            1b
.endm

// ff_sbr_hf_apply_noise_0_neon: apply_noise_common with v1 = phi_noise_0.
// x4 is not read by this variant.
function ff_sbr_hf_apply_noise_0_neon, export=1
        movrel          x9, phi_noise_0
        ld1             {v1.4S}, [x9]
        apply_noise_common
        ret
endfunc

// ff_sbr_hf_apply_noise_1_neon: apply_noise_common with v1 = row (x4 & 1)
// of phi_noise_1.
function ff_sbr_hf_apply_noise_1_neon, export=1
        movrel          x9, phi_noise_1
        and             x4, x4, #1
        add             x9, x9, x4, lsl #4      // 16 bytes per row
        ld1             {v1.4S}, [x9]
        apply_noise_common
        ret
endfunc

// ff_sbr_hf_apply_noise_2_neon: apply_noise_common with v1 = phi_noise_2.
// x4 is not read by this variant.
function ff_sbr_hf_apply_noise_2_neon, export=1
        movrel          x9, phi_noise_2
        ld1             {v1.4S}, [x9]
        apply_noise_common
        ret
endfunc

// ff_sbr_hf_apply_noise_3_neon: apply_noise_common with v1 = row (x4 & 1)
// of phi_noise_3.
function ff_sbr_hf_apply_noise_3_neon, export=1
        movrel          x9, phi_noise_3
        and             x4, x4, #1
        add             x9, x9, x4, lsl #4      // 16 bytes per row
        ld1             {v1.4S}, [x9]
        apply_noise_common
        ret
endfunc