/*
 * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#include "asm-offsets.h"

/*
 * resample_one fmt, es
 *
 * Emits the exported function ff_resample_one_<fmt>_neon, which computes one
 * output sample as the dot product of filter_length source samples with one
 * phase of the filter bank.  "es" is log2 of the sample size in bytes; it is
 * the shift used to turn element indices into byte offsets.
 *
 * Inputs as read by the code:
 *   r0          context pointer (fields at PHASE_SHIFT, PHASE_SHIFT+4,
 *               FILTER_LENGTH and FILTER_BANK are loaded from it)
 *   r1, r2      destination base and destination index
 *   r3          source base
 *   [sp]        index (first stacked word at entry, sp+8 after the push)
 *
 * Registers inside the tap loop:
 *   r0  filter pointer     r3  source pointer     r5  taps remaining
 *   r1  &dst[dst_index]
 *
 * r4 and r5 are used as scratch and restored before returning.
 *
 * The caller of this macro must have defined LOAD1, LOAD2, LOAD4, MLA1, MLA2,
 * MLA4, MUL4, INIT4 and STORE for the sample format; they are purged at the
 * end so the next format can redefine them.
 *
 * NOTE(review): a filter_length of 0 would fall through to label 1 and still
 * process one tap; presumably this never occurs, confirm against the caller.
 */
.macro resample_one     fmt, es=2
function ff_resample_one_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es     /* &dst[dst_index] */

        /* NOTE(review): PHASE_SHIFT+4 assumes phase_mask is the word that
         * directly follows phase_shift in the context; confirm the layout. */
        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8]             /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2               /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4               /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es     /* &src[sample_index] */

        /* flags from this cmp are consumed by the blt below; the add in
         * between does not update them */
        cmp             r5, #8
        add             r0, ip, r2, lsl #\es     /* filter = &filter_bank[...] */

        blt             5f
8:
        /* at least 8 taps: the first 4 initialise the accumulator via MUL4 */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        /* second group of 4 for the current block of 8; the beq tests the
         * flags of the most recent subs (the NEON macros leave them alone) */
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        /* filter_length was a multiple of 8: accumulate last group and store */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        /* fewer than 8 taps in total: start from a zeroed accumulator */
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/* float32 */
/*
 * Accumulator: q8 (d16, d17), four float lanes.
 * LOADn post-increments the filter pointer r0 and the source pointer r3.
 */
.macro  LOAD1
        /* clear d0 so the unused upper lane contributes 0 to MLA1 */
        veor.32         d0,  d0
        vld1.32         {d0[0]},  [r0]!          /* load filter */
        vld1.32         {d4[0]},  [r3]!          /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0},     [r0]!          /* load filter */
        vld1.32         {d4},     [r3]!          /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1},  [r0]!          /* load filter */
        vld1.32         {d4,d5},  [r3]!          /* load src */
.endm
.macro  MLA1
        /* multiplies by the scalar d4[0] only */
        vmla.f32        d16, d0,  d4[0]
.endm
.macro  MLA2
        vmla.f32        d16, d0,  d4
.endm
.macro  MLA4
        vmla.f32        d16, d0,  d4
        vmla.f32        d17, d1,  d5
.endm
.macro  MUL4
        /* overwrites the accumulator, so no prior INIT4 is needed */
        vmul.f32        d16, d0,  d4
        vmul.f32        d17, d1,  d5
.endm
.macro  INIT4
        veor.f32        q8,  q8
.endm
.macro  STORE
        /* horizontal sum of the four lanes, then store lane 0 to dst */
        vpadd.f32       d16, d16, d17
        vpadd.f32       d16, d16, d16
        vst1.32         d16[0],   [r1]
.endm

resample_one flt, 2


/* s32 */
/*
 * Accumulators: q8 and q9, two 64-bit lanes each (vmull/vmlal.s32 widen the
 * 32x32 products to 64 bits).
 */
.macro  LOAD1
        /* clear d0 so the unused upper lane contributes 0 to MLA1 */
        veor.32         d0,  d0
        vld1.32         {d0[0]},  [r0]!          /* load filter */
        vld1.32         {d4[0]},  [r3]!          /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0},     [r0]!          /* load filter */
        vld1.32         {d4},     [r3]!          /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1},  [r0]!          /* load filter */
        vld1.32         {d4,d5},  [r3]!          /* load src */
.endm
.macro  MLA1
        vmlal.s32       q8,  d0,  d4[0]
.endm
.macro  MLA2
        vmlal.s32       q8,  d0,  d4
.endm
.macro  MLA4
        vmlal.s32       q8,  d0,  d4
        vmlal.s32       q9,  d1,  d5
.endm
.macro  MUL4
        /* overwrites both accumulators, so no prior INIT4 is needed */
        vmull.s32       q8,  d0,  d4
        vmull.s32       q9,  d1,  d5
.endm
.macro  INIT4
        veor.s64        q8,  q8
        veor.s64        q9,  q9
.endm
.macro  STORE
        /* sum all four 64-bit partial sums into d16 */
        vadd.s64        q8,  q8,  q9
        vadd.s64        d16, d16, d17
        /* rounding shift right by 30 with saturation to 32 bits;
         * only lane 0 of the narrowed result is stored */
        vqrshrn.s64     d16, q8,  #30
        vst1.32         d16[0],   [r1]
.endm

resample_one s32, 2


/* s16 */
/*
 * Accumulator: q8, four 32-bit lanes (vmull/vmlal.s16 widen the 16x16
 * products to 32 bits).  One d register holds four samples, so LOAD4 loads a
 * single d register per operand.
 */
.macro  LOAD1
        /* clear d0 so the three unused lanes contribute 0 to MLA1 */
        veor.16         d0,  d0
        vld1.16         {d0[0]},  [r0]!          /* load filter */
        vld1.16         {d4[0]},  [r3]!          /* load src */
.endm
.macro  LOAD2
        /* one 32-bit lane load fetches two 16-bit values; the other two
         * lanes are zeroed so MLA2 adds nothing for them */
        veor.16         d0,  d0
        vld1.32         {d0[0]},  [r0]!          /* load filter */
        veor.16         d4,  d4
        vld1.32         {d4[0]},  [r3]!          /* load src */
.endm
.macro  LOAD4
        vld1.16         {d0},     [r0]!          /* load filter */
        vld1.16         {d4},     [r3]!          /* load src */
.endm
.macro  MLA1
        vmlal.s16       q8,  d0,  d4[0]
.endm
.macro  MLA2
        vmlal.s16       q8,  d0,  d4
.endm
.macro  MLA4
        vmlal.s16       q8,  d0,  d4
.endm
.macro  MUL4
        /* overwrites the accumulator, so no prior INIT4 is needed */
        vmull.s16       q8,  d0,  d4
.endm
.macro  INIT4
        veor.s32        q8,  q8
.endm
.macro  STORE
        /* horizontal sum of the four 32-bit lanes */
        vpadd.s32       d16, d16, d17
        vpadd.s32       d16, d16, d16
        /* rounding shift right by 15 with saturation to 16 bits;
         * only lane 0 of the narrowed result is stored */
        vqrshrn.s32     d16, q8,  #15
        vst1.16         d16[0],   [r1]
.endm

resample_one s16, 1


/*
 * resample_linear fmt, es
 *
 * Emits the exported function ff_resample_linear_<fmt>_neon.  It runs the
 * same tap loop as resample_one but accumulates two dot products at once:
 * one against the filter phase at r0 and one against the data located
 * filter_length elements further on (r2).  The format specific STORE macro
 * combines the two sums.
 *
 * Inputs as read by the code: the same as resample_one, plus
 *   SRC_INCR field of the context, kept in r4 for STORE
 *   [sp, #4] at entry (sp+12 after the push), read by STORE as frac
 *
 * Registers inside the tap loop:
 *   r0  first filter pointer    r2  second filter pointer
 *   r3  source pointer          r5  taps remaining
 *   r1  &dst[dst_index]         r4  src_incr
 *
 * The LOAD, MLA, MUL4, INIT4 and STORE macros must be defined before this
 * macro is invoked and are purged at its end.
 */
.macro resample_linear  fmt, es=2
function ff_resample_linear_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es     /* &dst[dst_index] */

        /* NOTE(review): PHASE_SHIFT+4 assumes phase_mask is the word that
         * directly follows phase_shift in the context; confirm the layout. */
        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8]             /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2               /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4               /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es     /* &src[sample_index] */

        /* flags from this cmp are consumed by the blt below; the ldr and
         * adds in between do not update them */
        cmp             r5, #8
        ldr             r4, [r0, #SRC_INCR]      /* r4 is free again: reuse for src_incr */
        add             r0, ip, r2, lsl #\es     /* filter = &filter_bank[...] */
        add             r2, r0, r5, lsl #\es     /* filter[... + c->filter_length] */

        blt             5f
8:
        /* at least 8 taps: the first 4 initialise the accumulators via MUL4 */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        /* second group of 4 for the current block of 8; the beq tests the
         * flags of the most recent subs (the NEON macros leave them alone) */
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        /* filter_length was a multiple of 8: accumulate last group and store */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        /* fewer than 8 taps in total: start from zeroed accumulators */
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/* float32 linear */
/*
 * Accumulators: q9 sums src * filter read through r0 ("val"),
 *               q8 sums src * filter read through r2 ("v2").
 * q0 holds the r0 coefficients, q1 the r2 coefficients, q2 the source.
 */
.macro  LOAD1
        /* clear d0 and d2 so their unused upper lanes contribute 0 to MLA1 */
        veor.32         d0,  d0
        veor.32         d2,  d2
        vld1.32         {d0[0]},  [r0]!          /* load filter */
        vld1.32         {d2[0]},  [r2]!          /* load filter */
        vld1.32         {d4[0]},  [r3]!          /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0},     [r0]!          /* load filter */
        vld1.32         {d2},     [r2]!          /* load filter */
        vld1.32         {d4},     [r3]!          /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1},  [r0]!          /* load filter */
        vld1.32         {d2,d3},  [r2]!          /* load filter */
        vld1.32         {d4,d5},  [r3]!          /* load src */
.endm
.macro  MLA1
        vmla.f32        d18, d0,  d4[0]
        vmla.f32        d16, d2,  d4[0]
.endm
.macro  MLA2
        vmla.f32        d18, d0,  d4
        vmla.f32        d16, d2,  d4
.endm
.macro  MLA4
        vmla.f32        q9,  q0,  q2
        vmla.f32        q8,  q1,  q2
.endm
.macro  MUL4
        /* overwrites both accumulators, so no prior INIT4 is needed */
        vmul.f32        q9,  q0,  q2
        vmul.f32        q8,  q1,  q2
.endm
.macro  INIT4
        veor.f32        q9,  q9
        veor.f32        q8,  q8
.endm
.macro  STORE
        /* d0 = { frac, src_incr } as integers, converted to float together */
        vldr            s0,  [sp, #12]           /* frac */
        vmov            s1,  r4
        vcvt.f32.s32    d0,  d0

        vsub.f32        q8,  q8,  q9             /* v2 - val */
        /* horizontal sums: s4 (d2) = val, s2 (d1) = v2 - val */
        vpadd.f32       d18, d18, d19
        vpadd.f32       d16, d16, d17
        vpadd.f32       d2,  d18, d18
        vpadd.f32       d1,  d16, d16

        vmul.f32        s2,  s2,  s0             /* (v2 - val) * frac */
        vdiv.f32        s2,  s2,  s1             /* / c->src_incr */
        vadd.f32        s4,  s4,  s2

        vstr            s4,  [r1]
.endm

resample_linear flt, 2